In [1]:
#necessary imports
import pandas as pd
import numpy as np
import mlflow
from abc import ABC
import inspect
import nltk
from nltk import WordNetLemmatizer
import re
import string
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

[nltk_data] Downloading package wordnet to /Users/aman/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/aman/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /Users/aman/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/aman/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import os

class DataLoader(ABC):

    def load_data(self) -> pd.DataFrame:
        pass

class DataLinkLoader(DataLoader):

    def __init__(self, data_link:str)->None:

        """Sets up the link for loading the data."""
    

        if data_link == None:
            raise ValueError("Data link cannot be None")
        
        if not isinstance(data_link,str):
            raise ValueError("Data link must be String")
        
        self.data_link = data_link


    def load_data(self)->pd.DataFrame:

        """Returns the pandas dataframe of the data loaded from the provided url"""
        try:
            data = pd.read_csv(self.data_link)
            return data
        except Exception as e:
            raise RuntimeError(f"Failed to load data with error {e}")
        
class LocalStorageLoader(DataLoader):

    def __init__(self, load_path)->pd.DataFrame:

        if not isinstance(load_path, str):
            raise ValueError("Load path must be a string path of data")
        
        if not os.path.exists(load_path):
            raise FileNotFoundError(f"Specified file : {load_path} does not exists")
        
        self.load_path = load_path
      
    def load_data(self):

        """Returns the pandas dataframe of the data loaded from the provided local path"""
        try:
            data = pd.read_csv(self.load_path)
            return data
        
        except Exception as e:
            raise RuntimeError(f"Failed to load data with error {e}")


In [3]:
class DataPreProcessing(ABC):

    def __init__(self, train_data_loader:DataLoader,test_data_loader:DataLoader)->None:
            
            """Initialize the DataPreProcessing class with data loaders."""
            self.train_data_loader = train_data_loader
            self.test_data_loader = test_data_loader
            
            #loader validation
            if not isinstance(self.train_data_loader, DataLoader) or not isinstance(self.test_data_loader, DataLoader):
                raise TypeError("train_data_loader and test_data_loader must be instances of DataLoader class.")
            
            if not hasattr(self.train_data_loader, 'load_data') or not hasattr(self.test_data_loader, 'load_data'):
                raise AttributeError("train_data_loader and test_data_loader must have a 'load_data' method.")
            
            if not callable(self.train_data_loader.load_data) or not callable(self.test_data_loader.load_data):
                raise TypeError("load_data must be a callable method in train_data_loader and test_data_loader.")
            
            try:
                self.train_data = self.train_data_loader.load_data()
                self.test_data = self.test_data_loader.load_data()
            except Exception as e:
                raise RuntimeError(f"Failed to load data: {e}")
            
            #data validations
            if not isinstance(self.train_data, pd.DataFrame) or not isinstance(self.test_data, pd.DataFrame):
                raise TypeError("train_data and test_data must be pandas DataFrames.")
            
            if self.train_data.empty or self.test_data.empty:
                raise ValueError("train_data and test_data cannot be empty DataFrames.")
            
            if not all(col in self.train_data.columns for col in self.test_data.columns):
                raise ValueError("Test data contains columns that are not present in the training data.")
            
            if not all(col in self.test_data.columns for col in self.train_data.columns):
                raise ValueError("Training data contains columns that are not present in the test data.")


    def preprocess(self, col:str, tool_flow:list[str])->pd.DataFrame:

        pass
        
        
    def available_tools(self) ->list:
        
        """Return a list of available preprocessing methods in the class."""

        try:
            methods = [
                name for name, func in inspect.getmembers(self, predicate=inspect.ismethod)
                if func.__self__.__class__ == self.__class__ and not name.startswith("__")
                and name not in ["available_tools", "preprocess"]
            ]
            return methods
        
        except AttributeError as e:
            raise RuntimeError(f"Failed to inspect methods for {self.__class__.__name__}: {e}")
        
        except Exception as e:
            raise RuntimeError(f"Unexpected error in available_tools: {e}")
        
    def get_data(self):
        return self.train_data, self.test_data
    

class TextDataPreProcessing(DataPreProcessing):

    def __init__(self, train_data_loader:DataLoader, test_data_loader:DataLoader)->None:

        """Initialize the TextDataPreProcessing class with data loaders."""
        super().__init__(train_data_loader, test_data_loader)

    def lemmatization(self, text:str)->str:

        """Lemmatize the text."""
        
        lemmatizer = WordNetLemmatizer()
        text = text.split()
        text = [lemmatizer.lemmatize(word) for word in text]
        return " ".join(text)

    def remove_stop_words(self, text:str)->str:
        
        """Remove stop words from the text."""
        
        stop_words = set(stopwords.words("english"))
        text = [word for word in str(text).split() if word not in stop_words]
        return " ".join(text)

    def remove_numbers(self, text:str)->str:
        
        """Remove numbers from the text."""
        
        text = ''.join([char for char in text if not char.isdigit()])
        return text

    def lower_case(self, text:str)->str:
        
        """Convert text to lower case."""
        
        text = text.split()
        text = [word.lower() for word in text]
        return " ".join(text)

    def remove_punctuations(self, text:str)->str:
        
        """Remove punctuations from the text."""
        
        text = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
        text = text.replace('؛', "")
        text = re.sub('\s+', ' ', text).strip()
        return text

    def remove_urls(self, text:str)->str:
        
        """Remove URLs from the text."""
        
        url_pattern = re.compile(r'https?://\S+|www\.\S+')
        return url_pattern.sub(r'', text)

    def remove_small_sentences(self, text:str):
        
        """Remove sentences with less than 3 words."""
        
        if len(text.split()) < 3:
                return np.nan
        return text

    def available_tools(self):
        return super().available_tools()

    def preprocess(self, col:str, tool_flow=["lower_case", "lemmatization", "remove_stop_words", "remove_numbers", "remove_punctuations", "remove_urls"]) -> pd.DataFrame:
        
        """Preprocess the data using the specified tools in the tool_flow."""
        try:
            
            #tool validations
            if not isinstance(tool_flow, list):
                raise ValueError("tool_flow must be a list of tool names.")
            
            if not all(tool in self.available_tools() for tool in tool_flow):
                raise ValueError("One or more tools in tool_flow are not available in the preprocessing class.")
            
            #column validations
            if not isinstance(col, str):
                raise ValueError("col must be a string representing the column name.")
            
            if col not in self.train_data.columns and col not in self.test_data.columns:
                raise ValueError(f"Column '{col}' does not exist in both Training and Test DataFrames.")
            

            for tool in tool_flow:
                method = getattr(self, tool)
                self.train_data[col] = self.train_data[col].apply(method)
                self.test_data[col] = self.test_data[col].apply(method)

            return self.train_data, self.test_data
        
        except Exception as e:
            raise RuntimeError(f"An error occurred during preprocessing: {e}")
        
    def get_data(self):
        return super().get_data()
    

In [4]:
class FeatureEngineering(ABC):

    def __init__(self,train_data_loader:DataLoader, test_data_loader:DataLoader)->None:
        
        """Initialize the DataPreProcessing class with data loaders."""
        self.train_data_loader = train_data_loader
        self.test_data_loader = test_data_loader
        
        #loader validation
        if not isinstance(self.train_data_loader, DataLoader) or not isinstance(self.test_data_loader, DataLoader):
            raise TypeError("train_data_loader and test_data_loader must be instances of DataLoader class.")
        
        if not hasattr(self.train_data_loader, 'load_data') or not hasattr(self.test_data_loader, 'load_data'):
            raise AttributeError("train_data_loader and test_data_loader must have a 'load_data' method.")
        
        if not callable(self.train_data_loader.load_data) or not callable(self.test_data_loader.load_data):
            raise TypeError("load_data must be a callable method in train_data_loader and test_data_loader.")
        
        try:
            self.train_data = self.train_data_loader.load_data()
            self.test_data = self.test_data_loader.load_data()
        except Exception as e:
            raise RuntimeError(f"Failed to load data: {e}")
        
        #data validations
        if not isinstance(self.train_data, pd.DataFrame) or not isinstance(self.test_data, pd.DataFrame):
            raise TypeError("train_data and test_data must be pandas DataFrames.")
        
        if self.train_data.empty or self.test_data.empty:
            raise ValueError("train_data and test_data cannot be empty DataFrames.")
        
        if not all(col in self.train_data.columns for col in self.test_data.columns):
            raise ValueError("Test data contains columns that are not present in the training data.")
        
        if not all(col in self.test_data.columns for col in self.train_data.columns):
            raise ValueError("Training data contains columns that are not present in the test data.")
        
    
    def available_tools(self) -> list:
        """Return a list of available feature engineering methods in the class."""
        try:
            methods = [
                name for name, func in inspect.getmembers(self, predicate=inspect.ismethod)
                if func.__self__.__class__ == self.__class__ and not name.startswith("__")
                and name not in ["available_tools", "feature_engineering"]
            ]
            return methods
        
        except AttributeError as e:
            raise RuntimeError(f"Failed to inspect methods for {self.__class__.__name__}: {e}")
        
        except Exception as e:
            raise RuntimeError(f"Unexpected error in available_tools: {e}")
    
    def get_data(self)->tuple:
        return self.train_data, self.test_data
        

class TextFeatureEngineering(FeatureEngineering):

    def __init__(self, train_data_loader : DataLoader, test_data_loader :DataLoader):
        super().__init__(train_data_loader, test_data_loader)
    
        
    def apply_bow(self, col: str, max_features: int) -> tuple:
        """
        Apply Count Vectorizer to the train and test data column.
        
        Parameters:
            col (str): Name of the text column.
            max_features (int): Max number of features for BOW.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame]: Transformed train and test DataFrames with BOW features.
        """
        try:
            vectorizer = CountVectorizer(max_features=max_features)

            self.train_data.dropna(inplace=True)
            self.test_data.dropna(inplace=True)
            
            X_train = self.train_data[col].values
            X_test = self.test_data[col].values

            # Fit on train and transform both
            X_train_bow = vectorizer.fit_transform(X_train)
            X_test_bow = vectorizer.transform(X_test)

            # Convert to DataFrame with feature names
            feature_names = vectorizer.get_feature_names_out()
            X_train_bow_df = pd.DataFrame(X_train_bow.toarray(), columns=feature_names, index=self.train_data.index)
            X_test_bow_df = pd.DataFrame(X_test_bow.toarray(), columns=feature_names, index=self.test_data.index)

            # Update datasets
            self.train_data = pd.concat([self.train_data.drop(columns=[col]), X_train_bow_df], axis=1)
            self.test_data = pd.concat([self.test_data.drop(columns=[col]), X_test_bow_df], axis=1)

            return self.train_data, self.test_data

        except Exception as e:
            raise e
    
    def apply_tf_idf(self, col:str):
        
        """Calculate the TF-IDF score for each word in the text."""
        from sklearn.feature_extraction.text import TfidfVectorizer
        
        self.train_data.dropna(inplace=True)
        self.test_data.dropna(inplace=True)
        
        vectorizer = TfidfVectorizer()
        X_train = self.train_data[col].values
        X_test = self.test_data[col].values

        
        # Fit on train and transform both
        X_train_tfidf = vectorizer.fit_transform(X_train)
        X_test_tfidf = vectorizer.transform(X_test)
        
        feature_names = vectorizer.get_feature_names_out()
        # Convert to DataFrame with feature names
        feature_names = vectorizer.get_feature_names_out()
        X_train_tfidf_df = pd.DataFrame(X_train_tfidf.toarray(), columns=feature_names, index=self.train_data.index)
        X_test_tfidf_df = pd.DataFrame(X_test_tfidf.toarray(), columns=feature_names, index=self.test_data.index)

        # X_train_tfidf_df.drop(columns="sentiment", inplace=True)
        # X_test_tfidf_df.drop(columns="sentiment", inplace=True)

        # Update datasets
        self.train_data = pd.concat([self.train_data.drop(columns=[col]), X_train_tfidf_df], axis=1)
        self.test_data = pd.concat([self.test_data.drop(columns=[col]), X_test_tfidf_df], axis=1)

        return self.train_data, self.test_data
    

    def label_encode(self, col:str):

        from sklearn.preprocessing import LabelEncoder

        lab_enc = LabelEncoder()

        self.train_data[col] = lab_enc.fit_transform(self.train_data[col])
        self.test_data[col] = lab_enc.transform(self.test_data[col])

        return self.train_data , self.test_data
    

        
        

In [5]:
import os

class DataUploader(ABC):

    def upload_data(self, df, name):
        pass

class LocalStorageUploader(DataUploader):

    def __init__(self, upload_path):
        self.upload_path = upload_path

    def upload_data(self, df, name):

        os.makedirs(self.upload_path, exist_ok=True)
        path = os.path.join(self.upload_path, name)
        df.to_csv(path, index=False)

    

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
import pickle

class ModelFactory:
    
    @staticmethod
    def get_model(model: str):

        models_map = {"randomforest": RandomForestClassifier,
                       "gradientboost": GradientBoostingClassifier}
        try:
            if model not in models_map:
                raise ValueError(f"Unsupported model passed. We support following models : {models_map.keys()}")
            return models_map[model]
        except Exception as e:
            raise e
        
    @staticmethod
    def model_params(model_name:str, model):
        
        model_params_map = {"randomforest": {"n_estimators": model.n_estimators, "max_depth":model.max_depth},
                       "gradientboost": {"n_estimators": model.n_estimators, "max_depth":model.max_depth}}
        
        try:
            return model_params_map[model_name]
        except Exception as e:
            raise e

   

class ModelBuilding:
    def __init__(self, model: str, hyperparameters=None):
        try:
            self.model_cls = ModelFactory().get_model(model)
            if hyperparameters:
                self.model = self.model_cls(**hyperparameters)
            else:
                self.model = self.model_cls()
        except Exception as e:
            raise e

    def fit(self, data_loader, output_col):
        try:
            data = data_loader.load_data()
            Y_train = data[output_col]
            X_train = data.drop(columns=[output_col])
            self.model.fit(X_train, Y_train)
            return self.model
        except Exception as e:
            raise e

    def get_model(self):
        return self.model


    def save_model(self, path):
        try:
            with open(path, "wb") as f:
                pickle.dump(self.model, f)
        except Exception as e:
            raise e


class ModelEvaluation:

    def __init__(self, model_path):
        self.model_path = model_path

    def load_model(self):

        """Load the trained model from a file."""
        try:
            with open(self.model_path, 'rb') as file:
                self.model = pickle.load(file)
        
        except FileNotFoundError:
            raise FileNotFoundError(f"Existing file : {self.model_path} does not exist")
        except Exception as e:           
            raise

    
    def evaluate(self, data_loader, output_col):

        data = data_loader.load_data()
        X = data.drop(columns = [output_col])
        y_true = data[output_col]
        try:
            y_pred = self.model.predict(X)
            return {
                "accuracy": accuracy_score(y_true, y_pred),
                "precision": precision_score(y_true, y_pred, average='macro', zero_division=0),
                "recall": recall_score(y_true, y_pred, average='macro', zero_division=0),
                "f1_score": f1_score(y_true, y_pred, average='macro', zero_division=0)
            }
        except Exception as e:
            raise e
        
    def save_metrics(self):
        pass
        


In [95]:
#orchestration 
from sklearn.model_selection import train_test_split
data_loader = DataLinkLoader("https://raw.githubusercontent.com/campusx-official/jupyter-masterclass/main/tweet_emotions.csv")

df = data_loader.load_data()
df = df[df["sentiment"].isin(["happiness", "sadness"])]

def prepare_data(df, drop_cols, test_size):
    df.drop(columns=drop_cols, inplace=True)
    train_df, test_df = train_test_split(df, test_size=test_size,random_state=42)
    return train_df, test_df

train_df, test_df = prepare_data(df, ["tweet_id"], 0.2)
train_df.dropna(inplace=True)
test_df.dropna(inplace = True)
uploader = LocalStorageUploader("data/raw")
uploader.upload_data(train_df, "train.csv")
uploader.upload_data(test_df, "test.csv")


#new file data preprocess.py
raw_train_loader = LocalStorageLoader("data/raw/train.csv")
raw_test_loader = LocalStorageLoader("data/raw/test.csv")
text_preprocessor = TextDataPreProcessing(raw_train_loader, raw_test_loader)
train_df, test_df = text_preprocessor.preprocess("content")
train_df.dropna(inplace=True)
test_df.dropna(inplace = True)
uploader = LocalStorageUploader("data/interim")
uploader.upload_data(train_df, "preprocess_train.csv")
uploader.upload_data(test_df, "preprocess_test.csv")


#new file feature_eng.py
preprocess_train_loader = LocalStorageLoader("data/interim/preprocess_train.csv")
preprocess_test_loader = LocalStorageLoader("data/interim/preprocess_test.csv")
feature_eng = TextFeatureEngineering(preprocess_train_loader, preprocess_test_loader)
feature_eng.apply_bow("content", max_features=1500)
feature_eng.label_encode("sentiment")

train_df, test_df = feature_eng.get_data()

uploader = LocalStorageUploader("data/processed")

uploader.upload_data(train_df, "processed_train.csv")
uploader.upload_data(test_df, "processed_test.csv")


#new file model training.py 
processed_train_loader = LocalStorageLoader("data/processed/processed_train.csv")
model = ModelBuilding("randomforest")
model.fit(processed_train_loader, "sentiment")
model.save_model("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")


#newfile model_evaluation
processed_test_loader = LocalStorageLoader("data/processed/processed_test.csv")
evaluator = ModelEvaluation("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")
evaluator.load_model()
evaluation_dict = evaluator.evaluate(processed_test_loader, "sentiment")
print(evaluation_dict)

{'accuracy': 0.766152362584378, 'precision': 0.7681226639912038, 'recall': 0.7670323024822299, 'f1_score': 0.7660322096766727}


In [8]:
from mlflow.models.signature import infer_signature


mlflow.set_tracking_uri("http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/")

models = ["randomforest", "gradientboost"]

fe_tech = ["bow", "tfidf"]

mlflow.set_experiment(experiment_name= "EmotionDetection - BOW vs TFIDF")

with mlflow.start_run(run_name="Emotion_Detection_runs") as parent_run:

     for tech in fe_tech:
          preprocess_train_loader = LocalStorageLoader("data/interim/preprocess_train.csv")
          preprocess_test_loader = LocalStorageLoader("data/interim/preprocess_test.csv")
          feature_eng = TextFeatureEngineering(preprocess_train_loader, preprocess_test_loader)
          feature_eng.label_encode("sentiment")
          if tech == "bow":
               feature_eng.apply_bow("content", max_features=1500)
          elif tech == "tfidf":
               feature_eng.apply_tf_idf("content")

          train_df, test_df = feature_eng.get_data()
          uploader = LocalStorageUploader("data/processed")
          uploader.upload_data(train_df, "processed_train.csv")
          uploader.upload_data(test_df, "processed_test.csv")
                
          for model_name in models:

               with mlflow.start_run(run_name=f"{model_name}_with_{tech}", nested=True) as child_run:
                     
                    processed_train_loader = LocalStorageLoader("data/processed/processed_train.csv")
                    model = ModelBuilding(model_name)
                    model.fit(processed_train_loader, "sentiment")
                    model.save_model("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")
                    fit_model = model.get_model()


                    #newfile model_evaluation
                    processed_test_loader = LocalStorageLoader("data/processed/processed_test.csv")
                    evaluator = ModelEvaluation("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")
                    evaluator.load_model()
                    evaluation_dict_test = evaluator.evaluate(processed_test_loader, "sentiment")
                    evaluation_dict_train = evaluator.evaluate(processed_train_loader, "sentiment")
                 
                    evaluation_dict = {"train":evaluation_dict_train, "test": evaluation_dict_test}

                    for outer_key, inner_dict in evaluation_dict.items():
                         for inner_key, val in inner_dict.items():
                              mlflow.log_metric(f"{outer_key}_{inner_key}", val)

                    mlflow.log_params(ModelFactory().model_params(model_name, fit_model))
                    mlflow.log_param("Vectorizer", tech)
                    mlflow.log_param("model", model_name)
                    mlflow.sklearn.log_model(model, "model")
                    mlflow.log_artifacts("/Users/aman/Developer/mlops/EMOTION_DETECTION/notebooks/experiments.ipynb")
                    

                    
                    

        



🏃 View run randomforest_with_bow at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125/runs/8d87ab91e18c4398972bd9933f09e5bf
🧪 View experiment at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125




🏃 View run gradientboost_with_bow at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125/runs/a1a7178730f14368a2cd5c9ec267fe42
🧪 View experiment at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125




🏃 View run randomforest_with_tfidf at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125/runs/a716a69f850249e0a0a24bdb6de23e7b
🧪 View experiment at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125




🏃 View run gradientboost_with_tfidf at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125/runs/82b75738c9d3400b8fa03f800c25154d
🧪 View experiment at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125
🏃 View run Emotion_Detection_runs at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125/runs/ea477776774f4913b1d76083e4c51a08
🧪 View experiment at: http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/#/experiments/331030979012058125


In [None]:
#hyper parameter fine tunning BOW 


from mlflow.models.signature import infer_signature
from sklearn.model_selection import GridSearchCV

model = ModelBuilding(model_name)
param_grid = {"max_depth" :  [10,15,20,30],
              "n_estimators": [10,20,30,40]}

grid_search_model = GridSearchCV(model, param_grid=param_grid, cv=5, scoring="f1", n_jobs=-1)

mlflow.set_tracking_uri("http://ec2-16-171-116-127.eu-north-1.compute.amazonaws.com:5000/")

models = ["randomforest", "gradientboost"]

fe_tech = ["bow", "tfidf"]

mlflow.set_experiment(experiment_name= "EmotionDetection - BOW FineTuning")

with mlflow.start_run(run_name="Emotion_Detection_runs") as parent_run:

     for tech in fe_tech:
          preprocess_train_loader = LocalStorageLoader("data/interim/preprocess_train.csv")
          preprocess_test_loader = LocalStorageLoader("data/interim/preprocess_test.csv")
          feature_eng = TextFeatureEngineering(preprocess_train_loader, preprocess_test_loader)
          feature_eng.label_encode("sentiment")
          if tech == "bow":
               feature_eng.apply_bow("content", max_features=1500)
          elif tech == "tfidf":
               feature_eng.apply_tf_idf("content")

          train_df, test_df = feature_eng.get_data()
          uploader = LocalStorageUploader("data/processed")
          uploader.upload_data(train_df, "processed_train.csv")
          uploader.upload_data(test_df, "processed_test.csv")
                
          for model_name in models:

               with mlflow.start_run(run_name=f"{model_name}_with_{tech}", nested=True) as child_run:
                     
                    processed_train_loader = LocalStorageLoader("data/processed/processed_train.csv")
                    model = ModelBuilding(model_name)
                    model.fit(processed_train_loader, "sentiment")
                    model.save_model("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")
                    fit_model = model.get_model()


                    #newfile model_evaluation
                    processed_test_loader = LocalStorageLoader("data/processed/processed_test.csv")
                    evaluator = ModelEvaluation("/Users/aman/Developer/mlops/EMOTION_DETECTION/models/emotion_detector_model.pkl")
                    evaluator.load_model()
                    evaluation_dict_test = evaluator.evaluate(processed_test_loader, "sentiment")
                    evaluation_dict_train = evaluator.evaluate(processed_train_loader, "sentiment")
                 
                    evaluation_dict = {"train":evaluation_dict_train, "test": evaluation_dict_test}

                    for outer_key, inner_dict in evaluation_dict.items():
                         for inner_key, val in inner_dict.items():
                              mlflow.log_metric(f"{outer_key}_{inner_key}", val)

                    mlflow.log_params(ModelFactory().model_params(model_name, fit_model))
                    mlflow.log_param("Vectorizer", tech)
                    mlflow.log_param("model", model_name)
                    mlflow.sklearn.log_model(model, "model")
                    mlflow.log_artifacts("/Users/aman/Developer/mlops/EMOTION_DETECTION/notebooks/experiments.ipynb")
                    

                    
                    

        