In [1]:
%pwd

'c:\\Users\\aarav\\Downloads\\MoviesRecommendationSystemMLops\\research'

In [2]:
import os

# Move from the notebook's research/ folder up to the project root so the
# relative paths used further down resolve against it. The guard keeps the
# cell idempotent: re-running it no longer climbs one more level each time.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [3]:
%pwd

'c:\\Users\\aarav\\Downloads\\MoviesRecommendationSystemMLops'

In [23]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    The dataclass is frozen, so fields cannot be reassigned after creation.
    The annotations are not enforced at runtime; values are stored as given.
    """

    # Directory the trained artifacts are written into.
    root_dir: Path
    # CSV file read as the training data.
    data_path: Path
    # File name (joined onto root_dir) for the serialized movie data.
    movies_model_name: str
    # File name (joined onto root_dir) for the serialized similarity matrix.
    similarity_model_name: str
    # Forwarded to CountVectorizer as its max_features argument.
    max_features: int

In [5]:
# Configuration manager: reads the YAML files and builds the ModelTrainerConfig

In [24]:
from MoviesRecommendationSystemMLops.constants import *
from MoviesRecommendationSystemMLops.utils.common import read_yaml, create_directories


In [25]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the configuration for the model-training stage.

        Reads the ``model_trainer`` section of the config file and the
        ``CountVectorizer`` section of the params file, and creates the
        stage's root directory as a side effect.

        Returns:
            ModelTrainerConfig: Settings for the trainer, with ``root_dir``
            and ``data_path`` converted to ``Path`` to match the dataclass
            annotations.
        """
        config = self.config.model_trainer
        params = self.params.CountVectorizer

        # The schema is not needed by this stage, so it is deliberately not
        # read here; a schema without COLUMNS must not block training.
        create_directories([config.root_dir])

        return ModelTrainerConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path),
            movies_model_name=config.movies_model_name,
            similarity_model_name=config.similarity_model_name,
            max_features=params.max_features,
        )

In [26]:
import pandas as pd
import os 
from MoviesRecommendationSystemMLops import logger 
import joblib 
from sklearn.feature_extraction.text import CountVectorizer
import nltk 
from nltk.stem.porter import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity

In [27]:
class ModelTrainer:
    """Builds and persists the artifacts of the content-based recommender.

    Training reads the prepared movie data, stems the ``tag`` text, turns it
    into bag-of-words count vectors and computes the pairwise cosine
    similarity between all rows. Two files are written into
    ``config.root_dir``: the movie data (as a dict) and the similarity matrix.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Keep the training configuration for later use by ``train``.

        Args:
            config: Settings providing the data path, output directory,
                output file names and the vectorizer's ``max_features``.
        """
        self.config = config

    @staticmethod
    def _stem_text(text, stemmer):
        """Return ``text`` with each whitespace-separated token stemmed."""
        return " ".join(stemmer.stem(token) for token in text.split())

    def train(self):
        """Compute the similarity matrix and save both artifacts to disk.

        The CSV at ``config.data_path`` must contain a ``tag`` column.
        NOTE(review): every ``tag`` value is assumed to be a string; a missing
        value would fail in ``_stem_text`` -- confirm upstream cleaning.

        Returns:
            None. The results are written to ``config.root_dir``.
        """
        data = pd.read_csv(self.config.data_path)

        # Stem BEFORE vectorizing. Previously the vectors were built first, so
        # the stemmed text never reached the similarity matrix and word
        # variants (e.g. "action"/"actions") were counted as different terms.
        stemmer = PorterStemmer()
        data['tag'] = data['tag'].apply(
            lambda text: self._stem_text(text, stemmer)
        )

        vectorizer = CountVectorizer(
            max_features=self.config.max_features,
            stop_words='english',
        )
        vectors = vectorizer.fit_transform(data['tag']).toarray()
        similarity = cosine_similarity(vectors)

        # Save the movie data. The context manager guarantees the file handle
        # is flushed and closed even if serialization fails.
        model_path = os.path.join(self.config.root_dir, self.config.movies_model_name)
        with open(model_path, 'wb') as model_file:
            joblib.dump(data.to_dict(), model_file)

        # Save the similarity matrix.
        similarity_path = os.path.join(self.config.root_dir, self.config.similarity_model_name)
        with open(similarity_path, 'wb') as similarity_file:
            joblib.dump(similarity, similarity_file)


In [14]:
# Pipeline: build the configuration, then train and save the model artifacts

In [28]:
# Run the model-training stage end to end. Errors propagate unchanged: the
# previous `except Exception as e: raise e` wrapper did nothing except add a
# frame to the traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Bind the trainer to its own name instead of overwriting the config object.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-01-19 01:33:27,865: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-01-19 01:33:27,868: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-19 01:33:27,870: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-01-19 01:33:27,871: INFO: common: created directory at: artifacts]
[2024-01-19 01:33:27,873: INFO: common: created directory at: artifacts/model_trainer]
