In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

from sklearn.neighbors import NearestNeighbors

import warnings






In [20]:
def make_recommendation(Title, Release_Date, Rating, Directed_by, Genres):
    # Load the dataset
    movies_df = pd.read_csv('cleaned_movies_data.csv')
    
    # Define meta and feature columns
    meta_cols = ['Title', 'Directed_by', 'Release_Date', 'Rating', 'Genres']
    feature_cols = ['Release_Date', 'Rating', 'No_of_Persons_Voted', 'Duration', 'Genres']
    
    # Define preprocessing pipelines
    numeric_features = ['Rating', 'No_of_Persons_Voted']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
    categorical_features = ['Genres']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent', missing_values=pd.NA)),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ]
    )
    
    Title = movies_df.loc[(movies_df.Release_Date == Release_Date) & (movies_df.Rating == Rating) & (movies_df.Directed_by == Directed_by) & (movies_df.Genres == Genres)] \
            .sort_values(by="Rating", ascending=False).Title.values[0]

    # Prepare the features for modeling
    X = movies_df.loc[:, feature_cols]
    
    preprocessor.fit(X)
    X_preprocessed = preprocessor.transform(X)
    
    # Set k to a fixed number of neighbors (e.g., 5)
    k = 5
    model1 = NearestNeighbors(n_neighbors=k, metric='cosine')
    
    model1.fit(X_preprocessed)
    
    # Get features for the selected movie
    movie_features = movies_df.loc[movies_df.Title == Title, feature_cols]
    movie_features_preprocessed = preprocessor.transform(movie_features)
    
    distances, indices = model1.kneighbors(movie_features_preprocessed)
    
    # Get the recommended movies
    recommended_movies = movies_df.iloc[indices[0]]
    recommended_movies["distances"] = distances[0]
    
    # Sort by distance and return the recommendations
    recommended_movies = recommended_movies.sort_values(by="distances")
    
    return recommended_movies



In [22]:
Title = "The Godfather"
Release_Date = "1972-03-24" 
Rating = 9.3 
Directed_by = "Francis Ford Coppola"
Genres = "Crime"

response = make_recommendation(Title, Release_Date, Rating, Directed_by, Genres)

print(DataFrame(response))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_movies["distances"] = distances[0]


Unnamed: 0,Title,Release_Date,Rating,No_of_Persons_Voted,Directed_by,Duration,Genres,distances
5,The Godfather,1972-03-24,9.3,4102,Francis Ford Coppola,2 h 55 m,Crime,1.110223e-16
48,Pulp Fiction,1994-10-14,8.9,3197,Quentin Tarantino,2 h 34 m,Crime,0.0005169354
6489,Joker,2019-10-03,8.8,7742,Todd Phillips,2 h 2 m,Crime,0.006088398
711,The Dark Knight,2008-07-18,8.9,5286,Christopher Nolan,2 h 32 m,Action,0.008750496
4126,The Hateful Eight,2015-12-25,7.6,1750,Quentin Tarantino,2 h 48 m,Crime,0.00898673


In [None]:
# Load the saved cosine similarity matrix
with open('cosine_similarity.pkl', 'rb') as f:
    loaded_cosine_sim = pickle.load(f)

# Load the saved movie data
with open('movie_data.pkl', 'rb') as f:
    loaded_data = pickle.load(f)


# Example movie title
recommended_movies = recommend_movies('Inception')
print(recommended_movies)


In [None]:
def make_recommendation(Title, Release_Date, Rating, Directed_by, Genres):
    # Load the dataset
    movies_df = pd.read_csv('cleaned_movies_data.csv')
    
    meta_cols = ['Title', 'Directed_by']
    feature_cols = ['Release_Date', 'Rating', 'No_of_Persons_Voted', 'Duration', 'Genres']
    
    
    
    numeric_features = ['Rating', 'No_of_Persons_Voted']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')), #?
        ('scaler', StandardScaler())])
    
    binary_features = []
    binary_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy = 'most_frequent', missing_values=pd.NA)),
        ('label', OrdinalEncoder())])
    
    categorical_features = ['Genres']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy = 'most_frequent', missing_values=pd.NA)),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('binary', binary_transformer, binary_features),
            ('cat', categorical_transformer, categorical_features)])
    
    Title = df.loc[(df.Release_Date == Release_Date) & (df.Rating == Rating) & (df.Directed_By == Directed_By) & (df.Genres == Genres)] \
            .sort_values(by="Rating", ascending=False).movie_id.values[0]
    
    X = df.loc[:, feature_cols]
    
    preprocessor.fit(X)
    X_preprocessed = preprocessor.transform(X)
    
    k = Genres
    model1 = NearestNeighbors(n_neighbors=k, metric='cosine')
    
    model1.fit(X_preprocessed)
    
    movie_features = df.loc[df.movie_id == movie_id, feature_cols]
    movie_features_preprocessed = preprocessor.trasnform(movie_features)
    
    distances, indices = model1.kneighbors(movie_features_preprocessed)
    
    movies = df.iloc[indices[0]]
    movies["distances"] = distances[0]
    
    cols = movies.columns
    movies = movies.loc[:,cols]
    movies = movies.sort_values(by="distances")
    
    return movies.to_dict(orient="records")