# **Model Building**

In [None]:
import ast

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [None]:
import warnings
# Silence every warning for the rest of the notebook to keep cell output tidy.
# NOTE(review): this is a blanket filter, so deprecation notices raised by
# pandas / scikit-learn are hidden as well.
warnings.filterwarnings("ignore")

## **Import Dataset**

In [None]:
# List the full path of every file under the Kaggle input directory.
# Prints nothing when the directory does not exist (e.g. outside Kaggle).
import os
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, file_name) for file_name in file_names]
    for full_path in full_paths:
        print(full_path)

In [None]:
# Load the movies dataset from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/moviesdataset/movies_data.csv')

# To run outside Kaggle, load the CSV from a local path instead, for example:

# df_path = r'D:\gundarTingkat_3\6\Penulisan Ilmiah\Laporan\movies-recommendation\data\movies_data.csv'
# df = pd.read_csv(df_path)

In [None]:
# Preview the first 20 rows to sanity-check the loaded data.
df.head(20)

**Preprocessing Data**

In [None]:
# Each `genres` cell is expected to hold the string form of a Python list of
# genre names; parse it and join the names with spaces. ast.literal_eval only
# accepts Python literals, unlike eval(), which would execute any code that
# happens to be stored in the CSV.
df['genres'] = df['genres'].apply(lambda x: ' '.join(ast.literal_eval(x)))
# Turn the comma-separated tag string into space-separated text.
df['tag'] = df['tag'].apply(lambda x: ' '.join(x.split(',')))

In [None]:
# Delete every "(YYYY)" group from the titles, then trim the whitespace that
# is left at either end.
year_in_parens = r'\(\d{4}\)'
titles_without_year = df['title'].str.replace(year_in_parens, '', regex=True)
df['title'] = titles_without_year.str.strip()

In [None]:
# Inspect the first rows after preprocessing.
df.head()

**Combine genres and tags into a single string for each movie**

In [None]:
# One text field per movie: the genres string followed by the tag string,
# separated by a single space.
df['content'] = df['genres'].str.cat(df['tag'], sep=' ')

**TF-IDF**

In [None]:
# Vectorise each movie's `content` string with TF-IDF, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from `content` and transform it in one step.
tfidf_matrix = tfidf.fit_transform(df['content'])

**Cosine similarity matrix**

In [None]:
# Pairwise dot products between all movies' TF-IDF vectors. The vectorizer is
# created without a `norm` argument, so its default L2 normalisation applies and
# the dot product equals the cosine similarity.
# NOTE(review): the result is a dense (n_movies x n_movies) array, so memory
# grows quadratically with the number of rows — confirm this fits for the full dataset.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

**Recommendation Model**

In [None]:
# Frame the recommender reads titles and ratings from. It must keep the same
# row order as `df`, because `cosine_sim` was computed from `df` and is indexed
# by row position.
# The previous conditional only assigned `df_sample` when 'Toy Story' was
# missing (and then appended an empty selection), so the function definition
# below failed with NameError whenever the title was present.
df_sample = df.reset_index(drop=True)

# Function to get movie recommendations based on similarity for the sample
def get_recommendations(title, cosine_sim=cosine_sim, data=df_sample):
    """Return the movies most similar to `title` as display strings.

    Parameters
    ----------
    title : str
        Title to look up. It is compared with ``data['title']`` as a whole
        string, ignoring letter case and surrounding whitespace. When several
        rows carry the same title, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity scores; row ``i`` must describe the ``i``-th row
        of ``data`` (positional, not label-based).
    data : pandas.DataFrame
        Frame with at least the columns ``'title'`` and ``'rating'``.

    Returns
    -------
    list of str or str
        Up to 10 strings of the form ``"<title> (Rating: <rating>)"``, most
        similar first and never including the queried movie itself; or a
        "not found" message string when no row matches ``title``.
    """
    top_n = 10

    # Normalise the query and the stored titles the same way, so the
    # existence check and the row lookup can never disagree.
    query = str(title).strip().lower()
    is_match = (data['title'].str.strip().str.lower() == query).to_numpy()

    # Check if the movie title exists in the data
    if not is_match.any():
        return f"Movie title '{title}' not found in the dataset."

    # Position (not index label) of the first matching row, because both
    # `cosine_sim` and `.iloc` below are positional.
    idx = int(is_match.argmax())

    # Rank every other movie by similarity, highest first. The queried row is
    # removed explicitly: slicing off the first sorted entry would drop the
    # wrong movie whenever another one ties with a similarity of 1.0.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pair[0] for pair in ranked[:top_n]]

    # Combine title and rating
    top_movies = data.iloc[movie_indices]
    return [
        f"{movie_title} (Rating: {rating})"
        for movie_title, rating in zip(top_movies['title'], top_movies['rating'])
    ]

# Prompt for a movie title and print the result of get_recommendations: a list
# of "<title> (Rating: <rating>)" strings, or a "not found" message.

recommendation_input = input('Enter the title of the movie you want: ')
recommendations_system = get_recommendations(recommendation_input)
print(recommendations_system)