In [1]:
# Colab-only setup: mount Google Drive at /content/drive so that the dataset
# path used further down (under "/content/drive/My Drive/...") is readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

# Location of the movie dataset on the mounted Google Drive.
file_path = "/content/drive/My Drive/Project_dataset/movies.csv"
df = pd.read_csv(file_path)

# Preview the first rows as the cell's last expression so the notebook renders
# them as a table; print() falls back to plain text and wraps the wide columns
# across several blocks.
df.head()

   Movie_ID            Title             Genre  \
0         1    The Godfather      Crime, Drama   
1         2     Pulp Fiction      Crime, Drama   
2         3       Fight Club             Drama   
3         4  The Dark Knight     Action, Crime   
4         5        Inception  Sci-Fi, Thriller   

                                        Plot_Summary  \
0  The aging patriarch of an organized crime dyna...   
1  The lives of two mob hitmen, a boxer, a gangst...   
2  An insomniac office worker and a devil-may-car...   
3  Batman faces his greatest challenge yet as he ...   
4  A thief with the ability to enter dreams and s...   

                               Keywords  IMDb_Rating Release_Type  \
0                   mafia, crime, power          9.2      Classic   
1             crime, non-linear, hitman          8.9      Classic   
2  underground, psychological, violence          8.8      Classic   
3            batman, joker, crime, hero          9.0      Classic   
4   dreams, heist

In [3]:
# Handle missing values and remove duplicates.
# Only non-numeric columns are filled with "": a blanket df.fillna("") would
# also write "" into numeric columns such as IMDb_Rating and turn them into
# mixed-type object columns. The three feature columns are always included so
# the string concatenation below never meets a missing value.
feature_columns = ["Genre", "Plot_Summary", "Keywords"]
text_columns = df.select_dtypes(exclude="number").columns.union(feature_columns)

# Build the cleaned frame in one chain rather than mutating df in place.
df = (
    df.fillna({column: "" for column in text_columns})
    .drop_duplicates()
    .assign(
        # Combine text features: Genre, Plot Summary, and Keywords
        combined_features=lambda frame: (
            frame["Genre"] + " " + frame["Plot_Summary"] + " " + frame["Keywords"]
        )
    )
)

# Display processed data
df[["Title", "combined_features"]].head()

Unnamed: 0,Title,combined_features
0,The Godfather,"Crime, Drama The aging patriarch of an organiz..."
1,Pulp Fiction,"Crime, Drama The lives of two mob hitmen, a bo..."
2,Fight Club,Drama An insomniac office worker and a devil-m...
3,The Dark Knight,"Action, Crime Batman faces his greatest challe..."
4,Inception,"Sci-Fi, Thriller A thief with the ability to e..."


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the combined text (English stop words removed) and
# encode every movie as one row of the resulting matrix.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["combined_features"])

# Get feature names (words); they label the columns of the frame below.
feature_names = vectorizer.get_feature_names_out()

# Convert TF-IDF matrix to DataFrame for easier visualization. Reuse df's index
# so each row keeps the label of the movie it describes: df can have gaps in
# its index after drop_duplicates(), whereas a default RangeIndex would not.
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), index=df.index, columns=feature_names)

In [8]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_movies(user_input, top_n=5):
    """Return the movies whose text features best match a free-text query.

    The query is encoded with the already fitted module-level ``vectorizer``
    and compared against every row of ``tfidf_matrix`` by cosine similarity.
    Rows of ``df`` are looked up by position, so ``df`` and ``tfidf_matrix``
    must share the same row order.

    Args:
        user_input: Free-text description of the kind of movie wanted.
        top_n: Maximum number of movies to return. Values below 1 give an
            empty result.

    Returns:
        A DataFrame with the columns ``Title``, ``Genre``, ``IMDb_Rating``,
        ``Plot_Summary`` and ``Similarity_Score``, ordered from most to least
        similar. Titles are unique (the best-scoring row of each title is
        kept) and movies with a similarity of zero are left out, so fewer than
        ``top_n`` rows may be returned.
    """
    # A negative count would otherwise slice from the end and return almost
    # the whole catalogue.
    top_n = max(int(top_n), 0)

    # Convert user input into a TF-IDF vector
    user_tfidf = vectorizer.transform([user_input])

    # Compute cosine similarity between user input and movie dataset
    similarity_scores = cosine_similarity(user_tfidf, tfidf_matrix).flatten()

    # Rank every movie, best first; the stable sort keeps dataset order for ties.
    ranked_indices = (-similarity_scores).argsort(kind="stable")

    recommendations = df.iloc[ranked_indices][["Title", "Genre", "IMDb_Rating", "Plot_Summary"]].copy()
    recommendations["Similarity_Score"] = similarity_scores[ranked_indices]

    # A score of zero means the query shares no vocabulary with the movie;
    # returning such rows would present arbitrary titles as recommendations.
    recommendations = recommendations[recommendations["Similarity_Score"] > 0]

    # De-duplicate titles BEFORE truncating, so duplicates among the top hits
    # do not shrink the result below top_n.
    return recommendations.drop_duplicates(subset=["Title"]).head(top_n)

In [9]:
# Take user input
DEFAULT_TOP_N = 5

user_input = input("\n🔍 Enter a brief description of the type of movies you like: ")
raw_top_n = input("🎬 How many recommendations would you like? (Default is 5): ").strip()

# An empty answer selects the default. Anything that is not a positive whole
# number (text, whitespace, zero, negatives) falls back to the default with a
# notice instead of raising ValueError or requesting a nonsensical count.
try:
    top_n = int(raw_top_n) if raw_top_n else DEFAULT_TOP_N
    if top_n < 1:
        raise ValueError(raw_top_n)
except ValueError:
    print(f"'{raw_top_n}' is not a positive whole number; showing {DEFAULT_TOP_N} recommendations.")
    top_n = DEFAULT_TOP_N



🔍 Enter a brief description of the type of movies you like: i want to watch modern thriller movies
🎬 How many recommendations would you like? (Default is 5): 4


In [10]:
# Look up the movies that best match the description entered by the user.
recommended_movies = recommend_movies(user_input, top_n)

# Display recommendations, or say so explicitly when nothing came back instead
# of printing the heading above an empty table.
if recommended_movies.empty:
    print("\n No matching movies were found. Try describing the movie in different words. \n")
else:
    print("\n **Top Movie Recommendations for You:** \n")
    display(recommended_movies)



 **Top Movie Recommendations for You:** 



Unnamed: 0,Title,Genre,IMDb_Rating,Plot_Summary,Similarity_Score
136,Enemy,"Mystery, Thriller",6.9,A man discovers he has a doppelgänger and beco...,0.317476
87,Gone Girl,"Thriller, Mystery",8.1,A man becomes the prime suspect in his wife's ...,0.31208
193,Battle Royale,"Action, Thriller",7.6,A group of students are forced into a deadly s...,0.306532
166,Prisoners,"Thriller, Crime",8.1,A father takes matters into his own hands when...,0.298402


In [None]:
'''
My expected salary is 2500 USD per month.
'''