In [1]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [27]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Bidirectional
import numpy as np

In [28]:
df= pd.read_csv("C:\\Users\\yukth\\Downloads\\Dataset_with_Movie_Ratings.csv")

In [29]:
print("Dataset Info:")
data_info = data.info()

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1883 entries, 0 to 1882
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Movie_ID      1883 non-null   int64 
 1   Title         1883 non-null   object
 2   Year          1883 non-null   int64 
 3   Genre         1883 non-null   object
 4   Movie_Rating  1883 non-null   int64 
dtypes: int64(3), object(2)
memory usage: 73.7+ KB


In [30]:
print("\nFirst 5 rows of the dataset:")
data_head = data.head()
print(data_head)


First 5 rows of the dataset:
   Movie_ID         Title  Year     Genre  Movie_Rating
0     34759      Aaghaaz   2000  Thriller             3
1     34760  Aaj Ka Ravan  2000     Drama             5
2     34761       Anjaane  2000   Romance             2
3     34762   Anokha Moti  2000    Family             4
4     34763  Apradhi Kaun  2000  Thriller             2


In [31]:
print("\nMissing values in each column:")
missing_values = data.isnull().sum()
print(missing_values)


Missing values in each column:
Movie_ID        0
Title           0
Year            0
Genre           0
Movie_Rating    0
dtype: int64


In [32]:
print("\nSummary statistics for numeric columns:")
summary_stats = data.describe()
print(summary_stats)


Summary statistics for numeric columns:
           Movie_ID         Year  Movie_Rating
count   1883.000000  1883.000000   1883.000000
mean   35925.870951  2010.570366      2.976633
std      634.243026     5.436594      1.412140
min    34759.000000  2000.000000      1.000000
25%    35392.000000  2006.000000      2.000000
50%    35951.000000  2011.000000      3.000000
75%    36464.500000  2015.000000      4.000000
max    36972.000000  2019.000000      5.000000


In [33]:
print("\nUnique values in the 'Genre' column:")
unique_genres = data['Genre'].unique()
print(unique_genres)


Unique values in the 'Genre' column:
['Thriller' 'Drama' 'Romance' 'Family' 'Drama, Social' 'Action'
 'Action, Drama' 'Comedy, Drama' 'Comedy' 'Horror' 'Action, Romance'
 'Romance, Drama' 'Comedy, Romance' 'Drama, Family, Thriller' 'Crime'
 'Comedy, Drama, Romance' 'Comedy, romance' 'Romance, Drama, Musical'
 'Action, romance' 'Romance, drama' 'Drama, War, Thriller'
 'Drama, Romance, Musical, Family' 'Comedy, Drama, Romance, Musical'
 'Comedy, Musical' 'Drama, Romance' 'Thriller, Action, Romance'
 'Suspense, Thriller' 'Family Drama' 'Action Comedy'
 'Thriller, Suspense, Crime' 'Thriller, Horror' 'History'
 'Drama, Romance, Social' 'Drama, Romance, Musical, Social'
 'Drama, Comedy, Romance, Musical' 'Drama, War, Romance, Action'
 'Action, Drama, Thriller' 'Drama, Romance, Family'
 'Drama, Musical, Social' 'Drama, Romance, Musical, Crime'
 'Comedy, Family' 'Drama, Romance, Thriller' 'Historical Drama'
 'Thriller, Action' 'Historical' 'Action, Thriller' 'Action, Comedy'
 "Children's" 'Co

In [34]:
missing_values = data.isnull().sum()

In [35]:
print("Missing values:")
print(missing_values)


Missing values:
Movie_ID        0
Title           0
Year            0
Genre           0
Movie_Rating    0
dtype: int64


In [36]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(data['Genre'])

In [37]:
def recommend_svd(title, top_n=10):
    """
    Recommend movies using SVD-based latent semantic analysis.

    Parameters:
    - title (str): Input movie title.
    - top_n (int): Number of recommendations to return.

    Returns:
    - List of recommended movie titles.
    """
    if title not in data['Title'].values:
        return f"Error: Movie title '{title}' not found in the dataset."
    
    # Apply Truncated SVD
    n_components = 50
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    svd_matrix = svd.fit_transform(tfidf_matrix)
    
    # Compute cosine similarity
    cosine_sim = cosine_similarity(svd_matrix, svd_matrix)
    
    # Get the input movie index
    idx = data.index[data['Title'] == title].tolist()[0]
    
    # Sort by similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get top-n similar movie indices
    movie_indices = [i[0] for i in sim_scores[1:top_n + 1]]
    return data['Title'].iloc[movie_indices].tolist()

In [38]:
def recommend_knn(title, top_n=10):
    """
    Recommend movies using KNN based on TF-IDF vector space.

    Parameters:
    - title (str): Input movie title.
    - top_n (int): Number of recommendations to return.

    Returns:
    - List of recommended movie titles.
    """
    if title not in data['Title'].values:
        return f"Error: Movie title '{title}' not found in the dataset."
    
    # Fit the KNN model
    knn = NearestNeighbors(metric='cosine', algorithm='brute')
    knn.fit(tfidf_matrix)
    
    # Get the input movie index
    idx = data.index[data['Title'] == title].tolist()[0]
    
    # Find nearest neighbors
    distances, indices = knn.kneighbors(tfidf_matrix[idx], n_neighbors=top_n + 1)
    
    # Return recommended movie titles (excluding the input movie)
    recommended = data.iloc[indices[0][1:]].Title.tolist()
    return recommended


In [39]:
def user_input_recommendation():
    
    print("Welcome to the Movie Recommendation System!")
    print("1. KNN Recommendations")
    print("2. SVD Recommendations")
    
    # Get user choice
    choice = input("Choose a recommendation method (1 or 2): ").strip()
    
    # Get movie title input
    movie_title = input("Enter the title of a movie: ").strip()
    
    # Handle recommendations based on user choice
    if choice == "1":
        print("KNN Recommendations:")
        print(recommend_knn(movie_title))
    elif choice == "2":
        print("SVD Recommendations:")
        print(recommend_svd(movie_title))
    else:
        print("Invalid choice. Please enter 1 or 2.")

# Call the user interaction function
user_input_recommendation()


Welcome to the Movie Recommendation System!
1. KNN Recommendations
2. SVD Recommendations


Choose a recommendation method (1 or 2):  1
Enter the title of a movie:  Good News


KNN Recommendations:
['99\n', 'Dhoondte Reh Jaaoge\n', 'Hey Bro', 'Welcome To New York', 'Little Zizou\n', 'Main Madhuri Dixit Banna Chahti Hoon', 'Lottery\n', 'Praan Jaye Par Shaan Na Jaye', 'Paying Guests', 'Raja Abroadiya']


In [40]:
def recommend_by_year(year):
    """Returns a list of movies released in the given year, sorted by rating."""
    recommendations = df[df['Year'] == year].sort_values(by='Movie_Rating', ascending=False)
    if recommendations.empty:
        return "No recommendations available for this year."
    return recommendations[['Title', 'Movie_Rating']].to_string(index=False)

In [41]:
def user_input_recommendation():
    print("Welcome to the Movie Recommendation System!")
    print("1. KNN Recommendations")
    print("2. SVD Recommendations")
    print("3. Recommendations by Year")
    
    choice = input("Choose a recommendation method (1, 2, or 3): ").strip()
    
    if choice in ["1", "2"]:
        movie_title = input("Enter the title of a movie: ").strip()
        if choice == "1":
            print("KNN Recommendations:")
            print(recommend_knn(movie_title))
        elif choice == "2":
            print("SVD Recommendations:")
            print(recommend_svd(movie_title))
    elif choice == "3":
        try:
            year = int(input("Enter the release year of movies you want recommendations for: ").strip())
            print(f"Recommendations for movies released in {year}:")
            print(recommend_by_year(year))
        except ValueError:
            print("Invalid year. Please enter a valid number.")
    else:
        print("Invalid choice. Please enter 1, 2, or 3.")

# Call the user interaction function
user_input_recommendation()

Welcome to the Movie Recommendation System!
1. KNN Recommendations
2. SVD Recommendations
3. Recommendations by Year


Choose a recommendation method (1, 2, or 3):  3
Enter the release year of movies you want recommendations for:  2003


Recommendations for movies released in 2003:


NameError: name 'df' is not defined