<a href="https://colab.research.google.com/github/arekaratharv28/ResumeProjects/blob/main/Project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy movie catalogue used by the cells below (swap in your real data here).
# Each key is a column; the lists are row-aligned, one entry per movie.
data = {
    'MovieID': [1, 2, 3, 4, 5],
    'Title': [
        'Movie A',
        'Movie B',
        'Movie C',
        'Movie D',
        'Movie E',
    ],
    'Genre': [
        'Action,Sci-Fi',
        'Comedy,Romance',
        'Action,Thriller',
        'Drama',
        'Comedy',
    ],
    'Description': [
        'A thrilling sci-fi adventure',
        'A hilarious romantic comedy',
        'An action-packed thriller',
        'A touching drama',
        'A funny comedy',
    ],
}
movies = pd.DataFrame(data=data)

In [4]:
def create_movie_recommendations(movie_title, movies_df, top_n=5):
    """Recommend movies whose descriptions are most similar to ``movie_title``.

    Descriptions are vectorised with TF-IDF (English stop words removed) and
    the other movies are ranked by cosine similarity to the query movie's
    description. Ties keep the row order of ``movies_df``.

    Args:
        movie_title: Exact title to look up in ``movies_df['Title']``. If the
            title occurs more than once, the first occurrence is used.
        movies_df: DataFrame with at least ``'Title'`` and ``'Description'``
            columns. Any index is accepted; rows are addressed by position.
            Missing descriptions are treated as empty text.
        top_n: Maximum number of recommendations. If fewer other movies exist,
            a warning is printed and all of them are returned.

    Returns:
        list: Titles ordered from most to least similar. The query row itself
        is never included. An empty list is returned (after printing an error)
        when ``movie_title`` is not found.
    """
    # Look the title up first so an unknown title costs nothing, and resolve it
    # to a *position*: index labels need not be 0..n-1 (e.g. a filtered frame),
    # while rows of the TF-IDF matrix are always positional.
    title_matches = (movies_df['Title'] == movie_title).to_numpy().nonzero()[0]
    if len(title_matches) == 0:
        print(f"Error: '{movie_title}' not found in the movie list.")
        return []
    movie_pos = int(title_matches[0])

    # Create TF-IDF matrix (NaN descriptions would make the vectorizer raise).
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movies_df['Description'].fillna(''))

    # Only the query row is needed, so compare that single row against all rows
    # instead of materialising the full n x n similarity matrix. The slice keeps
    # the operand two-dimensional.
    query_scores = cosine_similarity(
        tfidf_matrix[movie_pos:movie_pos + 1], tfidf_matrix
    )[0]

    # Exclude the query movie explicitly rather than assuming it sorts first:
    # a duplicate description ties with it, and an all-stop-word description
    # scores 0 even against itself. sorted() is stable, so ties keep row order.
    ranked_positions = sorted(
        (pos for pos in range(len(query_scores)) if pos != movie_pos),
        key=lambda pos: query_scores[pos],
        reverse=True,
    )

    # Check if there are enough similar movies
    available = len(ranked_positions)
    if top_n > available:
        print(f"Warning: Not enough movies to recommend. Returning all {available} similar movies.")
        top_n = available

    # A non-positive top_n yields no recommendations.
    chosen = ranked_positions[:max(top_n, 0)]
    return movies_df['Title'].iloc[chosen].tolist()

In [5]:
# Example usage: every entry pairs a title with the extra keyword arguments
# passed to create_movie_recommendations for that call.
example_calls = [
    ('Movie A', {}),            # lookup with the default top_n
    ('Movie Z', {}),            # Testing the error situation where movie_title does not exist
    ('Movie A', {'top_n': 6}),  # Testing the edge case where not enough movies exist
]

for movie_title, extra_kwargs in example_calls:
    recommendations = create_movie_recommendations(movie_title, movies, **extra_kwargs)
    print(f"Recommended movies for '{movie_title}': {recommendations}")

Recommended movies for 'Movie A': ['Movie B', 'Movie C', 'Movie D', 'Movie E']
Error: 'Movie Z' not found in the movie list.
Recommended movies for 'Movie Z': []
Recommended movies for 'Movie A': ['Movie B', 'Movie C', 'Movie D', 'Movie E']
