# Movies Recommender System

![](http://labs.criteo.com/wp-content/uploads/2017/08/CustomersWhoBought3.jpg)

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise import Reader,Dataset,SVD, accuracy
from surprise.model_selection import cross_validate, train_test_split
from scipy.sparse import coo_matrix, csr_matrix, dok_matrix
from scipy.stats import pearsonr
import warnings; warnings.simplefilter('ignore')
from surprise.prediction_algorithms import KNNBaseline ,KNNBasic,KNNWithMeans,KNNWithZScore

In [2]:
#Loading Datasets (movies and ratings)
movies = pd.read_csv('data2/movies.csv')
ratings = pd.read_csv('data2/ratings.csv')
movies['genres'] = movies['genres'].fillna('[]').apply(lambda x: x.split('|'))
# movies

## Popularity Based

Weighted Rating (WR) = $(\frac{v}{v + m} . R) + (\frac{m}{v + m} . C)$

where,
* *v* is the number of votes for the movie
* *m* is the minimum votes required to be listed in the chart
* *R* is the average rating of the movie
* *C* is the mean vote across the whole report

In [3]:
#calculating Vote_count and vote_average for each movei
vote_counts = ratings['movieId'].value_counts().reset_index()
vote_counts.columns = ['movieId', 'vote_count']

total_ratings = ratings.groupby('movieId')['rating'].sum().reset_index()
total_ratings.columns = ['movieId', 'total_ratings']
movies['vote_count'] = movies['movieId'].map(vote_counts.set_index('movieId')['vote_count']).fillna(0).astype(int)
movies['vote_average'] =  movies['movieId'].map(total_ratings.set_index('movieId')['total_ratings']).fillna(0)
movies['vote_average'] = movies['vote_average'] / movies['vote_count'] 

In [4]:
def top_movies(data):
    vote_counts = data[data['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = data[data['vote_average'].notnull()]['vote_average'].astype('int')

    #calculating quantile for threshold
    C = vote_averages.mean()
    m = vote_counts.quantile(0.95)

    def weighted_rating(x):
        v = x['vote_count']
        R = x['vote_average']
        return (v/(v+m)*R) + (m/(m+v)*C)

    #Calculateing weighted ratings based on vote_count and vote_avearges
    qualified = data[(data['vote_count'] >= m) & (data['vote_count'].notnull()) & (data['vote_average'].notnull())][['title',  'vote_count', 'vote_average','genres']]
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average']
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    return qualified.sort_values('wr', ascending=False)

In [5]:
#Getting Top 10 movies based on Popularity
top_movies(movies).head(10)

Unnamed: 0,title,vote_count,vote_average,genres,wr
314,"Shawshank Redemption, The (1994)",122296,4.416792,"[Crime, Drama]",4.401204
840,"Godfather, The (1972)",75004,4.326603,"[Crime, Drama]",4.302582
49,"Usual Suspects, The (1995)",72893,4.267865,"[Crime, Mystery, Thriller]",4.244002
1190,"Godfather: Part II, The (1974)",47271,4.26951,"[Crime, Drama]",4.232961
522,Schindler's List (1993),84232,4.242337,"[Drama, War]",4.221964
2867,Fight Club (1999),86207,4.236019,"[Action, Crime, Drama, Thriller]",4.216184
61176,Parasite (2019),12399,4.329946,"[Comedy, Drama]",4.193946
1173,12 Angry Men (1957),22730,4.267158,[Drama],4.193015
1164,One Flew Over the Cuckoo's Nest (1975),49316,4.212801,[Drama],4.178931
5509,Spirited Away (Sen to Chihiro no kamikakushi) ...,35375,4.226035,"[Adventure, Animation, Fantasy]",4.178821


In [6]:
s = movies.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'genre'
gen_md = movies.drop('genres', axis=1).join(s).rename(columns={'genre': 'genres'})

def top_genre_movies(genre, percentile=0.85):
    movies_genre = gen_md[gen_md['genres'].str.lower() == genre.lower()]
    if movies_genre.empty:
        print("Genre doesn't exist")
        return
    return top_movies(movies_genre).head(10)

In [7]:
#Getting Top 10 Action movies based on Popularity
top_genre_movies('Action')

Unnamed: 0,title,vote_count,vote_average,genres,wr
2867,Fight Club (1999),86207,4.236019,Action,4.131754
2480,"Matrix, The (1999)",107056,4.160631,Action,4.079257
12223,"Dark Knight, The (2008)",65349,4.187539,Action,4.0561
14939,Inception (2010),65056,4.176187,Action,4.045051
1166,Star Wars: Episode V - The Empire Strikes Back...,80200,4.11803,Action,4.013717
257,Star Wars: Episode IV - A New Hope (1977),97202,4.0924,Action,4.006713
7029,"Lord of the Rings: The Return of the King, The...",75512,4.110181,Action,4.000327
1168,Raiders of the Lost Ark (Indiana Jones and the...,75248,4.101039,Action,3.991424
1167,"Princess Bride, The (1987)",50775,4.110901,Action,3.952496
536,Blade Runner (1982),47695,4.113712,Action,3.945816


## Content Based Recommender

In [8]:
#loading tags data
tags =  pd. read_csv('data2/genome-scores.csv')
tag_matrix = coo_matrix((tags['relevance'], (tags['movieId'] - 1, tags['tagId'] - 1)))
# tag_matrix
cosine_sim = cosine_similarity(tag_matrix, dense_output=False)

In [9]:
titles = movies['title']
genres_list = movies['genres']
titles_lower = titles.str.lower()
idx = pd.Series(movies.index, index=titles_lower)  # given title return index
ids = pd.Series(movies['movieId'].values, index=titles_lower)  # given title return id
index_id = pd.Series(movies.index, index=movies['movieId']) # given id return index
id_index = pd.Series(movies['movieId'], index=movies.index) 
genre_id =  pd.Series(movies['genres'].values, index=movies['movieId']) #given_id return gender
id_title = pd.Series(movies['title'].values, index= movies['movieId'])# given id return title 
movie_lst= list(movies['title'])
movie_lst_lower = [title.lower() for title in movie_lst]

In [10]:
def get_recommendations_using_cosine(title):
    title = title.lower()       #For allowing case_sensitive
    if title not in movie_lst_lower:
        print("Movie doesn't exist in data")
        return
    id = ids[title] 
    sim_scores_csr = cosine_sim[id-1]
    sim_scores_dense = sim_scores_csr.toarray()
    sim_scores = list(enumerate(sim_scores_dense[0])) 
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    sim_movies = [(titles.iloc[index_id[i[0]+1]],genre_id[i[0]+1],i[1]) for i in sim_scores]

    columns = ['Title', 'Genres','Similarity_Score']
    df = pd.DataFrame(sim_movies, columns=columns)
    return df

In [11]:
#get movies simialr to 'Harry Potter and the Deathly Hallows: Part 2 (2011)' (not based on user)
get_recommendations_using_cosine('Harry Potter and the Deathly Hallows: Part 2 (2011)')

Unnamed: 0,Title,Genres,Similarity_Score
0,Harry Potter and the Deathly Hallows: Part 1 (...,"[Action, Adventure, Fantasy, IMAX]",0.974739
1,Harry Potter and the Half-Blood Prince (2009),"[Adventure, Fantasy, Mystery, Romance, IMAX]",0.963505
2,Harry Potter and the Order of the Phoenix (2007),"[Adventure, Drama, Fantasy, IMAX]",0.96309
3,Harry Potter and the Goblet of Fire (2005),"[Adventure, Fantasy, Thriller, IMAX]",0.960982
4,Harry Potter and the Prisoner of Azkaban (2004),"[Adventure, Fantasy, IMAX]",0.955338
5,Harry Potter and the Chamber of Secrets (2002),"[Adventure, Fantasy]",0.94017
6,Harry Potter and the Sorcerer's Stone (a.k.a. ...,"[Adventure, Children, Fantasy]",0.938567
7,"Hobbit: The Desolation of Smaug, The (2013)","[Adventure, Fantasy, IMAX]",0.88929
8,"Hobbit: An Unexpected Journey, The (2012)","[Adventure, Fantasy, IMAX]",0.885542
9,The Hobbit: The Battle of the Five Armies (2014),"[Adventure, Fantasy]",0.881329


In [15]:
# to get recommendations for user based on his top 3 favourite movies
def get_movies_content(userid,n):
    user_ratings = ratings[ratings['userId'] == userid]
    sorted_user_ratings = user_ratings.sort_values(by='rating', ascending=False)
    top_3_movies = sorted_user_ratings.head(10)
    top_3_titles = id_title[top_3_movies['movieId']]
    total_scores = [0] * 288167
    total_r = 0
    for (id,r) in zip(top_3_movies['movieId'],top_3_movies['rating']):
        sim_scores_csr = cosine_sim[id-1]
        sim_scores_dense = sim_scores_csr.toarray()
        total_scores += (sim_scores_dense[0] * r)
        total_r+=r
    total_scores = (total_scores /10)
    nan_indices = np.isnan(total_scores)
    total_scores[nan_indices] = 0
    total_scores = list(enumerate(total_scores))
    total_scores = sorted(total_scores, key=lambda x: x[1], reverse=True)
    total_scores = total_scores[1:n+1]
    actual_rating = [0]*(n+1)
    for i in range(len(total_scores)):
        movie_id_to_find = total_scores[i][0] + 1
        ratings_for_movie = user_ratings.loc[user_ratings['movieId'] == movie_id_to_find, 'rating']
        actual_rating[i] = ratings_for_movie.values[0] if not ratings_for_movie.empty else 'NA'
    sim_movies = []
    for i in range(len(total_scores)):
        if (total_scores[i][0]+1) in (movies['movieId'].tolist()):
            title=titles.iloc[index_id[total_scores[i][0]+1]]
            genres=genre_id[total_scores[i][0]+1]
            pred_rating=total_scores[i][1]
            sim_movies.append((title,genres,pred_rating,actual_rating[i]))  
    columns = ['Title','Genres', 'Prediction_ratings', 'Actual_Rating_GivenbyUSer']
    df = pd.DataFrame(sim_movies, columns=columns)
    return df
    

In [16]:
#get_movies_content(31,10) #user 11 - Top 6 recomendation based on his history 31
get_movies_content(11,10)

Unnamed: 0,Title,Genres,Prediction_ratings,Actual_Rating_GivenbyUSer
0,Star Wars: Episode V - The Empire Strikes Back...,"[Action, Adventure, Sci-Fi]",4.192383,5.0
1,"Lord of the Rings: The Two Towers, The (2002)","[Adventure, Fantasy]",4.190967,5.0
2,Star Wars: Episode IV - A New Hope (1977),"[Action, Adventure, Sci-Fi]",4.187074,5.0
3,"Lord of the Rings: The Return of the King, The...","[Action, Adventure, Drama, Fantasy]",4.174174,5.0
4,Star Wars: Episode VI - Return of the Jedi (1983),"[Action, Adventure, Sci-Fi]",4.107578,5.0
5,RRR (2022),"[Action, Drama, War]",4.097801,
6,Dune (2021),"[Action, Adventure, Drama, Sci-Fi]",4.084624,
7,Raiders of the Lost Ark (Indiana Jones and the...,"[Action, Adventure]",4.074629,4.5
8,"Matrix, The (1999)","[Action, Sci-Fi, Thriller]",4.064245,5.0
9,Firefly (2002),"[Action, Adventure, Sci-Fi, Western]",4.057606,


## Collab Filtering

In [17]:
from surprise import Dataset, Reader,SVD
from surprise.model_selection import cross_validate

reader = Reader(rating_scale=(0,5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)


In [18]:
svd = SVD()
#results = cross_validate(svd, data, measures=['RMSE', 'MAE'],cv=5,verbose = True)
trainset = data.build_full_trainset()
svd.fit(trainset)
svd.predict(1,1036)[3]

4.2138492326252965

In [None]:
def recommendation(algo, trainset,testset):
      # Train the algorithm on the trainset, and predict ratings for the testset
  algo.fit(trainset)
  predictions = algo.test(testset)

  # Then compute RMSE
  accuracy.rmse(predictions)
  # Then compute MAE
  accuracy.mae(predictions)
  return

trainset, testset = train_test_split(data, test_size=0.8,random_state=42)
sim_options = {
                'name': 'cosine',
               'user_based': False  # compute  similarities between items
               }
model = KNNBaseline(sim_options=sim_options)
recommendation(model, trainset,testset)

model = KNNWithMeans(sim_options=sim_options)
recommendation(model, trainset,testset)

model = KNNWithZScore(sim_options=sim_options)
recommendation(model, trainset,testset)

In [20]:
def get_recommendations_collab(userId,n):
    predictions=[[-1,0]]*288167
    user_ratings = ratings[ratings['userId'] == userId]
    for i in movies['movieId']:
        pred = svd.predict(userId,i)[3]
        predictions[index_id[i]]=[i,pred] 
    sort_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
    sort_predictions = sort_predictions[0:n]
    actual_rating = [0]*n
    for i in range(n):
        movie_id_to_find = sort_predictions[i][0]
        ratings_for_movie = user_ratings.loc[user_ratings['movieId'] == movie_id_to_find, 'rating']
        actual_rating[i] = ratings_for_movie.values[0] if not ratings_for_movie.empty else 'NA'
    movie_titles_pred=[]
    for i in range(len(sort_predictions)):
        if (sort_predictions[i][0]) in (movies['movieId'].tolist()):
            title=titles.iloc[index_id[sort_predictions[i][0]]]
            genres=genre_id[sort_predictions[i][0]]
            pred_rating=sort_predictions[i][1]
            movie_titles_pred.append((title,genres,pred_rating,actual_rating[i]))  
    
    columns = ['Title', 'Genres','Prediction_Ratings','Actual_Ratings']
    df = pd.DataFrame(movie_titles_pred, columns=columns)
    return df

In [21]:
def improved_recommendations(userId,genre,n):
    predictions=[[-1,0]]*288167
    user_ratings = ratings[ratings['userId'] == userId]
    selected_movies = gen_md[gen_md['genres'].str.lower() == genre.lower()]
    selected_movie_ids = selected_movies['movieId'].tolist()
    genre_id1 =  pd.Series(selected_movies['genres'].values, index=selected_movies['movieId']) #given_id return gender
    for i in selected_movie_ids:
        pred = svd.predict(userId,i)[3]
        predictions[index_id[i]]=[i,pred] 
    sort_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
    sort_predictions = sort_predictions[0:n]
    actual_rating = [0]*n
    for i in range(n):
        movie_id_to_find = sort_predictions[i][0]
        ratings_for_movie = user_ratings.loc[user_ratings['movieId'] == movie_id_to_find, 'rating']
        actual_rating[i] = ratings_for_movie.values[0] if not ratings_for_movie.empty else 'NA'
        
    movie_titles_pred = [(titles.iloc[index_id[sort_predictions[i][0]]],genre_id1[sort_predictions[i][0]],sort_predictions[i][1],actual_rating[i]) for i in range(len(sort_predictions))]
 
    columns = ['Title','Genres', 'Prediction_Ratings','Actual_Ratings']
    df = pd.DataFrame(movie_titles_pred, columns=columns)
    return df

In [22]:
get_recommendations_collab(31,10) #user 31 best

Unnamed: 0,Title,Genres,Prediction_Ratings,Actual_Ratings
0,Star Wars: Episode V - The Empire Strikes Back...,"[Action, Adventure, Sci-Fi]",4.234695,4.0
1,Star Wars: Episode IV - A New Hope (1977),"[Action, Adventure, Sci-Fi]",4.231612,4.0
2,Star Wars: Episode VI - Return of the Jedi (1983),"[Action, Adventure, Sci-Fi]",4.046996,4.0
3,"Lord of the Rings: The Return of the King, The...","[Action, Adventure, Drama, Fantasy]",3.995462,4.0
4,"Lord of the Rings: The Fellowship of the Ring,...","[Adventure, Fantasy]",3.964184,4.0
5,Star Trek II: The Wrath of Khan (1982),"[Action, Adventure, Sci-Fi, Thriller]",3.957736,
6,"Lord of the Rings: The Two Towers, The (2002)","[Adventure, Fantasy]",3.942691,4.0
7,Star Trek IV: The Voyage Home (1986),"[Adventure, Comedy, Sci-Fi]",3.929547,
8,Raiders of the Lost Ark (Indiana Jones and the...,"[Action, Adventure]",3.877285,5.0
9,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi]",3.80081,3.0


In [23]:
improved_recommendations(31,"Action",10)

Unnamed: 0,Title,Genres,Prediction_Ratings,Actual_Ratings
0,Star Wars: Episode V - The Empire Strikes Back...,Action,4.234695,4.0
1,Star Wars: Episode IV - A New Hope (1977),Action,4.231612,4.0
2,Star Wars: Episode VI - Return of the Jedi (1983),Action,4.046996,4.0
3,"Lord of the Rings: The Return of the King, The...",Action,3.995462,4.0
4,Star Trek II: The Wrath of Khan (1982),Action,3.957736,
5,Raiders of the Lost Ark (Indiana Jones and the...,Action,3.877285,5.0
6,Terminator 2: Judgment Day (1991),Action,3.80081,3.0
7,"Terminator, The (1984)",Action,3.78689,4.0
8,Indiana Jones and the Last Crusade (1989),Action,3.762966,4.0
9,"Matrix, The (1999)",Action,3.740732,3.0


## Hybrid Recommender

In [24]:
def convert_int(x):
    try:
        return int(x)
    except:
        return np.nan

In [25]:
def hybrid(userId, title,n):
    title = title.lower()
    user_ratings = ratings[ratings['userId'] == userId]
    if title not in movie_lst_lower:
        print("Movie doesn't exist in data")
        return
    id = ids[title] #5816
    sim_scores_csr = cosine_sim[id-1]
    sim_scores_dense = sim_scores_csr.toarray()
    sim_scores = list(enumerate(sim_scores_dense[0]))  # Use sim_scores_dense[0] to extract the array
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:26]
    movie_ids = [i[0]+1 for i in sim_scores]
    predictions=[]
    for i in movie_ids:
        pred = svd.predict(userId,i)[3]
        predictions.append([i,pred])  
    sort_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
    sort_predictions = sort_predictions[0:n]
    actual_rating = [0]*n
    for i in range(n):
        movie_id_to_find = sort_predictions[i][0]
        ratings_for_movie = user_ratings.loc[user_ratings['movieId'] == movie_id_to_find, 'rating']
        actual_rating[i] = ratings_for_movie.values[0] if not ratings_for_movie.empty else 'NA'
        
    movie_titles_pred = [(titles.iloc[index_id[sort_predictions[i][0]]],genre_id[sort_predictions[i][0]],sort_predictions[i][1],actual_rating[i]) for i in range(len(sort_predictions))]
 
    columns = ['Title','Genres', 'Prediction_Ratings','Actual_Ratings']
    df = pd.DataFrame(movie_titles_pred, columns=columns)
    return df
    movie_titles_pred = [(titles.iloc[index_id[i[0]]],i[1]) for i in sort_predictions]
    return movie_titles_pred

In [26]:
hybrid(17, 'Harry Potter and the Chamber of Secrets (2002)',10) #17,22

Unnamed: 0,Title,Genres,Prediction_Ratings,Actual_Ratings
0,Harry Potter and the Prisoner of Azkaban (2004),"[Adventure, Fantasy, IMAX]",5.0,5.0
1,Harry Potter and the Deathly Hallows: Part 2 (...,"[Action, Adventure, Drama, Fantasy, Mystery, I...",5.0,5.0
2,Harry Potter and the Half-Blood Prince (2009),"[Adventure, Fantasy, Mystery, Romance, IMAX]",4.977176,5.0
3,Harry Potter and the Order of the Phoenix (2007),"[Adventure, Drama, Fantasy, IMAX]",4.952743,5.0
4,Stardust (2007),"[Adventure, Comedy, Fantasy, Romance]",4.945923,
5,Harry Potter and the Goblet of Fire (2005),"[Adventure, Fantasy, Thriller, IMAX]",4.943713,5.0
6,Harry Potter and the Deathly Hallows: Part 1 (...,"[Action, Adventure, Fantasy, IMAX]",4.93379,5.0
7,Harry Potter and the Sorcerer's Stone (a.k.a. ...,"[Adventure, Children, Fantasy]",4.925546,
8,"Hobbit: An Unexpected Journey, The (2012)","[Adventure, Fantasy, IMAX]",4.840456,
9,"Hobbit: The Desolation of Smaug, The (2013)","[Adventure, Fantasy, IMAX]",4.805157,


In [32]:
hybrid(500, 'Spider-Man (2002)',10)

Unnamed: 0,Title,Genres,Prediction_Ratings,Actual_Ratings
0,Batman Begins (2005),"[Action, Crime, IMAX]",4.492686,
1,X-Men: First Class (2011),"[Action, Adventure, Sci-Fi, Thriller, War]",4.228971,
2,Spider-Man 2 (2004),"[Action, Adventure, Sci-Fi, IMAX]",4.207067,
3,Superman (1978),"[Action, Adventure, Sci-Fi]",4.179212,
4,Iron Man (2008),"[Action, Adventure, Sci-Fi]",4.161212,
5,Spider-Man: No Way Home (2021),"[Action, Adventure, Fantasy, Sci-Fi]",4.096235,
6,X2: X-Men United (2003),"[Action, Adventure, Sci-Fi, Thriller]",4.089542,
7,X-Men (2000),"[Action, Adventure, Sci-Fi]",4.054373,
8,Zack Snyder's Justice League (2021),"[Action, Adventure, Fantasy, Sci-Fi]",4.002848,
9,"Avengers, The (2012)","[Action, Adventure, Sci-Fi, IMAX]",3.924625,


In [33]:
# print(rec_content)
def hybrid_parallel(userId,n):
    user_ratings = ratings[ratings['userId'] == userId]
    rec_content = get_movies_content(userId,86537)
    rec_collab = get_recommendations_collab(userId,86537)
    final_pred = [0]*86537
    for i in range(86537): 
        title_to_check = titles.iloc[i]
        a = rec_content.loc[rec_content['Title'] == title_to_check, 'Prediction_ratings'].values[0] if title_to_check in rec_content['Title'].values else 0
        b = rec_collab.loc[rec_collab['Title'] == title_to_check, 'Prediction_Ratings'].values[0] if title_to_check in rec_content['Title'].values else 0
        final_pred[i] = (a+b)/2
    predictions = list(enumerate(final_pred))  # Use sim_scores_dense[0] to extract the array
    # print(predictions)
    sort_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
    sort_predictions = sort_predictions[0:n]
    movie_ids = [id_index[i[0]] for i in sort_predictions]
    actual_rating = [0]*n
    for i in range(n):
        movie_id_to_find = id_index[sort_predictions[i][0]]
        ratings_for_movie = user_ratings.loc[user_ratings['movieId'] == movie_id_to_find, 'rating']
        actual_rating[i] = ratings_for_movie.values[0] if not ratings_for_movie.empty else 'NA'
        
    movie_titles_pred = [(titles.iloc[sort_predictions[i][0]],genres_list[sort_predictions[i][0]],sort_predictions[i][1],actual_rating[i]) for i in range(len(sort_predictions))]

    columns = ['Title','Genres', 'Prediction_Ratings','Actual_Ratings']
    df = pd.DataFrame(movie_titles_pred, columns=columns)
    return df
    

In [34]:
hybrid_parallel(17,10) #17,22   (###Change collab to 86437)

Unnamed: 0,Title,Genres,Prediction_Ratings,Actual_Ratings
0,Avengers: Infinity War - Part I (2018),"[Action, Adventure, Sci-Fi]",4.466662,
1,Iron Man (2008),"[Action, Adventure, Sci-Fi]",4.464771,5.0
2,Avengers: Infinity War - Part II (2019),"[Action, Adventure, Sci-Fi]",4.46078,
3,X-Men: First Class (2011),"[Action, Adventure, Sci-Fi, Thriller, War]",4.458854,5.0
4,"Avengers, The (2012)","[Action, Adventure, Sci-Fi, IMAX]",4.454766,5.0
5,X2: X-Men United (2003),"[Action, Adventure, Sci-Fi, Thriller]",4.446281,5.0
6,X-Men (2000),"[Action, Adventure, Sci-Fi]",4.443931,5.0
7,Thor: Ragnarok (2017),"[Action, Adventure, Sci-Fi]",4.432909,
8,Spider-Man (2002),"[Action, Adventure, Sci-Fi, Thriller]",4.430223,5.0
9,Spider-Man 2 (2004),"[Action, Adventure, Sci-Fi, IMAX]",4.406532,5.0
