In [20]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import itertools
from IPython.display import display

tqdm.pandas()

In [21]:
# define column names
col_names = {
    "data": [ 'user id' , 'item id' , 'rating' , 'timestamp'],
    "item": ['movie id' , 'movie title' , 'release date' , 'video release date' ,
              'IMDb URL' , 'unknown' , 'Action' , 'Adventure' , 'Animation' ,
              "Children's" , 'Comedy' , 'Crime' , 'Documentary' , 'Drama' , 'Fantasy' ,
              'Film-Noir' , 'Horror' , 'Musical' , 'Mystery' , 'Romance' , 'Sci-Fi' ,
              'Thriller' , 'War' , 'Western'],
    "user": ['user id' , 'age' , 'gender' , 'occupation' , 'zip code'],
    "genre": ['genre', 'genre id']
}

In [22]:
def read_data(file_name, sep, encoding, col_names):
    output = pd.read_csv(file_name, sep=sep, encoding=encoding, names=col_names)
    return output

In [23]:
ratings = read_data("./u.data", "\t", 'utf-8', col_names["data"])
ratings.head(5)

Unnamed: 0,user id,item id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [339]:
movies = read_data("./u.item", "|", 'latin-1', col_names["item"])
movies.drop(columns= ['video release date', 'IMDb URL'], inplace=True)
# only for debug
movies = movies[0:100]
movies.head(5)

Unnamed: 0,movie id,movie title,release date,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,0,0,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [277]:
def get_movie_genres(movie_id):
    movie = movies.loc[(movies['movie id']==movie_id)]
    genres = movie.values[0][3:]
    genres_index = np.argwhere(genres==1)
    return genres_index

In [278]:
def get_movies_groupby_genre():
    genres = {k: [] for k in range(19)}
    for index, row in tqdm(movies.iterrows()):
        row = np.array(row[3:])
        genres_index = np.argwhere(row==1)
        for i in genres_index:
            genres[i[0]].append(index+1)
    return genres 

In [279]:
display(get_movies_groupby_genre())

10it [00:00, 4325.36it/s]


{0: [],
 1: [2, 4],
 2: [2],
 3: [1],
 4: [1, 8],
 5: [1, 4, 8],
 6: [5],
 7: [],
 8: [4, 5, 6, 7, 8, 9, 10],
 9: [],
 10: [],
 11: [],
 12: [],
 13: [],
 14: [],
 15: [7],
 16: [2, 3, 5],
 17: [10],
 18: []}

In [280]:
users = read_data("./u.user", "|", 'utf-8', col_names["user"])
users.head(5)

Unnamed: 0,user id,age,gender,occupation,zip code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [281]:
genres = read_data("./u.genre", "|", 'utf-8', col_names["genre"])
genres.head(20)

Unnamed: 0,genre,genre id
0,unknown,0
1,Action,1
2,Adventure,2
3,Animation,3
4,Children's,4
5,Comedy,5
6,Crime,6
7,Documentary,7
8,Drama,8
9,Fantasy,9


In [185]:
def get_ratings_single_movie(movie_id):
    # get all available ratings for a single movie
    return ratings[ratings["item id"] == movie_id].sort_values(by=['user id'])

In [186]:
def get_ratings_single_user(user_id):
    # get all movies rated by the user
    return ratings[ratings["user id"] == user_id].sort_values(by=['item id'])  

In [187]:
def get_both_rated_set(user1_ratings, user2_ratings):
    # return all items rated by both user sorted by the item id
    sim_user1_ratings = user1_ratings[user1_ratings["item id"].isin(user2_ratings["item id"])]
    sim_user2_ratings = user2_ratings[user2_ratings["item id"].isin(user1_ratings["item id"])]
    
    sim_user1_ratings = sim_user1_ratings.sort_values(by=['item id'])
    sim_user2_ratings = sim_user2_ratings.sort_values(by=['item id'])
    return sim_user1_ratings, sim_user2_ratings

In [188]:
def get_both_raters_set(item1_ratings, item2_ratings):
    # return all ratings bytserh users who rated both items sorted by the user id
    item1_raters = item1_ratings[item1_ratings["user id"].isin(item2_ratings["user id"])]["user id"].tolist()
    item2_raters = item2_ratings[item2_ratings["user id"].isin(item1_ratings["user id"])]["user id"].tolist()
    users_rated_both = np.unique(item1_raters + item2_raters)
    ratings_both = ratings[ratings["user id"].isin(users_rated_both)]
    ratings_both = ratings_both.sort_values(by=['user id'])
    return ratings_both

# Pearson correlation function

In [189]:
def pearson_correlation(user1_id, user2_id):
    # calculate pearson correlation between 2 users
    # step1: get all ratings by 2 users
    user1_ratings = get_ratings_single_user(user1_id)
    user2_ratings = get_ratings_single_user(user2_id)
    
    # step2: get the items rated by both users
    sim_user1_ratings, sim_user2_ratings = get_both_rated_set(user1_ratings, user2_ratings)
    
    # step3: calculate mean ratings ra, rb
    mean_user1_ratings = np.mean(user1_ratings['rating'])
    mean_user2_ratings = np.mean(user2_ratings['rating'])

    # step4: calculate the variance  
    var_1 = np.array(np.subtract(sim_user1_ratings["rating"], [mean_user1_ratings]))
    var_2 = np.array(np.subtract(sim_user2_ratings["rating"], [mean_user2_ratings]))

    # step5: compute the pearson correlation
    numerator = np.sum(var_1*var_2)
    denominator = np.sqrt(np.sum(np.power(var_1, 2)))*np.sqrt(np.sum(np.power(var_2, 2)))

    if denominator == 0:
        # in the case that denominator = 0 return NaN
        return float('NaN'), user1_id, user2_id
    else:
        correlation = numerator / denominator
        return correlation, user1_id, user2_id


# User-based prediction function

In [190]:
def predict_single_pair_user(user1_id, user2_id, item_id):
    # predict item's score of user 1 based on user 2
    # step1: get all ratings by 2 users
    user1_ratings = get_ratings_single_user(user1_id)
    user2_ratings = get_ratings_single_user(user2_id)
    
    # step2: get the items rated by both users
    sim_user1_ratings, sim_user2_ratings = get_both_rated_set(user1_ratings, user2_ratings)
    
    # if there is no similar rated item, return nan
    if sim_user1_ratings.empty:
        return [float('NaN'), float('NaN')]
    
    # step3: compute the mean rating of user 2
    mean_user2_ratings = np.mean(user2_ratings['rating'])
    
    # step4: get the pearson correlation
    correlation, user1_id, user2_id = pearson_correlation(user1_id, user2_id)
    var_2 = float(user2_ratings[user2_ratings["item id"] == item_id]["rating"]) - mean_user2_ratings

    # step5: return the output
    numerator = (correlation*var_2)
    denominator = correlation
    return [numerator, denominator]

In [191]:
def predict_user_item(user_id, item_id):
    # predict item's score for user
    # if user already rated the item, return the rating
    existing_rating = ratings.loc[(ratings['user id'] == user_id) & (ratings['item id'] == item_id)]
    if not existing_rating.empty:
        return item_id, movies.at[item_id - 1, 'movie title'], existing_rating['rating'].values[0]
        
    # step 1: get user ratings
    user_ratings = get_ratings_single_user(user_id)
    # step 2: compute the mean rating
    mean_user_ratings = np.mean(user_ratings['rating'])
    
    # step 3:  get all other users which rated the item
    users_domain = ratings[ratings["item id"] == item_id]
    
    # step 4: predict for each user in the users domain
    correlations = users_domain.apply(lambda row: predict_single_pair_user(user_id, row["user id"], item_id), axis=1, result_type="expand")
    correlations = np.array(correlations)
    
    # filter all nan, which cause by no same rated item between 2 users
    correlations = correlations[~np.isnan(correlations).any(axis=1), :]

    # step 5: calculate the score and return
    pred_score = mean_user_ratings + np.sum(correlations[:,0]) / np.sum(correlations[:,1])
    return item_id, movies.at[item_id - 1, 'movie title'], pred_score

In [192]:
def get_predicted_ratings(user_id):
    movies_ratings = movies.progress_apply(lambda row: predict_user_item(user_id, row["movie id"]), axis=1, result_type="expand")
    movies_ratings.columns = ["movie id", "movie title", "pred_rating"]

    return movies_ratings

# Average aggregation method

In [193]:
def avg_aggration(user1, user2, user3):
    # get predicted ratings of 3 users
    user1_ratings = get_predicted_ratings(user1)
    user2_ratings = get_predicted_ratings(user2)
    user3_ratings = get_predicted_ratings(user3)

    # create dataframe with all 3 users' predicted ratings
    data = [user1_ratings["movie title"], user1_ratings["pred_rating"], user2_ratings["pred_rating"], user3_ratings["pred_rating"]]
    headers = ["movie title", "user{} rating".format(str(user1)), "user{} rating".format(str(user2)), "user{} rating".format(str(user3))]
    all_users_ratings = pd.concat(data, axis=1, keys=headers)
    all_users_ratings['average'] = all_users_ratings.iloc[:, 1:4].mean(axis=1)

    return all_users_ratings

In [194]:
def top_avg_movies(user1, user2, user3, movies_num):
    # get top movies with average rating
    all_users_ratings = avg_aggration(user1, user2, user3)
    top_movies = all_users_ratings.sort_values(by=['average'], ascending=False)
    top_movies = top_movies.iloc[:movies_num, :]
    
    return top_movies

In [195]:
# This took about 7.5 min to run
# top_movies_avg = top_avg_movies(2, 17, 35, 20)
# top_movies_avg.head(20)

# Least-misery aggregation method

In [196]:
def least_misery_aggration(user1, user2, user3):
    # get predicted ratings of 3 users
    user1_ratings = get_predicted_ratings(user1)
    user2_ratings = get_predicted_ratings(user2)
    user3_ratings = get_predicted_ratings(user3)

    # create dataframe with all 3 users' predicted ratings
    data = [user1_ratings["movie title"], user1_ratings["pred_rating"], user2_ratings["pred_rating"], user3_ratings["pred_rating"]]
    headers = ["movie title", "user{} rating".format(str(user1)), "user{} rating".format(str(user2)), "user{} rating".format(str(user3))]
    all_users_ratings = pd.concat(data, axis=1, keys=headers)
    all_users_ratings['minimum'] = all_users_ratings.iloc[:, 1:4].min(axis=1)

    return all_users_ratings

In [197]:
def top_least_misery_movies(user1, user2, user3, movies_num):
    # get top movies with least-misery rating
    all_users_ratings = least_misery_aggration(user1, user2, user3)
    top_movies = all_users_ratings.sort_values(by=['minimum'], ascending=False)
    top_movies = top_movies.iloc[:movies_num, :]
    
    return top_movies

# Both aggregation methods

In [199]:
def both_aggregation(users, users_ratings):
    # create dataframe with all 3 users' predicted ratings
    data = [users_ratings[0]["movie id"], users_ratings[0]["movie title"], users_ratings[0]["pred_rating"], users_ratings[1]["pred_rating"], users_ratings[2]["pred_rating"]]
    headers = ["movie id", "movie title", "user{} rating".format(str(users[0])), "user{} rating".format(str(users[1])), "user{} rating".format(str(users[2]))]
    all_users_ratings = pd.concat(data, axis=1, keys=headers)

    # remove ratings under threshold
    all_users_ratings = all_users_ratings[all_users_ratings[f"user{str(users[0])} rating"] >= 2]
    all_users_ratings = all_users_ratings[all_users_ratings[f"user{str(users[1])} rating"] >= 2]
    all_users_ratings = all_users_ratings[all_users_ratings[f"user{str(users[2])} rating"] >= 2]

    all_users_ratings['average'] = all_users_ratings.iloc[:, 1:4].mean(axis=1)
    all_users_ratings['minimum'] = all_users_ratings.iloc[:, 1:4].min(axis=1)
    # 1st iteration of sequential recommendation: score = average rating
    all_users_ratings['score'] = all_users_ratings.iloc[:, 1:4].mean(axis=1)

    return all_users_ratings

In [200]:
def get_genre_ratings(users, users_ratings):
    movies = users_ratings[0]["movie title"]

# Disagreements function

In [201]:
def compute_disagreements(user1_ratings, user2_ratings):
    user1_sorted_rating = np.argsort(user1_ratings["pred_rating"])
    user2_sorted_rating = np.argsort(user2_ratings["pred_rating"])
    pairs = itertools.combinations(range(0, len(user1_sorted_rating)), 2)
    distance = 0
    for x, y in pairs:
        a = user1_sorted_rating[x] - user1_sorted_rating[y]
        b = user2_sorted_rating[x] - user2_sorted_rating[y]
        if a * b < 0:
            distance += 1
    return distance

In [202]:
def disagreements_aggration(user1, user2, user3):
    # get predicted ratings of 3 users
    user1_ratings = get_predicted_ratings(user1)
    user2_ratings = get_predicted_ratings(user2)
    user3_ratings = get_predicted_ratings(user3)
    
    # get pairwise disagreements score
    dis_1_2 = compute_disagreements(user1_ratings, user2_ratings)
    dis_1_3 = compute_disagreements(user1_ratings, user3_ratings)
    dis_2_3 = compute_disagreements(user2_ratings, user3_ratings)
    
    disagreements = [dis_1_2, dis_1_3, dis_2_3]
    
    # normalize the distance score range 1 - 5
    dis_1_2_norm = ((dis_1_2 - min(disagreements))*4)/(max(disagreements) - min(disagreements))+1
    dis_1_3_norm = ((dis_1_3 - min(disagreements))*4)/(max(disagreements) - min(disagreements))+1
    dis_2_3_norm = ((dis_2_3 - min(disagreements))*4)/(max(disagreements) - min(disagreements))+1
    
    # add disagreements as the penalty 
    user1_ratings["pred_rating"] = user1_ratings["pred_rating"] * (1/(dis_1_2_norm + dis_1_3_norm))
    user2_ratings["pred_rating"] = user2_ratings["pred_rating"] * (1/(dis_1_2_norm + dis_2_3_norm))
    user3_ratings["pred_rating"] = user3_ratings["pred_rating"] * (1/(dis_1_3_norm + dis_2_3_norm))
    
    # create dataframe with all 3 users' predicted ratings
    data = [user1_ratings["movie title"], user1_ratings["pred_rating"], user2_ratings["pred_rating"], user3_ratings["pred_rating"]]
    headers = ["movie title", "user{} rating".format(str(user1)), "user{} rating".format(str(user2)), "user{} rating".format(str(user3))]
    all_users_ratings = pd.concat(data, axis=1, keys=headers)
    all_users_ratings['mean'] = all_users_ratings.iloc[:, 1:4].mean(axis=1)
    
    return all_users_ratings

In [203]:
def top_disagreements_movies(user1, user2, user3, movies_num):
    # get top movies with least-misery rating
    all_users_ratings = disagreements_aggration(user1, user2, user3)
    top_movies = all_users_ratings.sort_values(by=['mean'], ascending=False)
    top_movies = top_movies.iloc[:movies_num, :]
    
    return top_movies

# Sequential recommendation

In [340]:
def get_user_ratings_by_genre(user_ratings, movies_num):
    movies_grouby_genre = get_movies_groupby_genre()
    user_ratings = user_ratings.sort_values(by=['pred_rating'], ascending=False).head(movies_num)
    score_grouby_genre = {k: 0 for k in range(19)}
    for genre_key in movies_grouby_genre.keys():
        for movie in movies_grouby_genre[genre_key]:
            score = user_ratings.loc[user_ratings["movie id"]==movie]['pred_rating']
            if not score.empty:
                score_grouby_genre[genre_key] += int(score)
    return score_grouby_genre        

In [341]:
def get_group_ratings_by_genre(group_ratings, movies_num):
    movies_grouby_genre = get_movies_groupby_genre()
    group_ratings = group_ratings.sort_values(by=['score'], ascending=False).head(movies_num)
    score_grouby_genre = {k: 0 for k in range(19)}
    for genre_key in movies_grouby_genre.keys():
        for movie in movies_grouby_genre[genre_key]:
            score = group_ratings.loc[group_ratings["movie id"]==movie]['score']
            if not score.empty:
                score_grouby_genre[genre_key] += int(score)
    return score_grouby_genre 

In [342]:
def user_satisfaction(user, user_ratings, group_ratings, movies_num):
    top_user_ratings = user_ratings.sort_values(by=['pred_rating'], ascending=False).head(movies_num)

    user_filtered_group_ratings = group_ratings.sort_values(by=['score'], ascending=False).head(movies_num)
    group_list_satisfaction = user_filtered_group_ratings["user{} rating".format(str(user))].sum()

    user_list_satisfaction = top_user_ratings["pred_rating"].sum()
    
    return group_list_satisfaction/user_list_satisfaction

In [343]:
def group_satisfaction(users_list, users_ratings, group_ratings, movies_num):
    user_satisfactions = []
    for (index, user) in enumerate(users_list):
        user_satisfactions.append(user_satisfaction(user, users_ratings[index], group_ratings, movies_num))
    
    # return average satisfaction and weight (alpha)
    return [sum(user_satisfactions)/len(user_satisfactions), max(user_satisfactions) - min(user_satisfactions)]

In [344]:
def group_genre_satisfaction(group_ratings_by_genre, users_ratings_by_genre):
    users_satisfactions_by_genre = []
    for user_ratings_by_genre in users_ratings_by_genre:
        satisfactions = {k: 0 for k in range(19)}
        for key in satisfactions.keys():
            if user_ratings_by_genre[key]:
                satisfactions[key] = group_ratings_by_genre[key]/user_ratings_by_genre[key] 
            else:
                satisfactions[key] = 0
        users_satisfactions_by_genre.append(satisfactions)
        
    output = {k: 0 for k in range(19)}
    
    for i in range(19):
        users_satisfactions_genre_i = tuple(d[i] for d in users_satisfactions_by_genre)
        output[i] = max(users_satisfactions_genre_i) - min(users_satisfactions_genre_i)
    
    return output

In [345]:
def get_genre_alpha(genre_satisfaction, group_ratings):
    movies_id = list(group_ratings["movie id"])
    genre_alpha = {k: 0 for k in movies_id}
    for movie_id in movies_id:
        genres = get_movie_genres(movie_id)
        for genre in genres:
            genre_alpha[movie_id] += genre_satisfaction[genre[0]]
            
    return pd.DataFrame.from_dict(genre_alpha, orient="index")

In [346]:
def sequential_iteration(users_list, users_ratings, rated_movies, group_ratings, iteration, movies_num, group_ratings_by_genre, users_ratings_by_genre):    
    filtered_group_ratings = group_ratings[group_ratings.index.isin(rated_movies.index)]
    genre_satisfaction = group_genre_satisfaction(group_ratings_by_genre, users_ratings_by_genre)
    
    genre_alpha = get_genre_alpha(genre_satisfaction, group_ratings)
    

    alpha = 0
    if iteration > 1:
        alpha = group_satisfaction(users_list, users_ratings, group_ratings, movies_num)[1]  
    filtered_group_ratings["score"] = (1-genre_alpha)*(1-alpha)*filtered_group_ratings["average"] + genre_alpha*alpha*filtered_group_ratings["minimum"]

    filtered_group_ratings = filtered_group_ratings.sort_values(by=['score'], ascending=False)
    return filtered_group_ratings

In [347]:
def sequential_recommender(users_list, sequences_num, movies_num):
    users_ratings = []
    users_ratings_by_genre = []
    for user in users_list:
        user_ratings = get_predicted_ratings(user)
        user_ratings_by_genre = get_user_ratings_by_genre(user_ratings, movies_num)
        users_ratings.append(user_ratings)
        users_ratings_by_genre.append(user_ratings_by_genre)
        
    rated_movies = pd.concat(users_ratings, axis=0)
    rated_movies = rated_movies[~rated_movies.index.duplicated(keep='first')]
    
    group_ratings = both_aggregation(users_list, users_ratings)
    group_ratings_by_genre = get_group_ratings_by_genre(group_ratings, movies_num)
    recommended_sequence = []
    for i in range(1, sequences_num+1):
        group_ratings_iteration = sequential_iteration(users_list, users_ratings, rated_movies, group_ratings, i, movies_num, group_ratings_by_genre, users_ratings_by_genre)
        recommended_sequence.append(group_ratings_iteration.head(movies_num))
        group_ratings = group_ratings_iteration

    return recommended_sequence

In [348]:
recommended_sequence = sequential_recommender([2, 17, 35], 5, 20)

100%|██████████| 100/100 [01:16<00:00,  1.30it/s]
100it [00:00, 7748.86it/s]
100%|██████████| 100/100 [01:17<00:00,  1.29it/s]
100it [00:00, 5314.36it/s]
100%|██████████| 100/100 [01:22<00:00,  1.21it/s]
100it [00:00, 8590.48it/s]
100it [00:00, 7479.01it/s]


In [349]:
display(recommended_sequence[0])
display(recommended_sequence[1])
display(recommended_sequence[2])
display(recommended_sequence[3])
display(recommended_sequence[4])

Unnamed: 0,movie id,movie title,user2 rating,user17 rating,user35 rating,average,minimum,score
84,85,"Ref, The (1994)",3.29265,2.364469,3.515338,2.828559,2.364469,3.191919
48,49,I.Q. (1994),3.485273,2.665797,29.361209,3.075535,2.665797,3.0
75,76,Carlito's Way (1993),3.767388,2.665599,3.093774,3.216493,2.665599,3.0
32,33,Desperado (1995),3.64142,2.720421,3.158976,3.180921,2.720421,3.0
18,19,Antonia's Line (1995),3.0,3.796809,3.804291,3.398405,3.0,2.885246
59,60,Three Colors: Blue (1993),4.252207,3.735299,2.909408,3.993753,3.735299,2.885246
58,59,Three Colors: Red (1994),4.29838,3.616093,3.019283,3.957236,3.616093,2.885246
57,58,Quiz Show (1994),3.801467,3.266016,3.513288,3.533741,3.266016,2.885246
64,65,What's Eating Gilbert Grape (1993),3.756805,3.097194,4.666919,3.426999,3.097194,2.885246
52,53,Natural Born Killers (1994),3.260756,2.70404,3.636862,2.982398,2.70404,2.885246


Unnamed: 0,movie id,movie title,user2 rating,user17 rating,user35 rating,average,minimum,score
84,85,"Ref, The (1994)",3.29265,2.364469,3.515338,2.828559,2.364469,2.146086
32,33,Desperado (1995),3.64142,2.720421,3.158976,3.180921,2.720421,2.122564
48,49,I.Q. (1994),3.485273,2.665797,29.361209,3.075535,2.665797,2.122564
75,76,Carlito's Way (1993),3.767388,2.665599,3.093774,3.216493,2.665599,2.122564
30,31,Crimson Tide (1995),3.72639,2.934078,2.679616,3.330234,2.934078,2.108499
87,88,Sleepless in Seattle (1993),3.477083,2.604551,1.584007,3.040817,2.604551,2.108499
6,7,Twelve Monkeys (1995),3.973975,4.0,3.903087,3.986987,3.973975,2.108499
9,10,Richard III (1995),2.0,3.737149,3.475016,2.868574,2.0,2.108499
15,16,French Twist (Gazon maudit) (1995),3.774053,3.187474,3.140855,3.480763,3.187474,2.108499
60,61,Three Colors: White (1994),4.007329,3.629012,2.512506,3.818171,3.629012,2.108499


Unnamed: 0,movie id,movie title,user2 rating,user17 rating,user35 rating,average,minimum,score
84,85,"Ref, The (1994)",3.29265,2.364469,3.515338,2.828559,2.364469,2.141044
75,76,Carlito's Way (1993),3.767388,2.665599,3.093774,3.216493,2.665599,2.118333
32,33,Desperado (1995),3.64142,2.720421,3.158976,3.180921,2.720421,2.118333
48,49,I.Q. (1994),3.485273,2.665797,29.361209,3.075535,2.665797,2.118333
61,62,Stargate (1994),3.389622,2.683476,0.005864,3.036549,2.683476,2.104754
59,60,Three Colors: Blue (1993),4.252207,3.735299,2.909408,3.993753,3.735299,2.104754
58,59,Three Colors: Red (1994),4.29838,3.616093,3.019283,3.957236,3.616093,2.104754
57,58,Quiz Show (1994),3.801467,3.266016,3.513288,3.533741,3.266016,2.104754
64,65,What's Eating Gilbert Grape (1993),3.756805,3.097194,4.666919,3.426999,3.097194,2.104754
52,53,Natural Born Killers (1994),3.260756,2.70404,3.636862,2.982398,2.70404,2.104754


Unnamed: 0,movie id,movie title,user2 rating,user17 rating,user35 rating,average,minimum,score
84,85,"Ref, The (1994)",3.29265,2.364469,3.515338,2.828559,2.364469,2.146086
48,49,I.Q. (1994),3.485273,2.665797,29.361209,3.075535,2.665797,2.122564
75,76,Carlito's Way (1993),3.767388,2.665599,3.093774,3.216493,2.665599,2.122564
32,33,Desperado (1995),3.64142,2.720421,3.158976,3.180921,2.720421,2.122564
86,87,Searching for Bobby Fischer (1993),4.04444,3.390964,5.585773,3.717702,3.390964,2.108499
87,88,Sleepless in Seattle (1993),3.477083,2.604551,1.584007,3.040817,2.604551,2.108499
6,7,Twelve Monkeys (1995),3.973975,4.0,3.903087,3.986987,3.973975,2.108499
9,10,Richard III (1995),2.0,3.737149,3.475016,2.868574,2.0,2.108499
15,16,French Twist (Gazon maudit) (1995),3.774053,3.187474,3.140855,3.480763,3.187474,2.108499
60,61,Three Colors: White (1994),4.007329,3.629012,2.512506,3.818171,3.629012,2.108499


Unnamed: 0,movie id,movie title,user2 rating,user17 rating,user35 rating,average,minimum,score
84,85,"Ref, The (1994)",3.29265,2.364469,3.515338,2.828559,2.364469,2.141044
32,33,Desperado (1995),3.64142,2.720421,3.158976,3.180921,2.720421,2.118333
48,49,I.Q. (1994),3.485273,2.665797,29.361209,3.075535,2.665797,2.118333
75,76,Carlito's Way (1993),3.767388,2.665599,3.093774,3.216493,2.665599,2.118333
37,38,"Net, The (1995)",3.155893,2.224419,-11.370472,2.690156,2.224419,2.104754
59,60,Three Colors: Blue (1993),4.252207,3.735299,2.909408,3.993753,3.735299,2.104754
58,59,Three Colors: Red (1994),4.29838,3.616093,3.019283,3.957236,3.616093,2.104754
57,58,Quiz Show (1994),3.801467,3.266016,3.513288,3.533741,3.266016,2.104754
64,65,What's Eating Gilbert Grape (1993),3.756805,3.097194,4.666919,3.426999,3.097194,2.104754
52,53,Natural Born Killers (1994),3.260756,2.70404,3.636862,2.982398,2.70404,2.104754
