# 1. Aquisição dos Dados

In [2]:
import pandas as pd
import numpy as np

In [3]:
movies = pd.DataFrame = pd.read_csv('movies.csv')
ratings = pd.DataFrame = pd.read_csv('ratings.csv')

# 2. Pré-processamento dos Dados

In [4]:
movies.columns = ['movie_id', 'title', 'genres']
movies.set_index('movie_id', inplace=True)
movies.head()

Unnamed: 0_level_0,title,genres
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

In [6]:
movies["watched"] = ratings.groupby("movie_id")["rating"].count()
movies["mean_rating"] = ratings.groupby("movie_id")["rating"].mean()

# 3. Buscador por usuário

## Obter distância entre o usuário e os outros

In [7]:
def get_distances(user_id):
    distancias = []
    for user in ratings["user_id"].unique():
        if user_id == user:
            continue

        user1_ratings = ratings[ratings["user_id"]==user_id][["movie_id", "rating"]]
        user2_ratings = ratings[ratings["user_id"]==user][["movie_id", "rating"]]

        movies_in_common = pd.merge(user1_ratings, user2_ratings, on="movie_id", suffixes=("_user1", "_user2"))

        if len(movies_in_common) < 10:
            continue

        distance = np.linalg.norm(movies_in_common["rating_user1"] - movies_in_common["rating_user2"])

        distancias.append([user_id, user, distance])

    distancias.sort(key=lambda x: x[2])
    return distancias

## Obter as recomendações com base nos K usuários mais próximos

In [8]:
def get_recommendations(user_id, k=15, min_popularity=5, min_recomendations=3):
    distances = get_distances(user_id)
    user_movies = ratings[ratings["user_id"]==user_id][["movie_id", "rating"]]

    recommendations = []
    movie_neighbors_count = {}
    movie_neighbors_score = {}

    for neighbor in distances[:k]:
        neighbor_id = neighbor[1]
        neighbor_movies = ratings[ratings["user_id"]==neighbor_id][["movie_id", "rating"]]

        for movie_id in neighbor_movies["movie_id"].unique():

            neighbor_movie_score = neighbor_movies[neighbor_movies["movie_id"]==movie_id]["rating"].values[0]

            if movie_id in user_movies["movie_id"].unique():
                continue

            if movies.loc[movie_id]["watched"] < min_popularity:
                continue

            if movie_id in movie_neighbors_count:
                movie_neighbors_count[movie_id] += 1
                movie_neighbors_score[movie_id] += neighbor_movie_score

            else:
                movie_neighbors_count[movie_id] = 1
                movie_neighbors_score[movie_id] = neighbor_movie_score

    for movie_id in movie_neighbors_count:
        if movie_neighbors_count[movie_id] >= min_recomendations:
            recommendations.append([movie_id, movie_neighbors_score[movie_id] / movie_neighbors_count[movie_id], movie_neighbors_count[movie_id]])

    recommendations.sort(key=lambda x: x[1], reverse=True)

    recommendations_df = movies.loc[[x[0] for x in recommendations]]
    recommendations_df["expected_rating"] = [x[1] for x in recommendations]
    recommendations_df["recomendations"] = [x[2] for x in recommendations]
    recommendations_df.sort_values("expected_rating", ascending=False, inplace=True)

    return recommendations_df

## Transformar num dataframe

In [9]:
recommendations = get_recommendations(9, k=10, min_recomendations=3, min_popularity=5)
recommendations.head(10)


Unnamed: 0_level_0,title,genres,watched,mean_rating,expected_rating,recomendations
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,4.928571,7
1247,"Graduate, The (1967)",Comedy|Drama|Romance,79.0,4.063291,4.833333,3
778,Trainspotting (1996),Comedy|Crime|Drama,102.0,4.039216,4.7,5
2288,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller,45.0,3.933333,4.7,5
2951,"Fistful of Dollars, A (Per un pugno di dollari...",Action|Western,39.0,3.935897,4.7,5
1222,Full Metal Jacket (1987),Drama|War,102.0,4.098039,4.6875,8
1201,"Good, the Bad and the Ugly, The (Buono, il bru...",Action|Adventure|Western,72.0,4.145833,4.666667,6
1732,"Big Lebowski, The (1998)",Comedy|Crime,106.0,3.924528,4.666667,6
3000,Princess Mononoke (Mononoke-hime) (1997),Action|Adventure|Animation|Drama|Fantasy,48.0,3.958333,4.666667,3
31658,Howl's Moving Castle (Hauru no ugoku shiro) (2...,Adventure|Animation|Fantasy|Romance,40.0,4.075,4.666667,3


# 4. Buscar por filme 

In [36]:
def movie_recommendations(movie_id, min_popularity=50, min_recomendations=10):
    # Get all users that watched the movie sorted by rating
    users = ratings[(ratings["movie_id"]==movie_id) & (ratings["rating"] >= 4)][["user_id", "rating"]]
    users.sort_values("rating", ascending=False, inplace=True)

    # Get the recommendations for each user
    recommendations = []
    similar_movies_count = {}
    similar_movies_score = {}
    
    for user in users["user_id"]:
        for movie in ratings[ratings["user_id"]==user]["movie_id"].unique():
            if movie_id == movie:
                continue

            if movies.loc[movie]["watched"] < min_popularity:
                continue

            if ratings[(ratings["user_id"]==user) & (ratings["movie_id"]==movie)]["rating"].values[0] < 4:
                continue

            if movie in similar_movies_count:
                similar_movies_count[movie] += 1
                similar_movies_score[movie] += ratings[(ratings["user_id"]==user) & (ratings["movie_id"]==movie)]["rating"].values[0]

            else:
                similar_movies_count[movie] = 1
                similar_movies_score[movie] = ratings[(ratings["user_id"]==user) & (ratings["movie_id"]==movie)]["rating"].values[0]

    for movie in similar_movies_count:
        if similar_movies_count[movie] >= min_recomendations:
            recommendations.append([movie, similar_movies_score[movie] / similar_movies_count[movie], similar_movies_count[movie]])

    recommendations_df = movies.loc[[x[0] for x in recommendations]]
    recommendations_df["expected_rating"] = [x[1] for x in recommendations]
    recommendations_df["recomendations"] = [x[2] for x in recommendations]
    recommendations_df.sort_values("recomendations", ascending=False, inplace=True)

    return recommendations_df

# 5. Testando recomendações

## Testando por filme

In [43]:
movies.loc[[260]]
    

Unnamed: 0_level_0,title,genres,watched,mean_rating
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,251.0,4.231076


In [44]:
movie_recommendations(260, min_recomendations=50, min_popularity=5).head(10)

Unnamed: 0_level_0,title,genres,watched,mean_rating,expected_rating,recomendations
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1196,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Sci-Fi,211.0,4.21564,4.611111,144
1210,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Sci-Fi,196.0,4.137755,4.56746,126
2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,278.0,4.192446,4.614407,118
1198,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure,200.0,4.2075,4.530435,115
296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,307.0,4.197068,4.646789,109
593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,279.0,4.16129,4.504854,103
318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,4.742574,101
858,"Godfather, The (1972)",Crime|Drama,192.0,4.289062,4.655,100
356,Forrest Gump (1994),Comedy|Drama|Romance|War,329.0,4.164134,4.55914,93
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,224.0,3.970982,4.522989,87


## Testando por usuário

In [13]:
def show_user(user_id):
    user = ratings[ratings["user_id"]==user_id][["movie_id", "rating"]]

    exibe = pd.merge(user, movies, on="movie_id")
    exibe = exibe[["movie_id", "title", "genres", "rating"]]
    exibe["user_id"] = np.array([9 for x in range(len(exibe))])
    exibe.set_index("user_id", inplace=True)
    exibe.sort_values("rating", ascending=False, inplace=True)
    return exibe.head(10)

In [14]:
show_user(1)

Unnamed: 0_level_0,movie_id,title,genres,rating
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,5060,M*A*S*H (a.k.a. MASH) (1970),Comedy|Drama|War,5.0
9,2872,Excalibur (1981),Adventure|Fantasy,5.0
9,1291,Indiana Jones and the Last Crusade (1989),Action|Adventure,5.0
9,1298,Pink Floyd: The Wall (1982),Drama|Musical,5.0
9,2948,From Russia with Love (1963),Action|Adventure|Thriller,5.0
9,2947,Goldfinger (1964),Action|Adventure|Thriller,5.0
9,2944,"Dirty Dozen, The (1967)",Action|Drama|War,5.0
9,2899,Gulliver's Travels (1939),Adventure|Animation|Children,5.0
9,2858,American Beauty (1999),Drama|Romance,5.0
9,2700,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy|Musical,5.0


In [15]:
recommendations = get_recommendations(1, k=10, min_recomendations=3, min_popularity=5)
recommendations.head(10)

Unnamed: 0_level_0,title,genres,watched,mean_rating,expected_rating,recomendations
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2324,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama|Romance|War,88.0,4.147727,5.0,3
5952,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy,188.0,4.021277,5.0,3
79132,Inception (2010),Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX,143.0,4.066434,5.0,3
318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,4.833333,6
4226,Memento (2000),Mystery|Thriller,159.0,4.122642,4.833333,3
4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy,198.0,4.106061,4.75,4
858,"Godfather, The (1972)",Crime|Drama,192.0,4.289062,4.666667,3
68157,Inglourious Basterds (2009),Action|Drama|War,88.0,4.136364,4.666667,3
92259,Intouchables (2011),Comedy|Drama,37.0,4.108108,4.666667,3
4973,"Amelie (Fabuleux destin d'Amélie Poulain, Le) ...",Comedy|Romance,120.0,4.183333,4.5,3
