In [None]:
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import KNNBasic
import numpy as np
from ast import literal_eval

### Load & Preprocess data

In [None]:
tmdb_5000_credits = pd.read_csv("datasets/movies-2/tmdb_5000_credits.csv")
tmdb_5000_movies = pd.read_csv("datasets/movies-2/tmdb_5000_movies.csv")

In [None]:
columns_movies = ["budget", "genres", "id", "original_language", "original_title", "overview", "popularity", "production_countries", "release_date", "spoken_languages", "vote_average", "vote_count"]
columns_credits = ["movie_id", "title", "cast", "crew"]

tmdb_5000_movies = tmdb_5000_movies[columns_movies]
tmdb_5000_credits = tmdb_5000_credits[columns_credits]

for column in ["genres", "production_countries", "spoken_languages"]:
    tmdb_5000_movies[column] = tmdb_5000_movies[column].apply(literal_eval)

for column in ["crew", "cast"]:
    tmdb_5000_credits[column] = tmdb_5000_credits[column].apply(literal_eval)    

In [None]:
tmdb_5000_credits.head(5)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{'cast_id': 242, 'character': 'Jake Sully', '...","[{'credit_id': '52fe48009251416c750aca23', 'de..."
1,285,Pirates of the Caribbean: At World's End,"[{'cast_id': 4, 'character': 'Captain Jack Spa...","[{'credit_id': '52fe4232c3a36847f800b579', 'de..."
2,206647,Spectre,"[{'cast_id': 1, 'character': 'James Bond', 'cr...","[{'credit_id': '54805967c3a36829b5002c41', 'de..."
3,49026,The Dark Knight Rises,"[{'cast_id': 2, 'character': 'Bruce Wayne / Ba...","[{'credit_id': '52fe4781c3a36847f81398c3', 'de..."
4,49529,John Carter,"[{'cast_id': 5, 'character': 'John Carter', 'c...","[{'credit_id': '52fe479ac3a36847f813eaa3', 'de..."


In [None]:
tmdb_5000_movies.head(5)

Unnamed: 0,budget,genres,id,original_language,original_title,overview,popularity,production_countries,release_date,spoken_languages,vote_average,vote_count
0,237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",7.2,11800
1,300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",285,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{'iso_3166_1': 'US', 'name': 'United States o...",2007-05-19,"[{'iso_639_1': 'en', 'name': 'English'}]",6.9,4500
2,245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",206647,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2015-10-26,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...",6.3,4466
3,250000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",49026,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{'iso_3166_1': 'US', 'name': 'United States o...",2012-07-16,"[{'iso_639_1': 'en', 'name': 'English'}]",7.6,9106
4,260000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",49529,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{'iso_3166_1': 'US', 'name': 'United States o...",2012-03-07,"[{'iso_639_1': 'en', 'name': 'English'}]",6.1,2124


In [None]:
def preprocess_dict(column: str, key: str = "name"):
    genres_column = tmdb_5000_movies[column]
    genres_column_processed = []
    for item in genres_column:
        genres_column_processed.append([dic[key] for dic in item])
    return genres_column_processed


In [None]:
tmdb_5000_movies["genres"] = preprocess_dict("genres")
tmdb_5000_movies["production_countries"] = preprocess_dict("production_countries")
tmdb_5000_movies["spoken_languages"] = preprocess_dict("spoken_languages")

In [None]:
tmdb_5000_movies.head(5)

Unnamed: 0,budget,genres,id,original_language,original_title,overview,popularity,production_countries,release_date,spoken_languages,vote_average,vote_count
0,237000000,"[Action, Adventure, Fantasy, Science Fiction]",19995,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[United States of America, United Kingdom]",2009-12-10,"[English, Español]",7.2,11800
1,300000000,"[Adventure, Fantasy, Action]",285,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,[United States of America],2007-05-19,[English],6.9,4500
2,245000000,"[Action, Adventure, Crime]",206647,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[United Kingdom, United States of America]",2015-10-26,"[Français, English, Español, Italiano, Deutsch]",6.3,4466
3,250000000,"[Action, Crime, Drama, Thriller]",49026,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,[United States of America],2012-07-16,[English],7.6,9106
4,260000000,"[Action, Adventure, Science Fiction]",49529,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,[United States of America],2012-03-07,[English],6.1,2124


In [None]:
tmdb_5000_movies = tmdb_5000_movies.merge(tmdb_5000_credits, left_on='id', right_on='movie_id')

In [None]:
print(tmdb_5000_movies.shape)
tmdb_5000_movies = tmdb_5000_movies.query("budget != 0")
print(tmdb_5000_movies.shape)

(4803, 16)
(3766, 16)


In [None]:
tmdb_5000_movies["cast"] = preprocess_dict("cast", "character")

In [None]:
data = Dataset.load_from_df()
data

Dataset ml-100k could not be found. Do you want to download it? [Y/n] Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to C:\Users\User/.surprise_data/ml-100k


<surprise.dataset.DatasetAutoFolds at 0x14c6d3f7bc0>

In [4]:
print(data)

<surprise.dataset.DatasetAutoFolds object at 0x0000014C6D3F7BC0>


In [3]:
reader = Reader(rating_scale=(1,5))

trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()

sim_options = {'name': 'cosine', 'user_based': False}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x14c6dea6ff0>

In [5]:
genre_movies = [item_id for item_id, genres in trainset.ir.items() if 'Action' in genres]

predictions = []
for movie_id in genre_movies:
    predictions.append((movie_id, model.predict(str(1), movie_id).est))
    
top_recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)[:5]

print(f"Recomendaciones de películas de género Action: ")
for movie_id, _ in top_recommendations:
    print(f"Película {movie_id}")

Recomendaciones de películas de género Action: 
