In [1]:
%pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162991 sha256=20fedd6d3bb8caebd5eb875c498647ea2d2b74c05da10e9204bf86d8b51239a9
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import requests
import json
from tqdm import tqdm
import pickle
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from tqdm import tqdm

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **데이터&모델 로드**

In [4]:
tmdb_movies=pd.read_csv('/content/drive/MyDrive/Colab Notebooks/green_academy/data/filtered_custom_tmdb.csv')
ratings = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/green_academy/data/ml-25m/ratings.csv')
links = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/green_academy/data/ml-25m/links.csv')

In [5]:
tmdb_movies['id'] = tmdb_movies['id'].astype(int)

In [6]:
total_movies = tmdb_movies['id'].astype(int).tolist()
links = links.dropna()
links['tmdbId'] = links['tmdbId'].astype(int)
links = links[links['tmdbId'].isin(total_movies)]

In [7]:
ratings_tmdb = pd.merge(ratings, links, on='movieId')

In [8]:
with open('/content/drive/MyDrive/Colab Notebooks/green_academy/data/poster_KM_dict.json', 'r') as file:
    poster_cluster = json.load(file)

In [9]:
def preprocess_poster_cluster(poster_cluster):
    tmdb_to_cluster = {}
    for cluster_id, images in poster_cluster.items():
        for image_filename in images:
            # Extract tmdbId from filename by removing the file extension
            tmdb_id = image_filename.split('.')[0]
            # Map tmdbId to the cluster ID
            tmdb_to_cluster[tmdb_id] = cluster_id
    return tmdb_to_cluster


# Preprocess the poster_cluster to create a mapping
poster_cluster = preprocess_poster_cluster(poster_cluster)

In [10]:
movies_in_rating = ratings_tmdb['tmdbId'].unique().tolist()
movies_with_poster = [value for key in poster_cluster for value in poster_cluster[key]]

In [11]:
with open('/content/drive/MyDrive/Colab Notebooks/green_academy/models/minmax_svd_tmdb_movies.pkl', 'rb') as file:
    svd = pickle.load(file)

with open('/content/drive/MyDrive/Colab Notebooks/green_academy/models/cosine_sim_overview.pkl', 'rb') as file:
    overview_sim = pickle.load(file)

with open('/content/drive/MyDrive/Colab Notebooks/green_academy/models/cosine_sim_others.pkl', 'rb') as file:
    detail_sim = pickle.load(file)

# **Function 정의**

In [38]:
def get_unrated_movies(user_id):
    # Convert series to sets for faster operation
    user_rated_movie_ids = set(ratings_tmdb[ratings_tmdb['userId'] == user_id]['tmdbId'])
    all_movie_ids = set(tmdb_movies['id'])

    # Use set difference to find unrated movies
    unrated_movie_ids = list(all_movie_ids - user_rated_movie_ids)

    # Filter tmdb_movies for unrated movies
    unrated_movies = tmdb_movies[tmdb_movies['id'].isin(unrated_movie_ids)]
    return unrated_movies

def get_rated_movies(user_id):
    # Assuming ratings_tmdb is a DataFrame with 'userId' as one of its columns.
    rated_movies = ratings_tmdb[ratings_tmdb['userId'] == user_id]
    return rated_movies

def rate_unrated_movies(user_id, top_n=None):
    unrated_movies = get_unrated_movies(user_id)
    ratings_list = [{'id': row['id'], 'est_rating': svd.predict(user_id, row['id']).est} for index, row in unrated_movies.iterrows()]
    est_ratings = pd.DataFrame(ratings_list).sort_values(by='est_rating', ascending=False)
    return est_ratings if top_n is None else est_ratings.head(top_n)

def rate_rated_movies(user_id, top_n=None):
    rated_movies = get_rated_movies(user_id)
    # Assuming 'tmdbId' is the correct column in rated_movies DataFrame
    ratings_list = [{'id': row['tmdbId'], 'est_rating': svd.predict(user_id, row['tmdbId']).est} for index, row in rated_movies.iterrows()]
    est_ratings = pd.DataFrame(ratings_list).sort_values(by='est_rating', ascending=False)
    return est_ratings if top_n is None else est_ratings.head(top_n)

def update_recommendations_df(m_id, similarity, recommendations_df):
    # Convert DataFrame to a dictionary for faster update, if not already done
    if isinstance(recommendations_df, pd.DataFrame):
        # Initialize or update the dictionary
        rec_dict = {row['id']: {'similarity': row['similarity'], 'count': row['count']} for index, row in recommendations_df.iterrows()}
    else:
        rec_dict = recommendations_df  # It's already a dictionary

    if m_id in rec_dict:
        # Update only if similarity is higher and increment count
        if similarity > rec_dict[m_id]['similarity']:
            rec_dict[m_id]['similarity'] = similarity
        rec_dict[m_id]['count'] += 1
    else:
        # Add new entry
        rec_dict[m_id] = {'similarity': similarity, 'count': 1}

    # Convert back to DataFrame for return, if necessary
    new_recommendations_df = pd.DataFrame.from_dict(rec_dict, orient='index', columns=['similarity', 'count']).reset_index().rename(columns={'index': 'id'})
    return new_recommendations_df

def get_movie_from_sim_cosine(tmdb_id, similarity_matrix, top_n=None):
    # Assume total_movies is a list or array where index can be found quickly
    idx = total_movies.index(tmdb_id)

    # Extract the similarity scores and ignore the first one (self-similarity)
    sim_scores = similarity_matrix[idx, 1:] if top_n is None else similarity_matrix[idx, 1:top_n + 1]
    if top_n:
        top_n = int(top_n/2)
    # Get indices of the sorted scores (in descending order). Use 'argpartition' for top_n for better performance
    if top_n is None:
        sorted_indices = np.argsort(-sim_scores)
    else:
        # argpartition doesn't fully sort but partitions the array such that the kth element is in its sorted position
        # and all elements lower are to its left, but unsorted. Here, we get top_n elements then sort those for accuracy.
        top_indices = np.argpartition(-sim_scores, range(top_n))[:top_n]
        sorted_indices = top_indices[np.argsort(-sim_scores[top_indices])]

    # Construct the list of tuples for tmdb_id and scores
    tmdb_id_scores = [(tmdb_movies['id'].iloc[i], sim_scores[i]) for i in sorted_indices]

    return tmdb_id_scores

def get_similar_movies(user_id, similarity_matrix, movies, top_n=None, rated=True):
    recommendations = []
    # rated_movies = get_rated_movies(1)
    if top_n:
        num_rows = movies.shape[0]
        top_percent = int(num_rows * 0.2)
    else:
        top_percent = movies.shape[0]
    if rated:
        top_percent_movies = movies.sort_values(by='rating', ascending=False).head(top_percent)
        col_name = 'tmdbId'
    else:
        top_percent_movies = movies.sort_values(by='est_rating', ascending=False).head(top_percent)
        col_name = 'id'

    for idx, row in top_percent_movies.iterrows():
        recommendations.extend(get_movie_from_sim_cosine(row[col_name], overview_sim, top_n))

    new_recs_df = pd.DataFrame(recommendations, columns=['id', 'similarity'])

    agg_recs = new_recs_df.groupby('id').agg(
        similarity=('similarity', 'max'),
        count=('id', 'size')
    ).reset_index()

    sim_recommendations = agg_recs[agg_recs['id'].isin(movies[col_name].tolist())]
    return sim_recommendations

def gen_ratings_with_posters(df):
    temp_df = df.copy()
    temp_df['tmdbId'] = temp_df['tmdbId'].astype(str)
    temp_df['poster_cluster_id'] = temp_df['tmdbId'].map(poster_cluster)
    temp_df.to_csv('/content/drive/MyDrive/Colab Notebooks/green_academy/models/ratings_tmdb.csv', index=False)
    return temp_df

def gen_evaluation_df(recommend_df):
    evaluation_df = pd.DataFrame()
    evaluation_df['id'] = recommend_df['id']
    evaluation_df['est_rating'] = recommend_df['est_rating']
    evaluation_df['overview_similarity'] = recommend_df['similarity_overview']
    evaluation_df['overview_count'] = recommend_df['count_overview']
    evaluation_df['detail_similarity'] = recommend_df['similarity_detail']
    evaluation_df['detail_count'] = recommend_df['count_detail']
    evaluation_df['poster_score'] = recommend_df['poster_score']
    est_neut_score = (evaluation_df['est_rating'].median() + evaluation_df['est_rating'].mean())/2
    evaluation_df['est_rating'].fillna(est_neut_score, inplace=True)
    evaluation_df['overview_similarity'].fillna(0, inplace=True)
    evaluation_df['overview_count'].fillna(0, inplace=True)
    evaluation_df['detail_similarity'].fillna(0, inplace=True)
    evaluation_df['detail_count'].fillna(0, inplace=True)
    evaluation_df['poster_score'].fillna(0, inplace=True)
    evaluation_df['userId'] = recommend_df['userId']
    return evaluation_df

def get_recommendations(user_id, ratings_tmdb_posters, top_n=None, rated=True, cum=False):
    if rated == True:
        movies = rate_rated_movies(user_id)
        rated_movies = get_rated_movies(user_id)
        top_overview = get_similar_movies(user_id, overview_sim, rated_movies, top_n)
        top_detail = get_similar_movies(user_id, detail_sim, rated_movies, top_n)
        # top_sim_rec = pd.merge(top_overview, top_detail, on='id', how='outer', suffixes=('_overview', '_detail'))
        movies['id'] = movies['id'].astype(int)
        top_overview['id'] = top_overview['id'].astype(int)
        top_detail['id'] = top_detail['id'].astype(int)
        # top_sim_rec['id'] = top_sim_rec['id'].astype(int).astype(str)
        # recommend_df = pd.merge(movies, top_sim_rec, on='id', how='left')
    else:
        movies = rate_unrated_movies(user_id)
        top_overview = get_similar_movies(user_id, overview_sim, movies, top_n, rated)
        top_detail = get_similar_movies(user_id, detail_sim, movies, top_n, rated)
        # top_sim_rec = pd.merge(top_overview, top_detail, on='id', how='outer', suffixes=('_overview', '_detail'))
        movies['id'] = movies['id'].astype(int)
        top_overview['id'] = top_overview['id'].astype(int)
        top_detail['id'] = top_detail['id'].astype(int)
        # top_sim_rec['id'] = top_sim_rec['id'].astype(int).astype(str)
        # recommend_df = pd.merge(movies, top_sim_rec, on='id', how='outer')
    user_posters = ratings_tmdb_posters[ratings_tmdb_posters['userId'].astype(int) == user_id]['poster_cluster_id'].value_counts()
    movies['poster_cluster_id'] = movies['id'].astype(str).map(poster_cluster)
    movies['poster_score'] = movies['poster_cluster_id'].apply(lambda x: user_posters[int(x)] if pd.notna(x) and int(x) in user_posters.index.to_list() else None)
    top_overview['poster_cluster_id'] = top_overview['id'].astype(str).map(poster_cluster)
    top_overview['poster_score'] = top_overview['poster_cluster_id'].apply(lambda x: user_posters[int(x)] if pd.notna(x) and int(x) in user_posters.index.to_list() else None)
    top_overview['overview_score'] = top_overview['similarity'] * top_overview['count']
    top_detail['poster_cluster_id'] = top_detail['id'].astype(str).map(poster_cluster)
    top_detail['poster_score'] = top_detail['poster_cluster_id'].apply(lambda x: user_posters[int(x)] if pd.notna(x) and int(x) in user_posters.index.to_list() else None)
    top_detail['detail_score'] = top_detail['similarity'] * top_detail['count']
    top_overview = top_overview.sort_values(by='overview_score', ascending=False)
    top_detail = top_detail.sort_values(by='detail_score', ascending=False)

    if not cum:
        if top_n:
            return movies.head(top_n), top_overview.head(top_n), top_detail.head(top_n)
        else:
            return movies, top_overview, top_detail
    else:
        top_detail = top_detail.drop(columns=['poster_cluster_id', 'poster_score'])
        top_overview = top_overview.drop(columns=['poster_cluster_id', 'poster_score'])
        top_sim_rec = pd.merge(top_overview, top_detail, on='id', how='outer', suffixes=('_overview', '_detail'))
        recommend_df = pd.merge(movies, top_sim_rec, on='id', how='left')
        recommend_df['userId'] = user_id
        return recommend_df

def gen_final_score(df, score_col):
    scaler = MinMaxScaler(feature_range=(0.5, 5))
    df_temp = df.sort_values(by=[score_col, 'poster_score'], ascending=False).reset_index(drop=True)
    df['final_score'] = df[score_col] * df['poster_score']
    df.sort_values(by='final_score', ascending=False)
    df['scaled_final_score'] = scaler.fit_transform(df[['final_score']])
    final = df.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)[['id', 'scaled_final_score']]
    final['source'] = score_col
    return final

def final_ranking(df1, df2, df3, top_n=10):
    # Concatenate the dataframes
    combined_df = pd.concat([df1, df2, df3])

    # Sort the dataframe first by 'id' and then by 'scaled_final_score' in descending order
    combined_df.sort_values(by=['id', 'scaled_final_score'], ascending=[True, False], inplace=True)

    # Drop duplicates to keep the entry with the highest 'scaled_final_score' for each 'id'
    final_ranked = combined_df.drop_duplicates(subset=['id'], keep='first')

    # Sort again by 'scaled_final_score' to get the final ranking
    final_ranked = final_ranked.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)

    # Return the top n entries
    final_ranked['title'] = final_ranked['id'].map(title_series)
    return final_ranked.head(top_n)

## **포스터 점수 DF 생성/로드**

In [13]:
if not os.path.exists('/content/drive/MyDrive/Colab Notebooks/green_academy/models/ratings_tmdb.csv'):
    ratings_tmdb_posters = gen_ratings_with_posters(ratings_tmdb)
else:
    ratings_tmdb_posters = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/green_academy/models/ratings_tmdb.csv')

title_series = tmdb_movies.set_index('id')['title']

# **유저별 평가 완료 작품에 대한 예측**

In [41]:
a = get_recommendations(1, ratings_tmdb_posters, cum=True)
a.head()

Unnamed: 0,id,est_rating,poster_cluster_id,poster_score,similarity_overview,count_overview,overview_score,similarity_detail,count_detail,detail_score,userId
0,614,0.783628,9,4,0.029186,42,1.225796,0.029186,42,1.225796,1
1,832,0.781228,8,6,0.324372,42,13.623607,0.324372,42,13.623607,1
2,797,0.769568,8,6,1.0,42,42.0,1.0,42,42.0,1
3,11645,0.767094,5,5,0.080568,42,3.383871,0.080568,42,3.383871,1
4,490,0.759874,0,6,0.066321,42,2.785466,0.066321,42,2.785466,1


In [42]:
b = gen_evaluation_df(a)
b.head()

Unnamed: 0,id,est_rating,overview_similarity,overview_count,detail_similarity,detail_count,poster_score,userId
0,614,0.783628,0.029186,42,0.029186,42,4,1
1,832,0.781228,0.324372,42,0.324372,42,6,1
2,797,0.769568,1.0,42,1.0,42,6,1
3,11645,0.767094,0.080568,42,0.080568,42,5,1
4,490,0.759874,0.066321,42,0.066321,42,6,1


## **사용자 5%에 대하여 평가 완료 작품의 예측 점수 산출 및 저장**

In [None]:
total_user_list = ratings_tmdb['userId'].unique()
total_user_series = pd.Series(total_user_list)
list_len = len(total_user_list)
sample_size = int(list_len*0.05)
user_list = total_user_series.sample(n=sample_size, random_state=42)

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from multiprocessing import Pool


def process_user(user_id):
    recommend_df = get_recommendations(user_id, ratings_tmdb_posters)
    evaluation_df = gen_evaluation_df(recommend_df)
    return evaluation_df

with Pool(8) as pool:
    # Map process_user function to user IDs and get the result
    results = list(tqdm(pool.imap(process_user, user_list), total=len(user_list), desc="Processing Users"))

# Concatenate all dataframes into a single dataframe
final_df = pd.concat(results, ignore_index=True)

Processing Users:  94%|█████████▍| 7679/8127 [7:25:03<36:59,  4.95s/it]

In [None]:
final_df.to_csv(f'/content/drive/MyDrive/Colab Notebooks/green_academy/data/rated_movies_scores_5_percent.csv', index=False)

# **유저별 미평가 영화 추천**

In [None]:
svd_rec, overview_rec, detail_rec = get_recommendations(1, ratings_tmdb_posters, top_n=10, rated=False)

### SVD 모델 기반 추천

SVD 모델 기반 사용자 평가 점수 산출 후 각 영화의 ***포스터점수****로 곱하여 최종 점수 산출


> 포스터점수: 각 사용자 별 영화 클러스터 그룹을 기준으로 클러스터에 포함되는 평가된 영화들의 수



In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0.5, 5))
svd_rank = svd_rec.sort_values(by=['est_rating', 'poster_score'], ascending=False).reset_index(drop=True)
svd_rank['final_score'] = svd_rank['est_rating'] * svd_rank['poster_score']
svd_rank.sort_values(by='final_score', ascending=False)
svd_rank['scaled_final_score'] = scaler.fit_transform(svd_rank[['final_score']])
svd_final = svd_rank.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)[['id', 'scaled_final_score']]

In [None]:
svd_rank

Unnamed: 0,id,est_rating,poster_cluster_id,poster_score,final_score,scaled_final_score
0,538362,0.919147,2,4.0,3.67659,2.659475
1,398978,0.90585,9,4.0,3.6234,2.563675
2,519010,0.891631,2,4.0,3.566524,2.461238
3,579245,0.884453,9,4.0,3.537811,2.409523
4,517814,0.870525,5,5.0,4.352625,3.87707
5,38396,0.82935,0,6.0,4.976101,5.0
6,458737,0.825867,7,3.0,2.477602,0.5
7,14537,0.824971,0,6.0,4.949828,4.952681
8,92321,0.82129,5,5.0,4.106452,3.433692
9,483184,0.818801,8,6.0,4.912808,4.886005


In [None]:
svd_final

Unnamed: 0,id,scaled_final_score
0,38396,5.0
1,14537,4.952681
2,483184,4.886005
3,517814,3.87707
4,92321,3.433692
5,538362,2.659475
6,398978,2.563675
7,519010,2.461238
8,579245,2.409523
9,458737,0.5


### 영화 Overview Cosine Similarity Matrix 기반 추천

Ovierview Cosine Similarity Matrix 기준 유사도(Similarity) 점수(A)\
해당 영화가 포함된 횟수(Count) 점수(B)\
포스터 점수(C)

합계점수: A x B x C
최종점수: 합게점수에 MinMaxScale(0.5, 5) 적용

In [None]:
overview_rank = overview_rec.sort_values(by=['overview_score', 'poster_score'], ascending=False)
overview_rank['final_score'] = overview_rank['overview_score'] * overview_rank['poster_score']
overview_rank.sort_values(by='final_score', ascending=False)
overview_rank['scaled_final_score'] = scaler.fit_transform(overview_rank[['final_score']])
overview_rank.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)
overview_final = overview_rank.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)[['id', 'scaled_final_score']]

In [None]:
overview_rank

Unnamed: 0,id,similarity,count,poster_cluster_id,poster_score,overview_score,final_score,scaled_final_score
3,18,1.0,830,2,4,830.0,3320.0,5.0
1,13,1.0,691,9,4,691.0,2764.0,4.203808
0,5,1.0,415,2,4,415.0,1660.0,2.62288
6,28,1.0,384,5,5,384.0,1920.0,2.9952
5,25,1.0,197,3,1,197.0,197.0,0.527864
8,35,1.0,189,0,6,189.0,1134.0,1.869648
4,19,0.197939,827,7,3,163.695757,491.08727,0.948997
2,14,0.122895,735,7,3,90.327815,270.983445,0.633808
7,33,0.108278,420,1,5,45.476766,227.383828,0.571374
9,55,0.183411,242,2,4,44.385479,177.541916,0.5


In [None]:
overview_final

Unnamed: 0,id,scaled_final_score
0,18,5.0
1,13,4.203808
2,28,2.9952
3,5,2.62288
4,35,1.869648
5,19,0.948997
6,14,0.633808
7,33,0.571374
8,25,0.527864
9,55,0.5


### 영화 정보 Cosine Similarity Matrix 기반 추천



> Cast, Crew, Genre, Production Company, Tagline 를 기반으로 분석한 Cosine Similarity Matrix



Detail Cosine Similarity Matrix 기준 유사도(Similarity) 점수(A)\
해당 영화가 포함된 횟수(Count) 점수(B)\
포스터 점수(C)

합계점수: A x B x C
최종점수: 합게점수에 MinMaxScale(0.5, 5) 적용

In [None]:
detail_rank = detail_rec.sort_values(by=['detail_score', 'poster_score'], ascending=False)
detail_rank['final_score'] = detail_rank['detail_score'] * detail_rank['poster_score']
detail_rank.sort_values(by='final_score', ascending=False)
detail_rank['scaled_final_score'] = scaler.fit_transform(detail_rank[['final_score']])
detail_rank.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)
detail_final = detail_rank.sort_values(by='scaled_final_score', ascending=False).reset_index(drop=True)[['id', 'scaled_final_score']]


In [None]:
detail_rank

Unnamed: 0,id,similarity,count,poster_cluster_id,poster_score,detail_score,final_score,scaled_final_score
3,18,1.0,830,2,4,830.0,3320.0,5.0
1,13,1.0,691,9,4,691.0,2764.0,4.203808
0,5,1.0,415,2,4,415.0,1660.0,2.62288
6,28,1.0,384,5,5,384.0,1920.0,2.9952
5,25,1.0,197,3,1,197.0,197.0,0.527864
8,35,1.0,189,0,6,189.0,1134.0,1.869648
4,19,0.197939,827,7,3,163.695757,491.08727,0.948997
2,14,0.122895,735,7,3,90.327815,270.983445,0.633808
7,33,0.108278,420,1,5,45.476766,227.383828,0.571374
9,55,0.183411,242,2,4,44.385479,177.541916,0.5


In [None]:
overview_rec.head()

Unnamed: 0,id,similarity,count,poster_cluster_id,poster_score,overview_score
3,18,1.0,830,2,4,830.0
1,13,1.0,691,9,4,691.0
0,5,1.0,415,2,4,415.0
6,28,1.0,384,5,5,384.0
5,25,1.0,197,3,1,197.0


In [None]:
svd_final = gen_final_score(svd_rec, 'est_rating')
overview_final = gen_final_score(overview_rec, 'overview_score')
detail_final = gen_final_score(detail_rec, 'detail_score')

### 최종점수 기반 상위 10개 영화 추천



In [None]:
rec_final = final_ranking(svd_final, overview_final, detail_final, top_n=10)

In [None]:
rec_final

Unnamed: 0,id,scaled_final_score,source
0,18,5.0,overview_score
1,38396,5.0,est_rating
2,14537,4.952681,est_rating
3,483184,4.886005,est_rating
4,13,4.203808,overview_score
5,517814,3.87707,est_rating
6,92321,3.433692,est_rating
7,28,2.9952,overview_score
8,538362,2.659475,est_rating
9,5,2.62288,overview_score


In [None]:
title_series = tmdb_movies.set_index('id')['title']

In [None]:
rec_final['title'] = rec_final['id'].map(title_series)

In [None]:
rec_final

Unnamed: 0,id,scaled_final_score,source,title
0,18,5.0,overview_score,The Fifth Element
1,38396,5.0,est_rating,That's Life
2,14537,4.952681,est_rating,Harakiri
3,483184,4.886005,est_rating,Dogman
4,13,4.203808,overview_score,Forrest Gump
5,517814,3.87707,est_rating,Capernaum
6,92321,3.433692,est_rating,Hotarubi no Mori e
7,28,2.9952,overview_score,Apocalypse Now
8,538362,2.659475,est_rating,On My Skin
9,5,2.62288,overview_score,Four Rooms
