# 9. Gyakorlat: Ajánló rendszerek - metaadat-alapú ajánló

In [5]:
import datetime
import warnings
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.metrics.pairwise import linear_kernel

warnings.filterwarnings("ignore")

## Ajánló rendszer készítése filmek leírása alapján

In [2]:
# Columns kept for the recommender; everything else in the CSV is discarded.
kept_columns = [
    'title', 'genres', 'runtime', 'vote_average',
    'vote_count', 'release_date', 'overview', 'id',
]

# Read the metadata file, keep the selected columns and expose the release
# date under the name 'year' (it is reduced to a year in a later cell).
df = (
    pd.read_csv('movies_metadata.csv', parse_dates=True)[kept_columns]
    .rename(columns={'release_date': 'year'})
)

df.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,year,overview,id
0,Toy Story,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",81.0,7.7,5415.0,1995-10-30,"Led by Woody, Andy's toys live happily in his ...",862
1,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",104.0,6.9,2413.0,1995-12-15,When siblings Judy and Peter discover an encha...,8844
2,Grumpier Old Men,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",101.0,6.5,92.0,1995-12-22,A family wedding reignites the ancient feud be...,15602
3,Waiting to Exhale,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",127.0,6.1,34.0,1995-12-22,"Cheated on, mistreated and stepped on, the wom...",31357
4,Father of the Bride Part II,"[{'id': 35, 'name': 'Comedy'}]",106.0,5.7,173.0,1995-02-10,Just when George Banks has recovered from his ...,11862


## Műfaj- és dátumoszlopok átalakítása

In [3]:
def convert_int(x):
    """Return the year of a ``YYYY-MM-DD`` date value.

    Parameters
    ----------
    x : object
        Value to parse. It is passed through ``str()`` first, so missing
        values such as ``NaN`` or ``None`` are accepted.

    Returns
    -------
    int
        The year of the date, or ``0`` when ``str(x)`` is not a valid
        ``%Y-%m-%d`` date.
    """
    try:
        return datetime.datetime.strptime(str(x), '%Y-%m-%d').year
    except ValueError:
        # strptime reports malformed or missing dates with ValueError. Catching
        # only that keeps KeyboardInterrupt and unrelated bugs visible, which a
        # bare ``except:`` would silently turn into year 0.
        return 0

def _genre_names(raw):
    """Parse one raw ``genres`` cell and return the list of genre names."""
    # The cell holds the Python-literal text of a list of {'id', 'name'} dicts.
    parsed = literal_eval(raw)
    if not isinstance(parsed, list):
        return []
    return [entry['name'] for entry in parsed]


# Reduce each release date to its year (convert_int yields 0 for bad dates).
df['year'] = df['year'].apply(convert_int)

# Replace the serialized genre records with plain lists of genre names.
df['genres'] = df['genres'].apply(_genre_names)

df.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,year,overview,id
0,Toy Story,"[Animation, Comedy, Family]",81.0,7.7,5415.0,1995,"Led by Woody, Andy's toys live happily in his ...",862
1,Jumanji,"[Adventure, Fantasy, Family]",104.0,6.9,2413.0,1995,When siblings Judy and Peter discover an encha...,8844
2,Grumpier Old Men,"[Romance, Comedy]",101.0,6.5,92.0,1995,A family wedding reignites the ancient feud be...,15602
3,Waiting to Exhale,"[Comedy, Drama, Romance]",127.0,6.1,34.0,1995,"Cheated on, mistreated and stepped on, the wom...",31357
4,Father of the Bride Part II,[Comedy],106.0,5.7,173.0,1995,Just when George Banks has recovered from his ...,11862


## TF-IDF és átalakítás

In [4]:
# Import the TF-IDF vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Replace missing overviews with empty strings so every row is text
df['overview'] = df['overview'].fillna('')

# Vectorizer that ignores English stop words
tfidf = TfidfVectorizer(stop_words='english')

# Fit on the overviews and build the final TF-IDF matrix (one row per movie)
tfidf_matrix = tfidf.fit_transform(df['overview'])

print(tfidf_matrix.shape)

(45466, 75827)


Belső szorzat kiszámítása lineáris kernellel

In [6]:
# Cosine similarity matrix. TfidfVectorizer L2-normalises each row by default,
# so the plain dot product of the rows is already their cosine similarity.
# With a single argument linear_kernel compares the matrix against itself.
# NOTE: the result is a dense n x n array; for the 45466 rows printed above
# that is roughly 16.5 GB of float64.
cosine_sim = linear_kernel(tfidf_matrix)

Fordított leképezés létrehozása: filmcím → sorindex

In [7]:
# Reverse lookup: movie title -> row index in df.
# Series.drop_duplicates() compares the *values* (the row indices, which are
# always unique), so it never removed repeated titles. De-duplicate on the
# index instead, keeping the first movie for each title, so that
# indices[title] always yields a single row index.
indices = (
    pd.Series(df.index, index=df['title'])
    .loc[lambda s: ~s.index.duplicated(keep='first')]
)

Függvény, ami egy filmet fogad paraméterként, és kiadja a javaslatokat 

In [9]:
def content_recommender(title, cosine_sim=cosine_sim, df=df, indices=indices, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title to look up in ``indices``.
    cosine_sim : 2-D array
        Pairwise similarity matrix; row ``i`` holds the scores of movie ``i``
        against every movie.
    df : pandas.DataFrame
        Movie table with a ``title`` column, addressed by row position.
    indices : pandas.Series
        Maps a title to the row position of that movie.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles, most similar first; the queried movie itself
        is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Row position of the requested title.
    idx = indices[title]
    # A title shared by several movies makes the lookup return a Series;
    # use the first match instead of failing later on a 2-D score slice.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of the requested movie to every movie.
    scores = np.asarray(cosine_sim[idx], dtype=float)

    # Stable sort on the negated scores: descending similarity, ties kept in
    # row order, which matches sorted(..., reverse=True) on (index, score) pairs.
    ranked = np.argsort(-scores, kind='stable')

    # Remove the query movie by position rather than assuming it sorts first:
    # a movie with an empty overview scores 0 against itself, and a movie with
    # an identical overview at a lower row would be ranked ahead of it.
    ranked = ranked[ranked != idx][:top_n]

    return df['title'].iloc[ranked]

### Javaslatok lekérése a Star Wars, Az oroszlánkirály és A keresztapa című filmekhez

In [10]:
# Print the recommendations for each sample title, separated by a blank line.
for position, movie in enumerate(('Star Wars', 'The Lion King', 'The Godfather')):
    if position:
        print()
    print(content_recommender(movie))

1154                               The Empire Strikes Back
30434                        The Star Wars Holiday Special
26555                         Star Wars: The Force Awakens
1167                                    Return of the Jedi
34153           Samson and the Seven Miracles of the World
1267                                          Mad Dog Time
5187                                   The Triumph of Love
37834                                         Call to Arms
25104    1½ Knights - In Search of the Ravishing Prince...
24388                                      Sleeping Beauty
Name: title, dtype: object

34682    How the Lion Cub and the Turtle Sang a Song
9353                                The Lion King 1½
9115                  The Lion King 2: Simba's Pride
42829                                           Prey
25654                                 Fearless Fagan
17041                                   African Cats
27933              Massaï, les guerriers de la pluie
6094       