# Recommender Evaluation Function
---

Takes in slot values and prediction function as input \
Perform token match to count number match \
Higher match count should appear in higher rankings of prediction results


# Imports

In [1]:
import os
import pandas as pd
import numpy as np

# Load Data

In [2]:
basedir = os.getcwd()
data_path = os.path.join(basedir, 'fullset_BoW.csv')

In [3]:
df = pd.read_csv(data_path, index_col=0)
df.head()

Unnamed: 0,Title,Genre,Director,Actors,BoW_genre,BoW_director,BoW_actors,BoW_genre_director,BoW_genre_actors,BoW_director_actors,Bag_of_words
0,Toy Story (1995),"Adventure,Animation,Children,Comedy,Fantasy",John Lasseter,"Tim Allen, Tom Hanks, Don Rickles, Jim Varney,...",adventure animation children comedy fantasy,john lasseter,tim allen tom hanks don rickles jim varney ...,adventure animation children comedy fantasy jo...,adventure animation children comedy fantasy ti...,john lasseter tim allen tom hanks don rickles,adventure animation children comedy fantasy jo...
1,Jumanji (1995),"Adventure,Children,Fantasy",Joe Johnston,"Jonathan Hyde, Bradley Pierce, Robin Williams,...",adventure children fantasy,joe johnston,jonathan hyde bradley pierce robin williams ...,adventure children fantasy joe johnston,adventure children fantasy jonathan hyde brad...,joe johnston jonathan hyde bradley pierce ro...,adventure children fantasy joe johnston jonath...
2,Grumpier Old Men (1995),"Comedy,Romance",Howard Deutch,"Jack Lemmon, Walter Matthau, Ann-Margret , Sop...",comedy romance,howard deutch,jack lemmon walter matthau ann-margret sop...,comedy romance howard deutch,comedy romance jack lemmon walter matthau an...,howard deutch jack lemmon walter matthau ann...,comedy romance howard deutch jack lemmon walt...
3,Waiting to Exhale (1995),"Comedy,Drama,Romance",Forest Whitaker,"Angela Bassett, Loretta Devine, Whitney Housto...",comedy drama romance,forest whitaker,angela bassett loretta devine whitney housto...,comedy drama romance forest whitaker,comedy drama romance angela bassett loretta d...,forest whitaker angela bassett loretta devine...,comedy drama romance forest whitaker angela ba...
4,Father of the Bride Part II (1995),Comedy,Charles Shyer,"Steve Martin, Martin Short, Diane Keaton, Kimb...",comedy,charles shyer,steve martin martin short diane keaton kimb...,comedy charles shyer,comedy steve martin martin short diane keaton,charles shyer steve martin martin short dian...,comedy charles shyer steve martin martin shor...


In [4]:
data_title = df['Title'].to_numpy()
data_genre = df['Genre'].apply(lambda x: ", ".join(x.lower().split(','))).to_numpy()
data_director = df['Director'].apply(lambda x: ', '.join([name.strip() for name in x.lower().split(',')[:3]])).to_numpy()
data_actor = df['Actors'].apply(lambda x: ', '.join([name.strip() for name in x.lower().split(',')[:3]])).to_numpy()

In [5]:
data_genre

array(['adventure, animation, children, comedy, fantasy',
       'adventure, children, fantasy', 'comedy, romance', ...,
       'documentary', 'comedy, drama', 'action, adventure, drama'],
      dtype=object)

# Token Match Function

In [6]:
a = ['a', 'b', 'c']
b = 'd'
(b in a)*1

0

In [7]:
def token_match_score(genre_input, director_input, actor_input, data):
    data_title = data['Title'].to_numpy()
    data_genre = data['Genre'].apply(lambda x: ', '.join(x.split(','))).to_numpy()
    data_director = data['Director'].apply(lambda x: ', '.join([name.strip() for name in x.split(',')[:3]])).to_numpy()
    data_actor = data['Actors'].apply(lambda x: ', '.join([name.strip() for name in x.split(',')[:3]])).to_numpy()
    
    genre_score = []
    director_score = []
    actor_score = []
    total_score = []
    
    for i in range(len(data_title)):
        if genre_input is not None:
            sample_genre_score = (genre_input in data_genre[i].split(', '))*1
        else:
            sample_genre_score = 0
        
        if director_input is not None:
            sample_director_score = (director_input in data_director[i].split(', '))*1
        else:
            sample_director_score = 0
        
        if actor_input is not None:
            sample_actor_score = (actor_input in data_director[i].split(', '))*1
        else:
            sample_actor_score = 0
        
        sample_total_score = sample_genre_score + sample_director_score + sample_actor_score
        
        genre_score.append(sample_genre_score)
        director_score.append(sample_director_score)
        actor_score.append(sample_actor_score)
        total_score.append(sample_total_score)
        
    
    result_df = pd.DataFrame(np.array([data_title, data_genre, data_director, data_actor, genre_score, director_score, actor_score, total_score]).T, 
                            columns=['Title', 'Genre', 'Director', 'Actor', 'Pred_Genre_Score', 'Pred_Director_Score', 'Pred_Actor_Score', 'Pred_Total_Score'])
    
    return result_df
    

In [15]:
# Example

# genre_input = 'Crime'
# director_input = 'Francis Ford Coppola'
# genre_input = 'Adventure'
# director_input = 'John Lasseter'
genre_input = None
director_input = None
actor_input = 'Ben Affleck'

example_result_df = token_match_score(genre_input, director_input, actor_input, df)


In [17]:
example_result_df[example_result_df['Pred_Total_Score']==1]

Unnamed: 0,Title,Genre,Director,Actor,Pred_Genre_Score,Pred_Director_Score,Pred_Actor_Score,Pred_Total_Score
11771,Gone Baby Gone (2007),"Crime, Drama, Mystery",Ben Affleck,"Casey Affleck, Morgan Freeman, Ed Harris",0,0,1,1
15080,"Town, The (2010)","Crime, Drama, Thriller",Ben Affleck,"Ben Affleck, Rebecca Hall, Jeremy Renner",0,0,1,1
18347,Argo (2012),"Drama, Thriller",Ben Affleck,"Ben Affleck, Bryan Cranston, Alan Arkin",0,0,1,1
38748,Live by Night (2017),"Crime, Drama",Ben Affleck,"Ben Affleck, Zoe Saldana, Elle Fanning",0,0,1,1


In [18]:
import os
# from google.colab import drive

import numpy as np
import pandas as pd
import json
import random
import time
import datetime
# from rake_nltk import Rake
import pandas as pd
from scipy import spatial

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

In [19]:
dict={
    "GDA" : 'Bag_of_words',
    "GD"  : 'BoW_genre_director',
    "GA"  : 'BoW_genre_actors',
    "DA"  : 'BoW_director_actors',
    "G"   : 'BoW_genre',
    "D"   : 'BoW_director',
    "A"   : 'BoW_actors',
}

In [40]:
def recommender(genre, director, actors):
  combo = ""
  search_term=""
  if genre != None:
    combo += "G"
    search_term += genre.lower() +" "
  if director != None:
    combo += "D"
    search_term += director.lower() + " "
  if actors != None:
    combo += "A"
    search_term += actors.lower()

  # count = CountVectorizer()
  count = TfidfVectorizer()
  count_matrix = count.fit_transform(df[dict[combo]])

  count_matrix_search=count.transform([search_term])
  cosine_sim = cosine_similarity(count_matrix, count_matrix_search)

  def recommend_search(cosine_sim = cosine_sim):
    recommended_movies = []
    score_series = pd.Series(cosine_sim).sort_values(ascending = False)
    top_10_indices = list(score_series.iloc[:10].index)
    
    #return df.iloc[top_10_indices]
    for i in top_10_indices:
      recommended_movies.append(f"Title: {list(df['Title'])[i]}\nGenre: {list(df['Genre'])[i]}\nDirector: {list(df['Director'])[i]}\nActors: {','.join(df['Actors'][i].split(',')[:3])}")

    return recommended_movies

  return recommend_search(cosine_sim.flatten())

recommender(genre_input,director_input, actor_input)

['Title: Paycheck (2003)\nGenre: Action,Sci-Fi,Thriller\nDirector: John Woo\nActors: Ben Affleck, Uma Thurman, Aaron Eckhart',
 'Title: Changing Lanes (2002)\nGenre: Drama,Thriller\nDirector: Roger Michell\nActors: Ben Affleck, Samuel L. Jackson, Toni Collette',
 "Title: The Accountant (2016)\nGenre: Crime,Drama,Thriller\nDirector: Gavin O'Connor\nActors: Anna Kendrick,Ben Affleck,J.K. Simmons",
 'Title: Forces of Nature (1999)\nGenre: Comedy,Romance\nDirector: Bronwen Hughes\nActors: Ben Affleck, Steve Zahn, Sandra Bullock',
 'Title: Gerry (2002)\nGenre: Adventure,Drama\nDirector: Gus Van Sant\nActors: Matt Damon, Casey Affleck',
 'Title: Reindeer Games (2000)\nGenre: Action,Thriller\nDirector: John Frankenheimer\nActors: Ben Affleck, Dennis Farina, Gary Sinise',
 'Title: Good Will Hunting (1997)\nGenre: Drama,Romance\nDirector: Gus Van Sant\nActors: Ben Affleck, Matt Damon, Stellan Skarsgård',
 'Title: Hollywoodland (2006)\nGenre: Crime,Drama,Mystery,Thriller\nDirector: Allen Coulter