In [1]:
# Install and import packages into mlenv enviroment 
from sentence_transformers import SentenceTransformer, util
import pandas as pd

In [2]:
# Define model from sentence_transformers
model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
# Import cleaned data csvs
file_anime = 'data/cleaned_anime.csv'
file_la = 'data/cleaned_live_actions.csv'

In [4]:
# Create dfs
anime_df = pd.read_csv(file_anime, index_col=0) # add index_col=0 so that there aren't 2 index columns!
la_df = pd.read_csv(file_la, index_col=0)

In [5]:
# Make a list of anime descriptions from df
anime_desc = anime_df['synopsis'].tolist()

In [6]:
# Make a list of netflix, hulu, amazon prime, disney+ descriptions
la_desc = la_df['description'].tolist()

In [7]:
# Encode anime descriptions 
anime_embedding = model.encode(anime_desc) # Encoding the anime list

In [8]:
# Encode live action descriptions 
la_embedding = model.encode(la_desc) # Encoding the netflix list

In [9]:
# Compute cosine similarities
cos_sim = util.cos_sim(la_embedding, anime_embedding)
#cos_sim

In [11]:
# Build reccomendations column in la_df from cos_sim tensor

recs = []
# for each row in the cos_sim tensor
for i in range(len(cos_sim)):
    mydict={}

    # for each col in the row
    for x in range(len(cos_sim[i])):
        #assign title to the column from anime_df["title"][col#]
        try:
            mydict[anime_df["title"][x]] = cos_sim[i][x]
        except KeyError:
            continue

    # find the max value in all the columns
    top2 = sorted(mydict, key=mydict.get, reverse=True)[:5]
    recs.append(top2)

la_df['recommended animes'] = recs

la_df.head()

Unnamed: 0,show_id,title,description,source,recommended animes
0,n-s1,Dick Johnson Is Dead,"As her father nears the end of his life, filmm...",Netflix,"[Higashi no Eden, Mobile Suit Gundam Thunderbo..."
1,n-s2,Blood & Water,"After crossing paths at a party, a Cape Town t...",Netflix,"[Ranma ½ OVA, Stringendo+Accelerando Ultimatum..."
2,n-s3,Ganglands,To protect his family from a powerful drug lor...,Netflix,"[Azumanga Daioh, Halo Legends, Non Non Biyori,..."
3,n-s4,Jailbirds New Orleans,"Feuds, flirtations and toilet talk go down amo...",Netflix,"[Shokugeki no Souma: Shin no Sara, Stand By Me..."
4,n-s5,Kota Factory,In a city of coaching centers known to train I...,Netflix,"[Genshiken, Tate no Yuusha no Nariagari, Saena..."


In [12]:
# Export new csv
la_df.to_csv('data/live_actions_with_anime_recs.csv')