In [1]:
# Install and import packages into the mlenv environment
from sentence_transformers import SentenceTransformer, util
import pandas as pd
from sortedcontainers import SortedDict

In [2]:
# Load the pretrained sentence-transformers model used to embed the descriptions
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

In [3]:
# Paths of the cleaned CSV inputs (anime and live-action titles)
file_anime, file_la = (
    'Final Resources/cleaned_anime.csv',
    'Final Resources/cleaned_live_actions.csv',
)

In [4]:
# Load the cleaned CSVs into DataFrames.
# index_col=0 uses the first CSV column as the index, so the saved index is
# not loaded as an extra data column (avoids ending up with 2 index columns).
anime_df = pd.read_csv(file_anime, index_col=0)
la_df = pd.read_csv(file_la, index_col=0)

In [5]:
# Uncomment to test on only the first 50 rows of each DataFrame
# anime_df = anime_df.head(50)
# la_df = la_df.head(50)
# anime_df = anime_df.reset_index()
# la_df = la_df.reset_index()

In [6]:
# Pull the 'description_without_stopwords' column of anime_df out as a plain Python list
anime_desc = anime_df.loc[:, 'description_without_stopwords'].to_list()

In [7]:
# Same for the live-action titles (Netflix, Hulu, Amazon Prime, Disney+):
# their 'description_without_stopwords' column as a plain Python list
la_desc = la_df.loc[:, 'description_without_stopwords'].to_list()

In [8]:
# Embed every anime description with the sentence-transformers model
# (one embedding per entry of anime_desc, in the same order)
anime_embedding = model.encode(sentences=anime_desc)

In [9]:
# Embed every live-action description with the same model
# (one embedding per entry of la_desc, in the same order)
la_embedding = model.encode(sentences=la_desc)

In [10]:
# Pairwise cosine similarity between the two embedding sets:
# cos_sim[i][j] compares live-action description i with anime description j
cos_sim = util.cos_sim(a=la_embedding, b=anime_embedding)

In [13]:
def _with_default_index(df):
    """Return `df` with a default 0..n-1 index, keeping the old index as a column.

    If `df` already has the default RangeIndex it is returned unchanged, so
    re-running this cell does not keep adding leftover index columns
    ('index', then 'level_0', ...) the way a bare `df = df.reset_index()` does.
    """
    idx = df.index
    if isinstance(idx, pd.RangeIndex) and idx.start == 0 and idx.step == 1:
        return df
    return df.reset_index()


# Row labels must be 0..n-1 so they line up with the row/column positions of cos_sim
la_df = _with_default_index(la_df)
anime_df = _with_default_index(anime_df)

In [14]:
# Build the recommendation columns (rec1..rec5) in la_df from the cos_sim tensor

N_RECS = 5  # number of anime recommendations stored per live-action title


def top_anime_matches(sim, titles, n_recs=N_RECS):
    """Format the best-matching titles for every row of a similarity matrix.

    Parameters
    ----------
    sim : 2-D tensor of similarity scores, one row per item to recommend for
        and one column per candidate. Must provide `.topk` and `.shape`, as
        the tensor returned by `util.cos_sim` does.
    titles : sequence of candidate titles; titles[j] belongs to column j of `sim`.
    n_recs : number of recommendations to produce per row.

    Returns
    -------
    list of lists, one inner list per row of `sim`. Each inner list holds
    exactly `n_recs` entries of the form '<title> (<pct>% Match)', best match
    first, padded with None when there are fewer than `n_recs` candidates.

    Raises
    ------
    ValueError
        If the number of titles differs from the number of columns in `sim`
        (a misalignment the old per-element `except KeyError` silently hid).
    """
    n_candidates = sim.shape[1]
    if len(titles) != n_candidates:
        raise ValueError(
            f'got {len(titles)} titles for {n_candidates} similarity columns'
        )

    k = min(n_recs, n_candidates)
    # topk ranks by column position, so candidates with identical scores are
    # all kept (a dict keyed by score would let them overwrite each other).
    best_scores, best_cols = sim.topk(k, dim=1)
    best_pcts = (best_scores * 100).tolist()
    best_cols = best_cols.tolist()

    padding = [None] * (n_recs - k)
    return [
        [f'{titles[col]} ({round(pct)}% Match)' for pct, col in zip(row_pcts, row_cols)]
        + padding
        for row_pcts, row_cols in zip(best_pcts, best_cols)
    ]


rec_cols = [f'rec{rank}' for rank in range(1, N_RECS + 1)]
rec_rows = top_anime_matches(cos_sim, anime_df['title'].tolist())

# Row r of rec_rows belongs to the r-th row of la_df; reusing la_df's index
# makes the column assignment below align row for row.
rec_frame = pd.DataFrame(rec_rows, columns=rec_cols, index=la_df.index)
for col in rec_cols:
    la_df[col] = rec_frame[col]

la_df.head()

Unnamed: 0,index,show_id,title,description,release_year,type,listed_in,source,description_without_stopwords,rec1,rec2,rec3,rec4,rec5
0,0,n-s1,Dick Johnson Is Dead,"As her father nears the end of his life, filmm...",2020,Movie,Documentaries,Netflix,"As father nears end life, filmmaker Kirsten Jo...",Mnemosyne: Mnemosyne no Musume-tachi (43% Match),Onmyou Taisenki (42% Match),Kono Subarashii Sekai ni Shukufuku wo! (40% Ma...,Monster (40% Match),Ajin Part 2: Shoutotsu (40% Match)
1,1,n-s2,Blood & Water,"After crossing paths at a party, a Cape Town t...",2021,TV Show,"International TV Shows, TV Dramas, TV Mysteries",Netflix,"After crossing paths party, Cape Town teen set...",Ranma ½: Kessen Tougenkyou! Hanayome wo Torimo...,Oban Star-Racers (40% Match),Seishun Buta Yarou wa Bunny Girl Senpai no Yum...,Bokura no Nanokakan Sensou (39% Match),Seitokai Yakuindomo OVA (38% Match)
2,2,n-s3,Ganglands,To protect his family from a powerful drug lor...,2021,TV Show,"Crime TV Shows, International TV Shows, TV Act...",Netflix,"To protect family powerful drug lord, skilled ...",Lupin III: Nusumareta Lupin (46% Match),Lupin III: Part III (44% Match),Lupin the Third: Mine Fujiko to Iu Onna (43% M...,Tetsuwan Birdy (41% Match),Lupin III: Tenshi no Tactics - Yume no Kakera ...
3,3,n-s4,Jailbirds New Orleans,"Feuds, flirtations and toilet talk go down amo...",2021,TV Show,"Docuseries, Reality TV",Netflix,"Feuds, flirtations toilet talk go among incarc...",School Rumble: Ichi Gakki Hoshuu (43% Match),Kimagure Orange☆Road OVA (42% Match),Jigoku Sensei Nube OVA (39% Match),One Piece Recap (39% Match),Code Geass: Hangyaku no Lelouch R2 Special Edi...
4,4,n-s5,Kota Factory,In a city of coaching centers known to train I...,2021,TV Show,"International TV Shows, Romantic TV Shows, TV ...",Netflix,In city coaching centers known train India’s f...,Kyou kara Ore wa!! (47% Match),Ro-Kyu-Bu! SS (43% Match),Ojiichan no Hanabi (43% Match),Zan Sayonara Zetsubou Sensei (42% Match),Major S3 (42% Match)


In [16]:
# Save la_df (including the recommendation columns) to a new CSV file.
# to_csv writes the DataFrame index as the first column by default.
recs_csv = 'Final Resources/live_actions_with_anime_recs.csv'
la_df.to_csv(recs_csv)