In [1]:
# Import packages (they must already be installed in the mlenv environment)
from pathlib import Path

import pandas as pd
from sentence_transformers import SentenceTransformer, util
from sortedcontainers import SortedDict

In [2]:
# Load the 'all-MiniLM-L6-v2' SentenceTransformer model; it is used further
# down to encode both the anime and the live-action descriptions.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [5]:
# Locations of the cleaned input CSVs (anime first, live actions second)
file_anime, file_la = (
    'Final Resources/cleaned_anime.csv',
    'Final Resources/cleaned_live_actions.csv',
)

In [6]:
# Load both cleaned CSVs into DataFrames.
# index_col=0 reuses the first CSV column as the index, so the frames do not
# end up with two index columns.
anime_df, la_df = (
    pd.read_csv(csv_path, index_col=0) for csv_path in (file_anime, file_la)
)

In [8]:
# Optional down-sampling for quick test runs.
# SAMPLE_ROWS = 50 keeps only the first 50 rows of each frame (the behaviour
# this cell has always had); set SAMPLE_ROWS = None to process the full data.
SAMPLE_ROWS = 50
if SAMPLE_ROWS is not None:
    anime_df = anime_df.head(SAMPLE_ROWS)
    la_df = la_df.head(SAMPLE_ROWS)

# Always renumber the rows 0..n-1, whether or not a sample was taken: the
# recommendation cell addresses rows by these integer labels.
# reset_index() (without drop=True) keeps the previous index as an "index" column.
anime_df = anime_df.reset_index()
la_df = la_df.reset_index()

In [9]:
# Collect the anime synopses as a plain Python list (one entry per anime_df row)
anime_desc = anime_df.loc[:, 'synopsis'].to_list()

In [10]:
# Collect the live-action (Netflix, Hulu, Amazon Prime, Disney+) descriptions
# as a plain Python list (one entry per la_df row)
la_desc = la_df.loc[:, 'description'].to_list()

In [11]:
# Encode the anime synopses with the sentence-transformers model
anime_embedding = model.encode(anime_desc)  # embeddings for the anime_desc list

In [12]:
# Encode the live-action descriptions with the same model
la_embedding = model.encode(la_desc)  # embeddings for the la_desc list (all live-action sources, not only Netflix)

In [13]:
# Compute pairwise cosine similarities between the two embedding sets.
# Live-action embeddings are passed first, so the recommendation cell reads
# cos_sim[i] as the scores of live-action row i against every anime.
cos_sim = util.cos_sim(la_embedding, anime_embedding)

In [21]:
# Build the recommendation columns (rec1..rec5) in la_df from the cos_sim tensor


def top_matches(score_row, titles, top_n=5):
    """Return the best-matching titles for one live-action entry.

    Parameters
    ----------
    score_row : sequence of float
        Similarity scores in percent, one per title, aligned by position
        with ``titles``.
    titles : sequence
        Candidate titles.
    top_n : int
        Number of labels to return.

    Returns
    -------
    list
        Exactly ``top_n`` entries, best match first, each formatted as
        ``'<title> (<pct>% Match)'``. When fewer than ``top_n`` candidates
        exist, the tail is padded with ``None`` instead of raising.
    """
    # Rank candidate positions rather than keying a dict by score: two titles
    # with an identical score must both survive. Python's sort is stable, so
    # ties keep their original order.
    ranked = sorted(
        range(len(score_row)), key=lambda pos: score_row[pos], reverse=True
    )[:top_n]
    labels = [f"{titles[pos]} ({round(score_row[pos])}% Match)" for pos in ranked]
    return labels + [None] * (top_n - len(labels))


TOP_N = 5

# Plain lists give purely positional access, so the result no longer depends on
# the DataFrame index labels being 0..n-1 (previously a mismatch was hidden by
# a swallowed KeyError).
anime_titles = anime_df["title"].tolist()

# Scale to percent on the tensor and convert to nested Python floats once,
# instead of converting element by element inside nested loops.
score_rows = (cos_sim * 100).tolist()

rec_rows = [top_matches(row, anime_titles, TOP_N) for row in score_rows]

# One column assignment per rank; cos_sim has one row per la_df row, so the
# lists line up with la_df by position.
for rank in range(TOP_N):
    la_df[f"rec{rank + 1}"] = [labels[rank] for labels in rec_rows]

la_df.head()

Unnamed: 0,index,show_id,title,description,release_year,type,listed_in,source,description_without_stopwords,rec1,rec2,rec3,rec4,rec5
0,0,n-s1,Dick Johnson Is Dead,"As her father nears the end of his life, filmm...",2020,Movie,Documentaries,Netflix,"As father nears end life, filmmaker Kirsten Jo...",Kimetsu no Yaiba (30% Match),Kaguya-hime no Monogatari (26% Match),Heartcatch Precure! (26% Match),Paradise Kiss (26% Match),Owarimonogatari 2nd Season (25% Match)
1,1,n-s2,Blood & Water,"After crossing paths at a party, a Cape Town t...",2021,TV Show,"International TV Shows, TV Dramas, TV Mysteries",Netflix,"After crossing paths party, Cape Town teen set...",Heartcatch Precure! (34% Match),Owarimonogatari 2nd Season (27% Match),Kaguya-hime no Monogatari (23% Match),Uchuu Senkan Yamato 2202: Ai no Senshi-tachi (...,K-On! (21% Match)
2,2,n-s3,Ganglands,To protect his family from a powerful drug lor...,2021,TV Show,"Crime TV Shows, International TV Shows, TV Act...",Netflix,"To protect family powerful drug lord, skilled ...",Lupin III: Part II (42% Match),Code Geass: Hangyaku no Lelouch R2 (35% Match),Gintama (33% Match),Kimetsu no Yaiba (31% Match),Bungou Stray Dogs: Dead Apple (30% Match)
3,3,n-s4,Jailbirds New Orleans,"Feuds, flirtations and toilet talk go down amo...",2021,TV Show,"Docuseries, Reality TV",Netflix,"Feuds, flirtations toilet talk go among incarc...",Lucky☆Star: Original na Visual to Animation (3...,Clannad: After Story (20% Match),Toshokan Sensou: Kakumei no Tsubasa (20% Match),Bungou Stray Dogs: Dead Apple (18% Match),Last Exile (17% Match)
4,4,n-s5,Kota Factory,In a city of coaching centers known to train I...,2021,TV Show,"International TV Shows, Romantic TV Shows, TV ...",Netflix,In city coaching centers known train India’s f...,Owarimonogatari 2nd Season (40% Match),Byousoku 5 Centimeter (32% Match),Haikyuu!! Second Season (27% Match),Haikyuu!!: Karasuno Koukou vs. Shiratorizawa G...,Toshokan Sensou: Kakumei no Tsubasa (24% Match)


In [12]:
# Export la_df (including its recommendation columns) to a new CSV.
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing.
output_csv = 'data/live_actions_with_anime_recs.csv'
Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
la_df.to_csv(output_csv)