In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from scrape import get_user_anime

In [2]:
# Load the four user-score shards.
# Each row is a user, each column is an anime, and each cell holds that
# user's score for that anime.
users1 = pd.read_csv("clean_data/users1.csv")
users2 = pd.read_csv("clean_data/users2.csv")
users3 = pd.read_csv("clean_data/users3.csv")
users4 = pd.read_csv("clean_data/users4.csv")

# Stack the shards into one frame (fresh 0..n-1 index) and keep a plain
# ndarray view for the numeric work done by the recommender.
users = pd.concat([users1, users2, users3, users4], ignore_index=True)
users_array = users.to_numpy()

# Per-anime popularity weight: column-wise score total divided by 10 and
# rounded up, clamped to a minimum of 1 so it is always safe to divide by.
# NOTE(review): this is the summed score / 10, not a count of non-zero
# entries -- confirm that is what "total watchers" is meant to be.
total_watchers = np.maximum(np.ceil(users_array.sum(axis=0) / 10.0), 1.0)

In [3]:
# Title -> numeric anime id lookup, used to write the hand-made test users
# by name instead of by raw id.
d = {
    'One Punch Man': 30276,
    'Death Note': 1535,
    'Shingeki no Kyojin': 16498,
    'Angel Beats!': 6547,
    'Tengen Toppa Gurren Lagann': 2001,
    'Kimi no Na wa': 32281,
    'Haikyuu!!': 20583,
    'Haikyuu!! Second Season': 28891,
    'Noragami': 20507,
    'Made in Abyss': 34599,
    'Planetus': 329,
    'Hunter x Hunter (2011)': 11061,
    'Boku no Hero Academia': 31964,
    'Fairy Tail': 6702,
}

In [4]:
# Catalogue of anime; read later for its "anime_id" and "title" columns.
all_anime = pd.read_csv("data/AnimeList.csv")

# anime_id_map_reverse[position] -> anime id stored at that position.
# `np.int` was removed in NumPy 1.24, so cast with the builtin `int`.
# `ndmin=1` keeps the result 1-D even when the file holds a single value.
anime_id_map_reverse = np.loadtxt(
    "clean_data/anime_id_map_reverse.csv", ndmin=1
).astype(int)

# Inverse lookup: anime id -> position. Keys are plain Python ints, which
# hash and compare equal to the corresponding NumPy integers.
anime_id_map = {
    int(anime_id): position
    for position, anime_id in enumerate(anime_id_map_reverse)
}

In [11]:
def convert_raw(data):
    """Keep only the entries whose anime id is known to the model.

    Parameters
    ----------
    data : iterable of mappings
        Each item must provide an "anime_id" key.

    Returns
    -------
    list
        The items of `data`, in their original order, whose "anime_id"
        appears in `anime_id_map_reverse`.
    """
    return [entry for entry in data if entry["anime_id"] in anime_id_map_reverse]

In [12]:
# Fetch each listed account's anime list and drop entries whose anime id
# is not in anime_id_map_reverse.
usernames = ["greenmythos", "fingoldin"]
test_users = list(convert_raw(get_user_anime(name)) for name in usernames)

In [10]:
# Hand-written test users: one inner list per user, one
# {"anime_id", "score"} dict per rated title.
# NOTE(review): when the notebook is run top to bottom this assignment
# replaces the `test_users` list built from `usernames` in the cell above.
test_users = [
    [{ "anime_id": anime_id, "score": score } for anime_id, score in ratings]
    for ratings in [
        [(d['Shingeki no Kyojin'], 8.0), (d['One Punch Man'], 6.0), (21, 8.0)],
        [(d['Angel Beats!'], 10.0)],
        [(d['Kimi no Na wa'], 10.0)],
        [(d['Angel Beats!'], 10.0), (d['Kimi no Na wa'], 10.0)],
        [
            (d['Shingeki no Kyojin'], 9.0),
            (d['Death Note'], 10.0),
            (d['Tengen Toppa Gurren Lagann'], 10.0),
            (d['One Punch Man'], 10.0),
        ],
        [(d['Haikyuu!!'], 10.0), (d['Haikyuu!! Second Season'], 10.0)],
        [(d['Planetus'], 10.0)],
        [
            (d['Noragami'], 9.0),
            (d['Made in Abyss'], 10.0),
            (d['Hunter x Hunter (2011)'], 10.0),
            (d['Boku no Hero Academia'], 10.0),
            (d['Fairy Tail'], 8.0),
        ],
    ]
]

In [15]:
def kernel(user1, user2, penalty=2.0):
    """Similarity weight between two score vectors.

    `np.ceil(x / 10.0)` maps a score in (0, 10] to 1 and a score of 0 to 0,
    so the product of the two indicators selects the positions that are
    non-zero in both vectors (assuming scores lie in [0, 10]).

    Parameters
    ----------
    user1, user2 : numpy.ndarray
        Score vectors of equal length.
    penalty : float
        Multiplier applied to the score differences before taking the norm.

    Returns
    -------
    float
        exp(-||penalty * (user1 - user2) on shared positions||_2 / n_shared),
        or 0 when the two vectors share no positions.
    """
    both_rated = np.ceil(user1 / 10.0) * np.ceil(user2 / 10.0)
    shared = np.sum(both_rated)
    if shared == 0:
        # Nothing in common: no evidence of similarity.
        return 0

    weighted_gap = penalty * (user1 - user2) * both_rated
    distance = math.sqrt(np.sum(weighted_gap * weighted_gap))
    return math.exp(-distance / shared)

def predict(test_user,scale=1.0,threshold=0.0,variety=0.85,penalty=2.0):
    """Return the column indices of the 10 best recommendations, best first.

    Every row of `users_array` votes for each anime with weight
    `kernel(test_user, row, penalty)`. A vote is worth `score ** scale`
    minus `threshold ** scale` for every anime that row has a non-zero score
    for. The totals are divided by `total_watchers ** variety`.

    Parameters
    ----------
    test_user : numpy.ndarray
        Score vector with one entry per anime column (0 = not rated).
        Must have at least 10 entries.
    scale : float
        Exponent applied to scores and to the threshold.
    threshold : float
        Score level subtracted from each non-zero vote.
    variety : float
        Exponent applied to `total_watchers` in the normalisation.
    penalty : float
        Forwarded to `kernel`.

    Returns
    -------
    numpy.ndarray of int
        Ten column indices ordered by descending score.
    """
    watchers = np.power(total_watchers, variety)
    # Loop-invariant, so compute it once instead of once per user.
    baseline = math.pow(threshold, scale)

    anime = np.zeros(len(test_user))
    for user in users_array:
        s = kernel(test_user, user, penalty)
        anime = anime + s * (np.power(user, scale) - np.ceil(user / 10.0) * baseline)
    anime = anime / watchers

    # Exclude titles the test user already rated. Multiplying by zero is not
    # enough: with a positive threshold the remaining scores can be negative,
    # and a masked 0 would then outrank them and be recommended.
    already_rated = np.ceil(test_user / 10.0) > 0
    anime = np.where(already_rated, -np.inf, anime)

    # Take the 10 largest scores, then order just those from best to worst.
    best_anime_idx = np.argpartition(anime, -10)[-10:]
    order = np.argsort(anime[best_anime_idx])[::-1]
    # Builtin `int` replaces `np.int`, which was removed in NumPy 1.24.
    return np.array(best_anime_idx[order], dtype=int)

def convert_test(test_user):
    """Turn a list of {"anime_id", "score"} entries into a dense score vector.

    The vector has one slot per column of `users`. Each entry's score is
    written at the position `anime_id_map` gives for its anime id, and
    every other slot stays 0.

    Raises
    ------
    KeyError
        If an entry's anime id is not a key of `anime_id_map`.
    """
    n_anime = len(users.columns)
    test_vector = np.zeros(n_anime)
    for entry in test_user:
        score = entry["score"]
        column = anime_id_map[entry["anime_id"]]
        test_vector[column] = score
    return test_vector

def convert_predict(anime_id):
    """Look up the title for a predicted column index.

    Despite its name, `anime_id` is used as a position into
    `anime_id_map_reverse`; the value found there is matched against the
    "anime_id" column of `all_anime`, and the "title" of the first matching
    row is returned.
    """
    catalogue_id = anime_id_map_reverse[anime_id]
    matches = all_anime[all_anime["anime_id"] == catalogue_id]
    first_match = matches.iloc[0]
    return first_match["title"]

In [19]:
def full_predict(test_users,scale=1.0,threshold=0.0,variety=0.85,penalty=2.0):
    """Print the ranked recommendations for every test user.

    First prints one header line with the hyperparameters. Then, for each
    user in `test_users`, it converts the ratings with `convert_test`, ranks
    with `predict`, and prints the titles from `convert_predict` as
    "1. <title>   2. <title>   ..." followed by a blank line.

    Returns None; the output goes to stdout.
    """
    print("Variety: ", variety, "  Scale: ", scale, "  Threshold: ", threshold, "  Penalty: ", penalty)
    for ratings in test_users:
        ranked = predict(convert_test(ratings), scale, threshold, variety, penalty)
        pieces = [
            str(rank) + '. ' + convert_predict(column) + "   "
            for rank, column in enumerate(ranked, start=1)
        ]
        print(''.join(pieces) + '\n')

In [18]:
# Hyperparameter sweep: the outer loop steps `variety` through
# 0.6 + 0.1 * {0, 1, 2, 3}; the inner loop lowers `scale` by 1/12 and raises
# `threshold` by 1 per step. `penalty` stays at 2.0 throughout.
for variety_step in range(4):
    for step in range(4):
        full_predict(
            test_users,
            scale=1.0 - step/12,
            threshold=0.0 + step,
            variety=0.6 + 0.1 * variety_step,
            penalty=2.0,
        )

Variety:  0.6
Scale:  1.0
Threshold:  0.0
Penalty:  2.0
1. Code Geass: Hangyaku no Lelouch R2   2. Sword Art Online   3. Steins;Gate   4. Sen to Chihiro no Kamikakushi   5. Clannad   6. Fullmetal Alchemist   7. No Game No Life   8. Naruto   9. Elfen Lied   10. Durarara!!   

1. Death Note   2. Shingeki no Kyojin   3. Sen to Chihiro no Kamikakushi   4. Steins;Gate   5. Fullmetal Alchemist: Brotherhood   6. Code Geass: Hangyaku no Lelouch   7. Angel Beats!   8. Toradora!   9. Code Geass: Hangyaku no Lelouch R2   10. Sword Art Online   

Variety:  0.6
Scale:  0.9166666666666666
Threshold:  1.0
Penalty:  2.0
1. Code Geass: Hangyaku no Lelouch R2   2. Sword Art Online   3. Steins;Gate   4. Sen to Chihiro no Kamikakushi   5. Clannad   6. Fullmetal Alchemist   7. No Game No Life   8. Durarara!!   9. Naruto   10. Elfen Lied   

1. Death Note   2. Shingeki no Kyojin   3. Sen to Chihiro no Kamikakushi   4. Fullmetal Alchemist: Brotherhood   5. Steins;Gate   6. Code Geass: Hangyaku no Lelouch   7

1. Boku no Hero Academia 2nd Season   2. Boku no Hero Academia   3. Re:Zero kara Hajimeru Isekai Seikatsu   4. No Game No Life   5. Noragami Aragoto   6. Boku dake ga Inai Machi   7. Nanatsu no Taizai   8. Ansatsu Kyoushitsu 2nd Season   9. Magi: The Kingdom of Magic   10. Noragami   

1. Koe no Katachi   2. Shigatsu wa Kimi no Uso   3. Steins;Gate   4. One Punch Man   5. Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai.   6. Re:Zero kara Hajimeru Isekai Seikatsu   7. No Game No Life   8. Ookami Kodomo no Ame to Yuki   9. Steins;Gate Movie: Fuka Ryouiki no Déjà vu   10. Fate/Zero 2nd Season   

Variety:  0.9
Scale:  0.75
Threshold:  3.0
Penalty:  2.0
1. Boku no Hero Academia 2nd Season   2. Boku no Hero Academia   3. Re:Zero kara Hajimeru Isekai Seikatsu   4. Steins;Gate   5. Boku dake ga Inai Machi   6. No Game No Life   7. Noragami Aragoto   8. Code Geass: Hangyaku no Lelouch R2   9. Ansatsu Kyoushitsu 2nd Season   10. Magi: The Kingdom of Magic   

1. Koe no Katachi   2. Stei

In [21]:
# Single run with one hand-picked hyperparameter combination.
full_predict(test_users, scale=0.8, threshold=4.5, variety=0.85, penalty=2.5)

Variety:  0.85   Scale:  0.8   Threshold:  4.5   Penalty:  2.5
1. Steins;Gate   2. Code Geass: Hangyaku no Lelouch R2   3. Boku no Hero Academia 2nd Season   4. Boku dake ga Inai Machi   5. No Game No Life   6. Clannad: After Story   7. Re:Zero kara Hajimeru Isekai Seikatsu   8. Fate/Zero 2nd Season   9. Boku no Hero Academia   10. Kiseijuu: Sei no Kakuritsu   

1. Steins;Gate   2. Koe no Katachi   3. Fullmetal Alchemist: Brotherhood   4. One Punch Man   5. Shigatsu wa Kimi no Uso   6. Sen to Chihiro no Kamikakushi   7. Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai.   8. Shingeki no Kyojin   9. Suzumiya Haruhi no Shoushitsu   10. Fate/Zero 2nd Season   

