# 1. Pre-processing Data <br/>
Here we import everything needed, including packages and datasets. We also subset the data and remove invalid ratings.

In [1]:
import pandas as pd
import numpy as np
import math
import scipy.sparse
from lightfm import LightFM
from lightfm import cross_validation
from lightfm.evaluation import *

anime = pd.read_csv('anime-recommendations-database/anime.csv')
rating = pd.read_csv('anime-recommendations-database/rating.csv')



In [2]:
def normalize_rating(rating):
    result = normalzieRating(rating)
    rating_normalized = pd.merge(rating, result, on='user_id')
    rating_normalized = rating_normalized.replace(-1, 0)
    rating_normalized['rating'] = (rating_normalized['rating'] - rating_normalized['mean'])/rating_normalized['std']
    return result, rating_normalized

def normalzieRating(rating):
    rating = rating.replace(-1, 0)
    ret = pd.merge(rating[['user_id', 'rating']].groupby('user_id').mean(),
                   rating[['user_id', 'rating']].groupby('user_id').std(), 
                   left_index=True, right_index=True)
    ret = ret.fillna(1)
    ret = ret.replace(0,1)
    ret = ret.rename(index=str, columns={"rating_x": "mean", "rating_y": "std"})
    ret['user_id'] = pd.to_numeric(ret.index)
    ret = ret.reset_index(drop=True)
    return ret

rating_bias, rating = normalize_rating(rating)

In [18]:
MAX_ITEM = 12294
MAX_USER = 73515
numUser = math.floor(MAX_USER*0.5)
numItem = math.floor(MAX_ITEM*0.5)

rating_sample = rating[rating["user_id"].isin(range(1,numUser+1))]
rating_sample = rating_sample[rating_sample["anime_id"].isin(range(1,numItem+1))]
rating_sample = rating_sample[rating_sample.rating != -1].sample(frac=1).reset_index(drop=True)

ani_sample = anime[anime["anime_id"].isin(range(1,numItem+1))]

In [19]:
s = rating_sample[['user_id', 'anime_id']].groupby('user_id').count()
s = s.sort_values(by=['anime_id'])
mats = np.array_split(s, 10)

In [40]:
# Here we create the interaction matrix and item feature matrix
def getMatrix(m):
    mat = rating_sample[rating_sample['user_id'].isin(m.index.values)]
    ## interaction
    R_df = mat.pivot(index = 'user_id', columns = 'anime_id', values = 'rating').fillna(0)
    interaction = scipy.sparse.coo_matrix(R_df.values)
    int_train,int_test = cross_validation.random_train_test_split(interaction, test_percentage=0.2, random_state=None)
    ## item feature
    test = (ani_sample.sort_values(by=['anime_id'])).set_index('anime_id')

#     aniType = pd.get_dummies(test['type'])
#     genre = test['genre'].str.get_dummies(sep=',')
#     epi = test['episodes'].replace({'Unknown': None})
#     epi = pd.to_numeric(epi)
#     epi = epi.replace({None: epi.mean()})
#     epi = (epi-epi.mean())/epi.std()
#     avgRating = test['rating'].fillna(test['rating'].mean())

    #item_feature = pd.concat([genre,aniType,epi,avgRating],axis=1)
    #item_feature = scipy.sparse.csr_matrix(item_feature.values)
    
    
    return (int_train,int_test,item_feature)

# 2. Setup model

In [21]:
model = LightFM(loss='warp')

In [22]:
pak = []
for m in mats:
    int_train,int_test,item_feature = getMatrix(m)
    model.fit(int_train, epochs=30)
    test_precision = auc_score(model, int_test, int_train).mean()
    #print(test_precision)
    pak.append(test_precision)
    
pak

[0.81667215,
 0.85944694,
 0.88485676,
 0.90531117,
 0.91865367,
 0.9239209,
 0.92959243,
 0.93111825,
 0.9305503,
 0.9259246]

In [59]:
pakf = []
for m in mats:
    int_train,int_test,item_feature = getMatrix(m)    
    model.fit(int_train, epochs=100,item_features=item_feature)
    test_precision = auc_score(model, int_test, int_train,item_features=item_feature).mean()
    #print(test_precision)
    pakf.append(test_precision)
    
pakf

[-0.00010513488,
 -0.00022864049,
 -0.00041338775,
 -0.0006107773,
 -0.0008880728,
 -0.0011174896,
 -0.0014368581,
 -0.0018594806,
 -0.0025783153,
 -0.0051095523]

# 3. BELOW ARE IRRELEVANT PART!!!!!

# IRRELEVANT!!!!!!

In [48]:
int_train,int_test,item_feature = getMatrix(mats[0])
item_feature.shape

(4480, 84)

In [67]:
test = (ani_sample.sort_values(by=['anime_id']))
type(test['anime_id'])

pandas.core.series.Series

In [69]:
item_features = test.build_item_features(((row['anime_id'], [row['type'],row['genre'],row['rating'],row['episodes']])
                                              for row in test.iterrows()))
item_features

AttributeError: 'DataFrame' object has no attribute 'build_item_features'

In [85]:
rateTest = rating_sample[rating_sample['user_id'].isin(mats[6].index.values)]

In [92]:
from lightfm.data import Dataset

dataset = Dataset()
dataset.fit((x[1]['user_id'] for x in rateTest.iterrows()),
            (x[1]['anime_id'] for x in rateTest.iterrows()))
dataset.fit_partial(items=(x[1]['anime_id'] for x in test.iterrows()),
                    item_features=(x[1]['type'] for x in test.iterrows()))
dataset.fit_partial(items=(x[1]['anime_id'] for x in test.iterrows()),
                    item_features=(x[1]['genre'] for x in test.iterrows()))
dataset.fit_partial(items=(x[1]['anime_id'] for x in test.iterrows()),
                    item_features=(x[1]['episodes'] for x in test.iterrows()))
dataset.fit_partial(items=(x[1]['anime_id'] for x in test.iterrows()),
                    item_features=(x[1]['rating'] for x in test.iterrows()))
dataset

<lightfm.data.Dataset at 0x1107cf0b8>

In [93]:
item_features = dataset.build_item_features(((x[1]['anime_id'], [x[1]['type'],x[1]['genre'],x[1]['episodes'],x[1]['rating']])
                                              for x in test.iterrows()))
item_features

ValueError: Feature nan not in eature mapping. Call fit first.

In [80]:
for x in test.iterrows():
    print(x[1])
    break

anime_id                                                  1
name                                           Cowboy Bebop
genre       Action, Adventure, Comedy, Drama, Sci-Fi, Space
type                                                     TV
episodes                                                 26
rating                                                 8.82
members                                              486824
Name: 22, dtype: object


In [87]:
test

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
22,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,8.82,486824
152,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,8.40,137636
214,6,Trigun,"Action, Comedy, Sci-Fi",TV,26,8.32,283069
2095,7,Witch Hunter Robin,"Action, Drama, Magic, Mystery, Police, Superna...",TV,26,7.36,64905
3159,8,Beet the Vandel Buster,"Adventure, Fantasy, Shounen, Supernatural",TV,52,7.06,9848
433,15,Eyeshield 21,"Action, Comedy, Shounen, Sports",TV,145,8.08,83648
325,16,Hachimitsu to Clover,"Comedy, Drama, Josei, Romance",TV,24,8.18,130646
976,17,Hungry Heart: Wild Striker,"Comedy, Shounen, Slice of Life, Sports",TV,52,7.74,13469
263,18,Initial D Fourth Stage,"Action, Cars, Drama, Seinen, Sports",TV,24,8.24,41584
38,19,Monster,"Drama, Horror, Mystery, Police, Psychological,...",TV,74,8.72,247562
