# Anime User Based

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import scipy.sparse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

In [2]:
anime = pd.read_csv('../anime-recommendations-database/anime.csv')
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


anime_id = ID identifying an anime
genre = list of genres for this anime
rating = average rating out of 10 for this anime

In [3]:
animeRating = pd.read_csv('../anime-recommendations-database/rating.csv')
animeRating.tail()

Unnamed: 0,user_id,anime_id,rating
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9
7813736,73516,8074,9


anime_id = anime the user has rated
rating = rating out of 10 (-1 if user watched but didn't assign a rating)

### Cleaning Data for Missing Values

In [4]:
#Checking which values are null
print anime.isnull().sum()
print animeRating.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64
user_id     0
anime_id    0
rating      0
dtype: int64


In [5]:
anime['genre'] = anime['genre'].fillna('None')
anime['type'] = anime['type'].fillna('None')
anime['rating'] = anime['rating'].fillna('None')
anime.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [6]:
animeRating = animeRating[animeRating.rating > 0]
animeRating.rating.unique()

array([10,  8,  6,  9,  7,  3,  5,  4,  1,  2], dtype=int64)

In [7]:
fullMergedAnime = animeRating.merge(anime, left_on = 'anime_id', right_on = 'anime_id', suffixes= ['_user', ''])
fullMergedAnime.head()

Unnamed: 0,user_id,anime_id,rating_user,name,genre,type,episodes,rating,members
0,1,8074,10,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
1,3,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
2,5,8074,2,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
3,12,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
4,14,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892


### Splitting and Normalizing Data

In [8]:
mergedSubAnime = fullMergedAnime[['user_id', 'anime_id', 'name', 'rating_user']]

#Limiting the dataframe
mergedSubAnime = mergedSubAnime[mergedSubAnime.user_id <= 6000]

In [9]:
trainData, testData = train_test_split(mergedSubAnime, test_size=0.2)

In [10]:
piv = trainData.pivot_table(index=['user_id'], columns=['name'], values='rating_user')
print(piv.shape)

(5614, 6809)


In [11]:
piv.head()

name,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,&quot;Bungaku Shoujo&quot; Memoire,&quot;Bungaku Shoujo&quot; Movie,.hack//G.U. Returner,.hack//G.U. Trilogy,.hack//G.U. Trilogy: Parody Mode,.hack//Gift,.hack//Intermezzo,.hack//Liminality,.hack//Quantum,...,gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?,iDOLM@STER Xenoglossia,iDOLM@STER Xenoglossia Specials,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki,◯
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,2.0,,,,,
7,,,,,,,,,,,...,,,,,,,,,,


In [12]:
piv_norm = piv.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)
piv_norm.fillna(0, inplace=True)

In [13]:
piv_norm = piv_norm.T

In [14]:
piv_norm = piv_norm.loc[:, (piv_norm != 0).any(axis=0)]

In [17]:
piv_norm.head()

user_id,3,5,7,8,11,12,14,16,17,18,...,5986,5989,5990,5991,5992,5993,5994,5997,5999,6000
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Memoire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.0,0.0,0.0,0.169231,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Returner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Trilogy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
piv_sparse = sp.sparse.csr_matrix(piv_norm.values)

### User Cosine Similarity

In [22]:
userCosineSimilarity = cosine_similarity(piv_sparse.T)
print userCosineSimilarity

[[ 1.          0.04260516  0.15566908 ...  0.04234534  0.
   0.01840256]
 [ 0.04260516  1.          0.04122464 ...  0.050485   -0.0086376
   0.01605437]
 [ 0.15566908  0.04122464  1.         ...  0.04747978  0.
  -0.01182892]
 ...
 [ 0.04234534  0.050485    0.04747978 ...  1.          0.04071469
   0.0823297 ]
 [ 0.         -0.0086376   0.         ...  0.04071469  1.
   0.        ]
 [ 0.01840256  0.01605437 -0.01182892 ...  0.0823297   0.
   1.        ]]


In [23]:
userSimData = pd.DataFrame(userCosineSimilarity, index = piv_norm.columns, columns = piv_norm.columns)

In [24]:
userSimData.head()

user_id,3,5,7,8,11,12,14,16,17,18,...,5986,5989,5990,5991,5992,5993,5994,5997,5999,6000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,1.0,0.042605,0.155669,0.010605,0.082607,0.065477,0.0126,0.059192,0.132059,0.045978,...,0.012936,0.032764,0.062925,0.067224,-0.017127,-0.019331,0.044599,0.042345,0.0,0.018403
5,0.042605,1.0,0.041225,-0.011078,0.031792,0.061975,0.10506,-0.003933,0.094331,-0.008005,...,0.002288,0.0,0.0,0.012426,0.035405,-0.025907,0.087275,0.050485,-0.008638,0.016054
7,0.155669,0.041225,1.0,-0.013438,-0.036203,0.062923,0.063765,0.035836,0.099306,0.022571,...,0.038438,-0.050015,-0.022988,0.022932,-0.021682,-0.028583,0.052672,0.04748,0.0,-0.011829
8,0.010605,-0.011078,-0.013438,1.0,0.013833,0.007802,-0.085993,0.0,0.001276,-0.094697,...,0.0,0.0,0.0,0.0,0.0,-0.050668,-0.007846,0.004765,0.0,0.0
11,0.082607,0.031792,-0.036203,0.013833,1.0,0.036085,-0.004367,0.042595,0.030922,0.017031,...,-0.005035,0.015956,0.017943,0.027839,0.108587,-0.021089,0.037101,0.024229,-0.137683,0.005248


In [70]:
userSimData[11].keys()

Int64Index([   3,    5,    7,    8,   11,   12,   14,   16,   17,   18,
            ...
            5986, 5989, 5990, 5991, 5992, 5993, 5994, 5997, 5999, 6000],
           dtype='int64', name=u'user_id', length=5120)

In [133]:
def all_Similar_Users(currentUser):
    if currentUser not in piv_norm.columns:
        return("No data available for {}".format(currentUser))
    
    similarUsers = userSimData.sort_values(by=currentUser, ascending=False).index
    similarUsersResults = userSimData.sort_values(by=currentUser, ascending=False).loc[:,currentUser].tolist()
    similarUsersZipped = zip(similarUsers, similarUsersResults)
#     print similarUsersZipped
    
    return similarUsersZipped

In [134]:
# print all_Similar_Users(7)

[(7L, 1.000000000000003), (693L, 0.2136144591469744), (4762L, 0.1771748480680781), (2723L, 0.17482591151513238), (3038L, 0.16834741165157077), (2634L, 0.15848300601750306), (3L, 0.1556690844733206), (656L, 0.15018327083710561), (5127L, 0.1500995071783109), (559L, 0.1489879321817369), (4379L, 0.14889510999224212), (2740L, 0.14822274708672098), (2281L, 0.14780825460240596), (969L, 0.14725742591532992), (1106L, 0.14570332312651219), (3239L, 0.14566165356275698), (166L, 0.14436692341080898), (4820L, 0.1438631107853004), (837L, 0.13862626708852838), (725L, 0.13839925100256548), (345L, 0.13754124985474375), (2411L, 0.13707195069960115), (3681L, 0.13600393248463746), (1053L, 0.13506444827097522), (2554L, 0.13495056028065852), (1913L, 0.13432599944315737), (2378L, 0.13365739342029087), (2986L, 0.13253402557899346), (2926L, 0.13190073727147628), (1938L, 0.13173760155000047), (1512L, 0.13139298961613727), (248L, 0.1313797053748766), (1860L, 0.13122150502060245), (1682L, 0.13116358791295515), (27

In [35]:
def avgRatingsUser(user):
    userRatings = piv.loc[user, :]
    avgUserRatingList = 0.0
    counter = 0
    for rating in userRatings:
        if not np.isnan(rating):
            avgUserRatingList += rating
            counter += 1
        else:
            continue
    return avgUserRatingList/counter

In [141]:
# Define for current existing user that has some data available

def ratingsUserAnime(currentUser, animeName, similarUsers):
    ratingWeight = 0.0
    similarityWeight = 0.0
    avgCurrentUserRating = avgRatingsUser(currentUser)
#     similarUsers = userSimData[currentUser]
    
    if currentUser in piv_norm.columns:
        for user, userSimilarity in similarUsers:
            #Average of the similar user ratings
            avgUserRating = avgRatingsUser(user)
            
            #Find the rated anime
            userAnimeRating = piv.loc[user, animeName]
#             print userAnimeRating
            if np.isnan(userAnimeRating):
                continue
            elif not np.isnan(userAnimeRating):
#                 print userAnimeRating
                ratingWeight += (userAnimeRating - avgUserRating)*userSimilarity
                similarityWeight += abs(userSimilarity)
    else:
        return ("No data available for: {}".format(currentUser))
    
    if similarityWeight > 0:
        return avgCurrentUserRating + (ratingWeight/similarityWeight)
    else:
        return avgCurrentUserRating

In [142]:
similarUsers = all_Similar_Users(7)
# print ratingsUserAnime(10, "Zombie-Loan", similarUsers
print ratingsUserAnime(11, "Zombie-Loan", similarUsers)

6.618191314195057


In [147]:
def predictUserRatings(currentUser):
    ratingsNone = {}
    animeColumn = list(piv.columns.values)
    currentUserRatings = piv.loc[currentUser, :].tolist()
    currentUserAnime = dict(zip(animeColumn, currentUserRatings))
    similarUsers = all_Similar_Users(currentUser)
#     print len(currentUserAnime)
    
#     print len(currentUserAnime)
    counter = 0
    for animeName, animeRating in currentUserAnime.items():
        if np.isnan(animeRating):
            counter += 1
#             ratingsNone[animeName] = ratingsUserAnime(currentUser, animeName, similarUsers)
#             print ("AnimeName: {}, Rating: {}".format(animeName, ratingsNone[animeName]))
    
    return ratingsNone
        

In [148]:
predictUserRatings(7)

6809


{}

### User Pearson Similarity

In [26]:
piv_user = piv_norm.values
userCovariance = np.cov(piv_user.T)

In [27]:
userPearsonSimilarity = np.corrcoef(userCovariance)
# print testing2

In [28]:
userPearsonSimData = pd.DataFrame(userPearsonSimilarity, index = piv_norm.columns, columns = piv_norm.columns)

In [29]:
userPearsonSimData.head()

user_id,3,5,7,8,10,11,12,14,16,17,...,4990,4991,4992,4993,4995,4996,4997,4998,4999,5000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,1.0,0.390599,0.540247,0.131393,0.230468,0.338315,0.442852,0.349422,0.321334,0.624789,...,0.116088,0.260055,0.225033,0.341106,-0.13732,0.39318,0.087866,0.346094,0.247075,0.571032
5,0.390599,1.0,0.456861,0.003083,0.264419,0.175298,0.321363,0.551465,0.165403,0.601768,...,0.158756,0.292935,0.235149,0.498507,0.027153,0.121315,0.183013,0.366469,0.259727,0.615792
7,0.540247,0.456861,1.0,0.045588,0.146607,0.105991,0.374586,0.543455,0.315055,0.661314,...,0.160566,0.263536,0.259735,0.366285,-0.049507,0.292652,-0.016443,0.459601,0.437305,0.638354
8,0.131393,0.003083,0.045588,1.0,-0.161154,0.059567,-0.006389,-0.029386,0.02651,0.016668,...,-0.044372,-0.007285,0.001524,-0.020292,0.002354,0.010841,-0.022189,0.034068,-0.010836,0.096772
10,0.230468,0.264419,0.146607,-0.161154,1.0,0.049037,0.224339,0.223979,0.081027,0.313504,...,0.068301,0.191987,0.059808,0.359261,0.037801,0.113908,0.107205,0.159429,0.140987,0.232634


In [30]:
def top_Similar_Pearson_Users(currentUser):
    if currentUser not in piv_norm.columns:
        return("No data available for {}".format(currentUser))
    
    similarUsers = userPearsonSimData.sort_values(by=currentUser, ascending=False).index[1:20]
    similarUsersResults = userPearsonSimData.sort_values(by=currentUser, ascending=False).loc[:,currentUser].tolist()[1:20]
    similarUsersZipped = zip(similarUsers, similarUsersResults)
#     print similarUsersZipped
    
    return similarUsersZipped

In [31]:
similarPearsonUsers = top_Similar_Pearson_Users(7)
print ratingsUserAnime(7, "Zombie-Loan", similarPearsonUsers)

6.399927865841151


In [32]:
def predictPearsonUserRatings(currentUser):
    ratingsNone = {}
    animeColumn = list(piv.columns.values)
    currentUserRatings = piv.loc[currentUser, :].tolist()
    currentUserAnime = dict(zip(animeColumn, currentUserRatings))
    similarUsers = top_Similar_Pearson_Users(currentUser)
#     print len(currentUserAnime)
    
    for animeName, animeRating in currentUserAnime.items():
        if np.isnan(animeRating):
            ratingsNone[animeName] = ratingsUserAnime(currentUser, animeName, similarUsers)
            print ("User: {}, Rating: {}".format(user, ratingsNone[user]))
    
    return ratingsNone

### Item Cosine Similarity

In [33]:
itemCosineSimilarity = cosine_similarity(piv_sparse)

In [34]:
itemSimData = pd.DataFrame(itemCosineSimilarity, index = piv_norm.index, columns = piv_norm.index)

In [35]:
itemSimData.head()

name,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,&quot;Bungaku Shoujo&quot; Memoire,&quot;Bungaku Shoujo&quot; Movie,.hack//G.U. Returner,.hack//G.U. Trilogy,.hack//G.U. Trilogy: Parody Mode,.hack//Gift,.hack//Intermezzo,.hack//Liminality,.hack//Quantum,...,gdgd Fairies 2,iDOLM@STER Xenoglossia,iDOLM@STER Xenoglossia Specials,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki,◯
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,1.0,0.172085,0.186285,0.020928,0.061367,0.032293,0.004453,-0.04345,-0.017462,0.017125,...,0.0,-0.005201,0.023101,0.0,0.075029,0.106081,0.133147,0.151144,0.079971,0.0
&quot;Bungaku Shoujo&quot; Memoire,0.172085,1.0,0.388434,-0.020467,0.037005,-0.059632,-0.007363,-0.052221,-0.033056,0.03657,...,0.0,0.004336,-0.042659,-0.048821,-0.041747,-0.050302,-0.045607,-0.016375,-0.091434,0.0
&quot;Bungaku Shoujo&quot; Movie,0.186285,0.388434,1.0,-0.048346,0.044058,-0.027781,-0.023449,-0.10717,-0.048258,0.030136,...,0.0,0.00202,-0.019874,-0.00747,-0.037358,-0.038744,-0.018042,0.028878,0.00096,0.0
.hack//G.U. Returner,0.020928,-0.020467,-0.048346,1.0,0.559906,-0.149,0.371347,0.633474,0.236892,0.199659,...,0.0,-0.002475,0.026172,0.0,-0.052349,-0.068782,-0.076498,-0.104512,-0.141105,0.0
.hack//G.U. Trilogy,0.061367,0.037005,0.044058,0.559906,1.0,-0.153814,0.194511,0.290773,0.062776,0.073942,...,0.0,-0.001661,0.017569,-0.017556,-0.027834,-0.009529,0.008717,0.005253,-0.049467,0.0


In [36]:
def top_Similar_Animes(anime):
    similarAnime = itemSimData.sort_values(by=anime, ascending=False).index[1:20]
    similarAnimeResults = itemSimData.sort_values(by=anime, ascending=False).loc[:,anime].tolist()[1:20]
    similarAnimeZipped = zip(similarAnime, similarAnimeResults)
#     print similarUsersZipped
    
    return similarAnimeZipped

In [37]:
def avgRatingItem(item):
    itemRatings = piv.loc[:, item]
    avgItemRatingList = []
    for rating in itemRatings:
        if not np.isnan(rating):
            avgItemRatingList.append(rating)
        else:
            continue
    return sum(avgItemRatingList)/len(avgItemRatingList)

In [38]:
def ratingsItemAnime(currentUser, animeName, similarItems):
    ratingWeight = []
    similarityWeight = []
    avgCurrentItemRating = avgRatingItem(animeName)
    
    if currentUser in piv_norm.columns:
        for item, itemSimilarity in similarItems:
            #Average of the similar item ratings
            avgItemRating = avgRatingItem(item)
            
            #Find the rated anime
            itemAnimeRating = piv.loc[currentUser, item]
            if np.isnan(itemAnimeRating):
                continue
            elif not np.isnan(itemAnimeRating):
                ratingWeight.append((itemAnimeRating - avgItemRating)*itemSimilarity)
                similarityWeight.append(abs(itemSimilarity))
    else:
        return ("No data available for: {}".format(currentUser))
    
    if sum(similarityWeight) > 0:
        return avgCurrentItemRating + (sum(ratingWeight)/sum(similarityWeight))
    else:
        return avgCurrentItemRating

In [39]:
similarAnimes = top_Similar_Animes("Zombie-Loan")
print ratingsItemAnime(7, "Zombie-Loan", similarAnimes)

6.392857142857143


In [40]:
def predictItemRatings(currentItem):
    ratingsNone = {}
    animeRow = list(piv.index.values)
    currentItemRatings = piv.loc[:, currentItem].tolist()  
    currentUserAnime = dict(zip(animeRow, currentItemRatings))
    similarAnimes = top_Similar_Animes(currentItem)
#     print len(currentUserAnime)
#     similarUsers = top_Similar_Users(currentUser)
#     print currentUserAnime
    
    for user, animeRating in currentUserAnime.items():
        if np.isnan(animeRating):
            ratingsNone[user] = ratingsItemAnime(user, currentItem, similarAnimes)
#             print ("User: {}, Rating: {}".format(user, ratingsNone[user]))
    return ratingsNone
    

In [None]:
# predictItemRatings("Zombie-Loan")

{1: 'No data available for: 1',
 2: 'No data available for: 2',
 5: 7.192857142857143,
 7: 6.392857142857143,
 8: 7.192857142857143,
 9: 'No data available for: 9',
 10: 7.192857142857143,
 11: 7.192857142857143,
 12: 7.192857142857143,
 14: 7.192857142857143,
 16: 7.192857142857143,
 17: 7.192857142857143,
 18: 7.192857142857143,
 19: 7.192857142857143,
 20: 7.192857142857143,
 21: 7.192857142857143,
 22: 'No data available for: 22',
 23: 7.192857142857143,
 24: 7.192857142857143,
 25: 7.192857142857143,
 26: 7.192857142857143,
 27: 7.192857142857143,
 28: 7.192857142857143,
 29: 7.192857142857143,
 30: 7.192857142857143,
 31: 7.192857142857143,
 32: 7.192857142857143,
 33: 7.192857142857143,
 34: 7.192857142857143,
 35: 'No data available for: 35',
 36: 7.192857142857143,
 37: 7.192857142857143,
 38: 3.4201298701298706,
 39: 7.192857142857143,
 40: 7.192857142857143,
 41: 7.192857142857143,
 42: 7.192857142857143,
 43: 6.420129870129871,
 44: 7.192857142857143,
 45: 7.192857142857143

### Item Pearson Similarity

In [None]:
piv_item = piv_norm.values
itemCovariance = np.cov(piv_item)

In [None]:
itemPearsonSimilarity = np.corrcoef(itemCovariance)

In [None]:
itemPearsonSimData = pd.DataFrame(itemPearsonSimilarity, index = piv_norm.index, columns = piv_norm.index)

In [None]:
itemPearsonSimData.head()

In [None]:
def top_Pearson_Similar_Animes(anime):
    similarAnime = itemPearsonSimData.sort_values(by=anime, ascending=False).index[1:20]
    similarAnimeResults = itemPearsonSimData.sort_values(by=anime, ascending=False).loc[:,anime].tolist()[1:20]
    similarAnimeZipped = zip(similarAnime, similarAnimeResults)
#     print similarUsersZipped
    
    return similarAnimeZipped

In [None]:
def predictPearsonItemRatings(currentItem):
    ratingsNone = {}
    animeRow = list(piv.index.values)
    currentItemRatings = piv.loc[:, currentItem].tolist()  
    currentUserAnime = dict(zip(animeRow, currentItemRatings))
    similarAnimes = top_Pearson_Similar_Animes(currentItem)
#     print len(currentUserAnime)
#     similarUsers = top_Similar_Users(currentUser)
#     print currentUserAnime
    
    for user, animeRating in currentUserAnime.items():
        if np.isnan(animeRating):
            ratingsNone[user] = ratingsItemAnime(user, currentItem, similarAnimes)
#             print ("User: {}, Rating: {}".format(user, ratingsNone[user]))
    return ratingsNone
    

In [None]:
predictPearsonItemRatings("Zombie-Loan")

### Results

In [None]:
def get_mean_absolute_error():
    mean_absolute_error = 0.0
    n = 0.0
    test_users_rating_map = generate_ratings_users_map(test_users, test_ratings)
    test_5000 = test_users_rating_map.keys()[0:5000]
    for user_id in test_5000:
        user_ratings = test_users_rating_map[user_id]
        for rating in user_ratings:
            movie_id = rating['movie']
            actual_rating = float(rating['rating'])
            predicted_rating = predict_rating(user_id, movie_id)
            mean_absolute_error += abs(actual_rating - predicted_rating)
            n += 1
    return mean_absolute_error / n
mean_absolute_error = get_mean_absolute_error()

In [None]:
print mean_absolute_error