# Modeling #
Here we load the data for a specific user id and use it to build a model of that user's preferences and likes. Two models are used, SVD and Pearson's r; each automatically produces a list of recommended shows to watch based on the previously known data.

In [None]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Reader() defaults to rating_scale=(1, 5). The ratings used in this notebook go
# well above 5 (later cells filter on rating >= 7), so with the default scale every
# prediction is clipped to at most 5, which inflates RMSE/MAE and flattens the scores.
# NOTE(review): assumes user_df['rating'] lies in 1-10 - confirm there are no
# out-of-range placeholder values left in the frame.
reader = Reader(rating_scale=(1, 10))

# Surprise expects exactly three columns, in (user, item, rating) order.
dataset = Dataset.load_from_df(user_df[['user_id', 'anime_id', 'rating']], reader)

# Cross-validate an SVD model with its default hyper-parameters; the returned dict
# (per-fold RMSE/MAE plus fit/test times) is displayed as the cell output.
svd = SVD()
cross_validate(svd, dataset, measures=['RMSE', 'MAE'])



{'test_rmse': array([3.11409094, 3.11588406, 3.11269079, 3.11478178, 3.11256255]),
 'test_mae': array([2.80129082, 2.80346401, 2.79941106, 2.80086403, 2.79984092]),
 'fit_time': (225.01878213882446,
  223.42327976226807,
  248.03472208976746,
  255.4715497493744,
  253.52299976348877),
 'test_time': (17.800500869750977,
  16.295000076293945,
  15.787002563476562,
  18.506999492645264,
  21.80700397491455)}

Here we load the preferences of user 66789 and create a model based on that user. Everything beyond this point can be used with any user id that exists in the data.

In [None]:
# Rows where user 66789 gave a rating of 7 or higher, re-indexed by anime_id.
df_66789 = user_df[(user_df['user_id'] == 66789) & (user_df['rating'] >= 7)]
df_66789 = df_66789.set_index('anime_id')

# DataFrame.join matches the left index against the *index* of the other frame.
# When anime_df carries anime_id as an ordinary column (its index being something
# else), joining directly would look titles up by row label instead of by id, so
# key the lookup table by anime_id first. If there is no such column, anime_df is
# used as-is.
anime_by_id = anime_df.set_index('anime_id') if 'anime_id' in anime_df.columns else anime_df

# Keep only the title column; the suffixes disambiguate columns present in both frames.
df_66789 = df_66789.join(anime_by_id, lsuffix = '_user', rsuffix = '_mal')['name']

# Drop ids that have no matching title (non-inplace, so no derived object is mutated).
df_66789 = df_66789.dropna()
df_66789

anime_id
1                         Fullmetal Alchemist: Brotherhood
18                            Ookami Kodomo no Ame to Yuki
20                                          Hajime no Ippo
24                                           Mononoke Hime
33                     Mushishi Zoku Shou: Suzu no Shizuku
43                                               Haikyuu!!
47                                 Ping Pong The Animation
57       Ano Hi Mita Hana no Namae wo Bokutachi wa Mada...
67                              Kiseijuu: Sei no Kakuritsu
93                                           Chihayafuru 2
94                                               Fate/Zero
121                      Kino no Tabi: The Beautiful World
134                                        Owarimonogatari
170                                 Tenkuu no Shiro Laputa
185                           Danshi Koukousei no Nichijou
186                                       Kotonoha no Niwa
187      Kyoukai no Kanata Movie: I&#039;ll Be 

Here we fit the model. Because we fit on a 2-million-row dataframe, the runtime will be slow.
WARNING: 16 MIN RUNTIME ON A 12 CORE SYSTEM

In [None]:
# Candidate table for this user: a copy of anime_df with its index moved into a
# column, minus every row whose anime_id appears in anime_drop_list.
user_66789 = (
    anime_df.copy()
    .reset_index()
    .loc[lambda frame: ~frame['anime_id'].isin(anime_drop_list)]
)

# Train on the complete ratings dataset (build_full_trainset uses every row, no hold-out).
combined_data = dataset
training_set = combined_data.build_full_trainset()

# fit() is the last expression of the cell, so the fitted SVD object is echoed as output.
svd.fit(training_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1b60c2e96f0>

In [None]:
# Predicted rating (the .est field of the Surprise prediction) of each candidate
# title for user 66789.
user_66789['score'] = user_66789['anime_id'].apply(lambda x: svd.predict(66789, x).est)

# The id columns are no longer needed once every row has a score.
user_66789 = user_66789.drop(['anime_id', 'index'], axis = 1)

# Rank by the model's predicted score so the top rows are the actual recommendations;
# 'members' only breaks ties between equally scored titles. Sorting by 'members'
# alone would list the most popular shows regardless of the prediction.
user_66789 = user_66789.sort_values(['score', 'members'], ascending = False)
user_66789.head()

Unnamed: 0,name,genre,type,episodes,rating,members,rating_rounded,length,popularity,score
4728,Taka no Tsume 8: Yoshida-kun no X-Files,"Comedy, Parody",Movie,1.0,10.0,13,10.0,movie/OVA,False,5.0
4686,Spoon-hime no Swing Kitchen,"Adventure, Kids",TV,,9.6,47,10.0,,False,5.0
4122,Mogura no Motoro,Slice of Life,Movie,1.0,9.5,62,10.0,movie/OVA,False,5.0
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1.0,9.37,200630,9.0,movie/OVA,True,5.0
3777,Kahei no Umi,Historical,Movie,1.0,9.33,44,9.0,movie/OVA,False,5.0


These cells use Pearson's r, another collaborative-filtering approach: it computes the Pearson correlation between the ratings of two shows and uses that score to judge whether a show is a good fit for the user.

In [None]:
def recommendation(title, min_count, top_n=15):
    """Print the shows whose ratings correlate most strongly with those of *title*.

    The title is looked up in ``anime_df['name']``; the matching index label selects a
    column of ``anime_df_pivot``, which is correlated against every other column. The
    result is joined with ``anime_df`` and ``anime_df_summary`` and printed.

    Args:
        title: Exact show name as it appears in ``anime_df['name']``.
        min_count: Only rows whose ``count`` is strictly greater than this are printed.
        top_n: Maximum number of rows to print (defaults to 15).

    Raises:
        IndexError: If no row of ``anime_df`` has this exact name.
    """
    matches = anime_df.index[anime_df['name'] == title]
    if len(matches) == 0:
        raise IndexError(f'no anime named {title!r} found in anime_df')
    target_id = int(matches[0])

    # assumes anime_df_pivot has one column per show, labelled like anime_df's
    # index - TODO confirm against the cell that builds the pivot
    target = anime_df_pivot[target_id]
    similarity = anime_df_pivot.corrwith(target)

    corr_t = pd.DataFrame(similarity, columns = ['PearsonR'])
    corr_t.dropna(inplace = True)
    corr_t = corr_t.sort_values('PearsonR', ascending = False)
    corr_t.index = corr_t.index.map(int)

    # A show always correlates perfectly with itself; without this filter the queried
    # title would head its own recommendation list.
    corr_t = corr_t[corr_t.index != target_id]

    corr_t = corr_t.join(anime_df).join(anime_df_summary)[['PearsonR', 'name', 'count', 'mean']]
    print(f'--Recommendations based on {title}--')
    print(corr_t[(corr_t['count'] > min_count) & (~corr_t['name'].isnull())][:top_n].to_string(index = False))




In [None]:
# Example query: titles correlated with '91 Days', limited to rows whose 'count' exceeds 1000.
recommendation(title='91 Days', min_count=1000)

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


--Recommendations based on 91 Days--
 PearsonR                                               name  count     mean
      1.0                               Xiyue Qi Tong (2006)     11 5.909091
      1.0 Little Busters!: Sekai no Saitou wa Ore ga Mamoru!     94 6.510638
      1.0                                     Kanon Kazahana     27 7.629630
      1.0                                     Hiyokoi (2012)     25 7.600000
      1.0                       Bokusatsu Tenshi Dokuro-chan     24 4.958333
      1.0              Kono Danshi, Uchuujin to Tatakaemasu.     11 5.272727
      1.0                            Mobile Fighter G Gundam     27 7.555556
      1.0                                    Kiko-chan Smile     15 5.533333
      1.0                         Rinne no Lagrange Season 2     17 6.764706
      1.0                              Kamen no Maid Guy OVA     29 5.620690
      1.0             Cardfight!! Vanguard: Asia Circuit-hen     41 5.195122
      1.0                           Man