In [94]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [95]:
reviewsdf = pd.read_csv('data/reviews.csv')
profilesdf = pd.read_csv('data/profiles.csv')
animesdf = pd.read_csv('data/animes.csv')
userlistdf = pd.read_csv('data/users_cleaned.csv')

In [96]:
reviewsdf.columns

Index(['uid', 'profile', 'anime_uid', 'text', 'score', 'scores', 'link'], dtype='object')

In [97]:
profilesdf.columns

Index(['profile', 'gender', 'birthday', 'favorites_anime', 'link'], dtype='object')

In [98]:
animesdf.columns

Index(['uid', 'title', 'synopsis', 'genre', 'aired', 'episodes', 'members',
       'popularity', 'ranked', 'score', 'img_url', 'link'],
      dtype='object')

In [99]:
mergedf1 = pd.merge(reviewsdf, animesdf, 
                   on='uid', 
                   how='outer')
print(mergedf1)

           uid         profile  anime_uid  \
0       255938  DesolatePsyche    34096.0   
1       255938  DesolatePsyche    34096.0   
2       259117       baekbeans    34599.0   
3       259117       baekbeans    34599.0   
4       253664            skrn    28891.0   
...        ...             ...        ...   
204577   27829             NaN        NaN   
204578    2649             NaN        NaN   
204579    8676             NaN        NaN   
204580   36043             NaN        NaN   
204581   33082             NaN        NaN   

                                                     text  score_x  \
0       \n           \n         \n           \n       ...      8.0   
1       \n           \n         \n           \n       ...      8.0   
2       \n           \n         \n           \n       ...     10.0   
3       \n           \n         \n           \n       ...     10.0   
4       \n           \n         \n           \n       ...      7.0   
...                                    

In [100]:
dfmerge = pd.DataFrame(data=mergedf1)
dfmerge

Unnamed: 0,uid,profile,anime_uid,text,score_x,scores,link_x,title,synopsis,genre,aired,episodes,members,popularity,ranked,score_y,img_url,link_y
0,255938,DesolatePsyche,34096.0,\n \n \n \n ...,8.0,"{'Overall': '8', 'Story': '8', 'Animation': '8...",https://myanimelist.net/reviews.php?id=255938,,,,,,,,,,,
1,255938,DesolatePsyche,34096.0,\n \n \n \n ...,8.0,"{'Overall': '8', 'Story': '8', 'Animation': '8...",https://myanimelist.net/reviews.php?id=255938,,,,,,,,,,,
2,259117,baekbeans,34599.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=259117,,,,,,,,,,,
3,259117,baekbeans,34599.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=259117,,,,,,,,,,,
4,253664,skrn,28891.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '9...",https://myanimelist.net/reviews.php?id=253664,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204577,27829,,,,,,,Heavy Object,"In the distant future, the nature of war has c...","['Action', 'Military', 'Sci-Fi', 'Mecha']","Oct 3, 2015 to Mar 26, 2016",24.0,111872.0,1008.0,2367.0,7.34,https://cdn.myanimelist.net/images/anime/13/75...,https://myanimelist.net/anime/27829/Heavy_Object
204578,2649,,,,,,,Doraemon: Obaachan no Omoide,Nobita misses his granny that died a few years...,"['Comedy', 'Fantasy', 'Kids', 'Sci-Fi', 'Shoun...","Mar 4, 2000",1.0,1481.0,8667.0,2183.0,7.38,https://cdn.myanimelist.net/images/anime/6/274...,https://myanimelist.net/anime/2649/Doraemon__O...
204579,8676,,,,,,,Amagami SS,"Two years ago, Junichi Tachibana had a date on...","['Slice of Life', 'Comedy', 'Romance', 'School']","Jul 2, 2010 to Dec 24, 2010",25.0,240377.0,417.0,1885.0,7.45,https://cdn.myanimelist.net/images/anime/10/78...,https://myanimelist.net/anime/8676/Amagami_SS
204580,36043,,,,,,,Net-juu no Susume Special,An unaired 11th episode included with the Blu-...,"['Game', 'Comedy', 'Romance']","Dec 8, 2017",1.0,62448.0,1630.0,1802.0,7.48,https://cdn.myanimelist.net/images/anime/7/901...,https://myanimelist.net/anime/36043/Net-juu_no...


In [101]:
dfmerge.isnull().sum().sum()

2064198

In [102]:
dfmerge.all()

uid            True
profile        True
anime_uid      True
text           True
score_x       False
scores         True
link_x         True
title          True
synopsis       True
genre          True
aired          True
episodes       True
members        True
popularity     True
ranked         True
score_y        True
img_url        True
link_y         True
dtype: bool

In [103]:
cleandf = dfmerge.dropna()
cleandf

Unnamed: 0,uid,profile,anime_uid,text,score_x,scores,link_x,title,synopsis,genre,aired,episodes,members,popularity,ranked,score_y,img_url,link_y
74,29323,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...
75,29323,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...
116,30968,ParaParaJMo,1253.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=30968,Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,100.0,15323.0,12764.0,6.70,https://cdn.myanimelist.net/images/anime/2/745...,https://myanimelist.net/anime/30968/Kokoro_no_...
117,30968,ParaParaJMo,1253.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=30968,Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,100.0,15323.0,12764.0,6.70,https://cdn.myanimelist.net/images/anime/2/745...,https://myanimelist.net/anime/30968/Kokoro_no_...
192,37548,vanmarckestevy,9563.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '8...",https://myanimelist.net/reviews.php?id=37548,Metroid Short Animation,A fan-made animation of the Metroid franchis...,['Sci-Fi'],"Jun 6, 2016",1.0,248.0,12698.0,8481.0,5.96,https://cdn.myanimelist.net/images/anime/1285/...,https://myanimelist.net/anime/37548/Metroid_Sh...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194088,35975,DangoSage,10033.0,\n \n \n \n ...,8.0,"{'Overall': '8', 'Story': '8', 'Animation': '7...",https://myanimelist.net/reviews.php?id=35975,"Hope: Suiso Shakai to, Korekara no Energy no H...",A Toyota commissioned commercial explaining ...,['Slice of Life'],"Nov 17, 2014",1.0,293.0,12219.0,10943.0,4.95,https://cdn.myanimelist.net/images/anime/2/869...,https://myanimelist.net/anime/35975/Hope__Suis...
194105,10901,chidori3souske,2593.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '8', 'Animation': '1...",https://myanimelist.net/reviews.php?id=10901,Aoi Hitomi no Onnanoko no Ohanashi,The story is set in a village in the southern ...,"['Drama', 'Historical']","Aug 13, 2009",1.0,509.0,10963.0,7230.0,6.26,https://cdn.myanimelist.net/images/anime/5/300...,https://myanimelist.net/anime/10901/Aoi_Hitomi...
194108,34734,Akuteru,2593.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '10', 'Animation': '...",https://myanimelist.net/reviews.php?id=34734,Minami Kamakura Koukou Joshi Jitenshabu: Kita ...,Unaired episode included with the special edit...,"['Sports', 'School', 'Shounen']","May 15, 2017",1.0,2750.0,7197.0,6128.0,6.49,https://cdn.myanimelist.net/images/anime/5/837...,https://myanimelist.net/anime/34734/Minami_Kam...
194117,5909,Raptor73,2593.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=5909,Seitokai no Ichizon,Imagine living life as the vice president of t...,"['Harem', 'Comedy', 'Parody', 'School']","Oct 3, 2009 to Dec 19, 2009",12.0,150251.0,738.0,2077.0,7.41,https://cdn.myanimelist.net/images/anime/7/196...,https://myanimelist.net/anime/5909/Seitokai_no...


In [104]:
cleandf.isnull().sum().sum()

0

In [105]:
allmerge = pd.merge(cleandf, profilesdf, 
                   on='profile', 
                   how='outer')
print(allmerge)

            uid          profile  anime_uid  \
0       29323.0    Slushpuppy282     7588.0   
1       29323.0    Slushpuppy282     7588.0   
2       29323.0    Slushpuppy282     7588.0   
3       29323.0    Slushpuppy282     7588.0   
4       30968.0      ParaParaJMo     1253.0   
...         ...              ...        ...   
100888      NaN       daniel1302        NaN   
100889      NaN       bridgesams        NaN   
100890      NaN    Officer_Anime        NaN   
100891      NaN             Yuez        NaN   
100892      NaN  srry4apologizng        NaN   

                                                     text  score_x  \
0       \n           \n         \n           \n       ...      7.0   
1       \n           \n         \n           \n       ...      7.0   
2       \n           \n         \n           \n       ...      7.0   
3       \n           \n         \n           \n       ...      7.0   
4       \n           \n         \n           \n       ...      9.0   
...            

In [106]:
allmergedf = pd.DataFrame(data=allmerge)
allmergedf

Unnamed: 0,uid,profile,anime_uid,text,score_x,scores,link_x,title,synopsis,genre,...,members,popularity,ranked,score_y,img_url,link_y,gender,birthday,favorites_anime,link
0,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
1,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
2,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
3,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.9,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
4,30968.0,ParaParaJMo,1253.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=30968,Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",...,100.0,15323.0,12764.0,6.7,https://cdn.myanimelist.net/images/anime/2/745...,https://myanimelist.net/anime/30968/Kokoro_no_...,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",https://myanimelist.net/profile/ParaParaJMo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100888,,daniel1302,,,,,,,,,...,,,,,,,,,[],https://myanimelist.net/profile/daniel1302
100889,,bridgesams,,,,,,,,,...,,,,,,,Male,"Jan 1, 2001","['37510', '35180', '30', '31043', '5081', '374...",https://myanimelist.net/profile/bridgesams
100890,,Officer_Anime,,,,,,,,,...,,,,,,,,,[],https://myanimelist.net/profile/Officer_Anime
100891,,Yuez,,,,,,,,,...,,,,,,,,,[],https://myanimelist.net/profile/Yuez


In [107]:
allmergedf.isnull().sum().sum()

1347096

In [108]:
mergeddf = allmergedf.dropna()
mergeddf

Unnamed: 0,uid,profile,anime_uid,text,score_x,scores,link_x,title,synopsis,genre,...,members,popularity,ranked,score_y,img_url,link_y,gender,birthday,favorites_anime,link
0,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
1,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
2,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
3,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
4,30968.0,ParaParaJMo,1253.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=30968,Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",...,100.0,15323.0,12764.0,6.70,https://cdn.myanimelist.net/images/anime/2/745...,https://myanimelist.net/anime/30968/Kokoro_no_...,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",https://myanimelist.net/profile/ParaParaJMo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25775,29103.0,Samurai_Wolf337,8676.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=29103,Tanoshii Sansuu,Music video for a song about arithmetic by Sei...,"['Music', 'Kids']",...,161.0,14104.0,10725.0,5.13,https://cdn.myanimelist.net/images/anime/5/701...,https://myanimelist.net/anime/29103/Tanoshii_S...,Male,"Jul 6, 1994","['356', '3588', '3342', '1575', '8074']",https://myanimelist.net/profile/Samurai_Wolf337
25776,13167.0,meri_nicole,5060.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '8', 'Animation': '7...",https://myanimelist.net/reviews.php?id=13167,Zoobles!,The Candy Factory is a place where all Zoobles...,['Kids'],...,215.0,13177.0,14663.0,5.90,https://cdn.myanimelist.net/images/anime/4/368...,https://myanimelist.net/anime/13167/Zoobles,Female,Feb 25,"['205', '5060', '966', '2605']",https://myanimelist.net/profile/meri_nicole
25777,22745.0,samurai_gaz25,5060.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=22745,Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']",...,16125.0,3558.0,5070.0,6.70,https://cdn.myanimelist.net/images/anime/3/591...,https://myanimelist.net/anime/22745/Brothers_C...,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']",https://myanimelist.net/profile/samurai_gaz25
25778,22745.0,samurai_gaz25,5060.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=22745,Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']",...,16125.0,3558.0,5070.0,6.70,https://cdn.myanimelist.net/images/anime/3/591...,https://myanimelist.net/anime/22745/Brothers_C...,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']",https://myanimelist.net/profile/samurai_gaz25


In [109]:
mergeddf.isnull().sum().sum()

0

In [110]:
mergeddf

Unnamed: 0,uid,profile,anime_uid,text,score_x,scores,link_x,title,synopsis,genre,...,members,popularity,ranked,score_y,img_url,link_y,gender,birthday,favorites_anime,link
0,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
1,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
2,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
3,29323.0,Slushpuppy282,7588.0,\n \n \n \n ...,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...",https://myanimelist.net/reviews.php?id=29323,"Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],...,360.0,11732.0,8664.0,5.90,https://cdn.myanimelist.net/images/anime/2/705...,https://myanimelist.net/anime/29323/Oyaji_no_I...,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",https://myanimelist.net/profile/Slushpuppy282
4,30968.0,ParaParaJMo,1253.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=30968,Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",...,100.0,15323.0,12764.0,6.70,https://cdn.myanimelist.net/images/anime/2/745...,https://myanimelist.net/anime/30968/Kokoro_no_...,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",https://myanimelist.net/profile/ParaParaJMo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25775,29103.0,Samurai_Wolf337,8676.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",https://myanimelist.net/reviews.php?id=29103,Tanoshii Sansuu,Music video for a song about arithmetic by Sei...,"['Music', 'Kids']",...,161.0,14104.0,10725.0,5.13,https://cdn.myanimelist.net/images/anime/5/701...,https://myanimelist.net/anime/29103/Tanoshii_S...,Male,"Jul 6, 1994","['356', '3588', '3342', '1575', '8074']",https://myanimelist.net/profile/Samurai_Wolf337
25776,13167.0,meri_nicole,5060.0,\n \n \n \n ...,9.0,"{'Overall': '9', 'Story': '8', 'Animation': '7...",https://myanimelist.net/reviews.php?id=13167,Zoobles!,The Candy Factory is a place where all Zoobles...,['Kids'],...,215.0,13177.0,14663.0,5.90,https://cdn.myanimelist.net/images/anime/4/368...,https://myanimelist.net/anime/13167/Zoobles,Female,Feb 25,"['205', '5060', '966', '2605']",https://myanimelist.net/profile/meri_nicole
25777,22745.0,samurai_gaz25,5060.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=22745,Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']",...,16125.0,3558.0,5070.0,6.70,https://cdn.myanimelist.net/images/anime/3/591...,https://myanimelist.net/anime/22745/Brothers_C...,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']",https://myanimelist.net/profile/samurai_gaz25
25778,22745.0,samurai_gaz25,5060.0,\n \n \n \n ...,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=22745,Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']",...,16125.0,3558.0,5070.0,6.70,https://cdn.myanimelist.net/images/anime/3/591...,https://myanimelist.net/anime/22745/Brothers_C...,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']",https://myanimelist.net/profile/samurai_gaz25


In [111]:
mergeddf.columns

Index(['uid', 'profile', 'anime_uid', 'text', 'score_x', 'scores', 'link_x',
       'title', 'synopsis', 'genre', 'aired', 'episodes', 'members',
       'popularity', 'ranked', 'score_y', 'img_url', 'link_y', 'gender',
       'birthday', 'favorites_anime', 'link'],
      dtype='object')

In [112]:
#lastmerge = pd.merge(userlistdf, mergeddf, 
                   #on='gender', 
                   #how='inner')
#print(lastmerge)

In [113]:
#userlistdf.info()

In [114]:
mergeddf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18429 entries, 0 to 25780
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   uid              18429 non-null  float64
 1   profile          18429 non-null  object 
 2   anime_uid        18429 non-null  float64
 3   text             18429 non-null  object 
 4   score_x          18429 non-null  float64
 5   scores           18429 non-null  object 
 6   link_x           18429 non-null  object 
 7   title            18429 non-null  object 
 8   synopsis         18429 non-null  object 
 9   genre            18429 non-null  object 
 10  aired            18429 non-null  object 
 11  episodes         18429 non-null  float64
 12  members          18429 non-null  float64
 13  popularity       18429 non-null  float64
 14  ranked           18429 non-null  float64
 15  score_y          18429 non-null  float64
 16  img_url          18429 non-null  object 
 17  link_y      

In [115]:
mergeddf = mergeddf.drop(columns=['link', 'link_y', 'img_url', 'link_x', 'text'], axis=1)

In [116]:
mergeddf

Unnamed: 0,uid,profile,anime_uid,score_x,scores,title,synopsis,genre,aired,episodes,members,popularity,ranked,score_y,gender,birthday,favorites_anime
0,29323.0,Slushpuppy282,7588.0,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']"
1,29323.0,Slushpuppy282,7588.0,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']"
2,29323.0,Slushpuppy282,7588.0,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']"
3,29323.0,Slushpuppy282,7588.0,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,360.0,11732.0,8664.0,5.90,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']"
4,30968.0,ParaParaJMo,1253.0,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,100.0,15323.0,12764.0,6.70,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25775,29103.0,Samurai_Wolf337,8676.0,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",Tanoshii Sansuu,Music video for a song about arithmetic by Sei...,"['Music', 'Kids']","Feb 8, 1988",1.0,161.0,14104.0,10725.0,5.13,Male,"Jul 6, 1994","['356', '3588', '3342', '1575', '8074']"
25776,13167.0,meri_nicole,5060.0,9.0,"{'Overall': '9', 'Story': '8', 'Animation': '7...",Zoobles!,The Candy Factory is a place where all Zoobles...,['Kids'],"May 18, 2011 to Nov 16, 2011",26.0,215.0,13177.0,14663.0,5.90,Female,Feb 25,"['205', '5060', '966', '2605']"
25777,22745.0,samurai_gaz25,5060.0,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']","Feb 26, 2014",1.0,16125.0,3558.0,5070.0,6.70,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']"
25778,22745.0,samurai_gaz25,5060.0,10.0,"{'Overall': '10', 'Story': '10', 'Animation': ...",Brothers Conflict: Setsubou,Ema finds a special lamp that her father left ...,"['Harem', 'Romance', 'Shoujo']","Feb 26, 2014",1.0,16125.0,3558.0,5070.0,6.70,Female,"Aug 25, 1990","['1013', '2025', '4898', '5060']"


In [117]:
mergeddf.scores[0]

"{'Overall': '7', 'Story': '7', 'Animation': '6', 'Sound': '9', 'Character': '6', 'Enjoyment': '0'}"

In [148]:
mergeddf.drop_duplicates(inplace=True, ignore_index=True)

In [149]:
def scoreExtractor(df):
    Story = [string.split(',')[1].split(':')[1].replace("'",'') for string in df['scores']]
    Animation = [string.split(',')[2].split(':')[1].replace("'",'') for string in df['scores']]
    Sound = [string.split(',')[3].split(':')[1].replace("'",'') for string in df['scores']]
    Character = [string.split(',')[4].split(':')[1].replace("'",'') for string in df['scores']]
    Enjoyment = [string.split(',')[5].split(':')[1].replace("'",'').replace("}",'') for string in df['scores']]
    
    df['Story score'] = pd.Series(Story)
    df['Animation score'] = pd.Series(Animation)
    df['Sound score'] = pd.Series(Sound)
    df['Character score'] = pd.Series(Character)
    df['Enjoyment score'] = pd.Series(Enjoyment)
    
    return df

In [155]:
mergeddf = scoreExtractor(mergeddf)
mergeddf.head()

Unnamed: 0,uid,profile,anime_uid,score_x,scores,title,synopsis,genre,aired,episodes,...,ranked,score_y,gender,birthday,favorites_anime,Story score,Animation score,Sound score,Character score,Enjoyment score
0,29323.0,Slushpuppy282,7588.0,7.0,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,...,8664.0,5.9,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",7,6,9,6,0
1,30968.0,ParaParaJMo,1253.0,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '9...",Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,...,12764.0,6.7,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",9,9,9,9,9
2,8603.0,ParaParaJMo,1088.0,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '8...",Tegami: Haikei Juugo no Kimi e,A music video from NHK's Minna no Uta featurin...,"['Music', 'Slice of Life', 'Kids', 'School']","Aug 1, 2008",1.0,...,6366.0,6.44,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",9,8,9,9,9
3,8584.0,ParaParaJMo,81.0,9.0,"{'Overall': '9', 'Story': '9', 'Animation': '1...",Future War 198X-nen,An American scientist constructs a laser–satel...,"['Military', 'Drama']","Oct 30, 1982",1.0,...,7481.0,6.21,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",9,10,10,9,9
4,30970.0,ParaParaJMo,6171.0,8.0,"{'Overall': '8', 'Story': '8', 'Animation': '9...",Jidou Bungaku Library,A series of children's OVAs covering various s...,"['Demons', 'Drama', 'Fantasy', 'Historical', '...",Not available,6.0,...,12466.0,6.83,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",8,9,8,9,8


In [156]:
mergeddf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4527 entries, 0 to 4526
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   uid              4527 non-null   float64
 1   profile          4527 non-null   object 
 2   anime_uid        4527 non-null   float64
 3   score_x          4527 non-null   float64
 4   scores           4527 non-null   object 
 5   title            4527 non-null   object 
 6   synopsis         4527 non-null   object 
 7   genre            4527 non-null   object 
 8   aired            4527 non-null   object 
 9   episodes         4527 non-null   float64
 10  members          4527 non-null   float64
 11  popularity       4527 non-null   float64
 12  ranked           4527 non-null   float64
 13  score_y          4527 non-null   float64
 14  gender           4527 non-null   object 
 15  birthday         4527 non-null   object 
 16  favorites_anime  4527 non-null   object 
 17  Story score   

In [28]:
mergeddf['score_x'].value_counts()

10.0    1101
9.0     1032
8.0      830
7.0      626
6.0      365
5.0      228
3.0      121
4.0      116
2.0       70
1.0       38
Name: score_x, dtype: int64

In [29]:
mergeddf['score_y'].unique()

array([5.9 , 6.7 , 6.44, 6.21, 6.83, 5.54, 5.86, 6.46, 6.11, 6.65, 5.85,
       6.04, 6.08, 7.  , 6.87, 4.77, 6.78, 6.77, 5.95, 5.58, 6.47, 5.42,
       5.96, 5.62, 5.61, 4.85, 7.1 , 6.37, 5.83, 5.35, 6.35, 5.46, 7.44,
       6.43, 7.11, 7.16, 6.69, 6.96, 7.49, 6.94, 8.46, 7.41, 7.45, 5.47,
       4.93, 4.25, 6.74, 6.12, 4.61, 5.72, 6.22, 5.92, 5.25, 6.27, 7.26,
       5.64, 4.68, 7.09, 5.5 , 7.46, 6.81, 7.52, 6.49, 4.97, 4.5 , 4.74,
       5.1 , 5.74, 4.87, 5.88, 4.78, 6.34, 5.76, 5.66, 6.09, 5.39, 8.68,
       8.29, 6.58, 7.3 , 7.32, 7.79, 5.78, 7.74, 6.53, 6.61, 6.99, 6.79,
       7.48, 6.17, 5.97, 3.93, 5.55, 7.02, 6.3 , 7.73, 7.78, 7.23, 6.26,
       7.29, 7.65, 6.71, 6.64, 6.63, 7.39, 6.32, 6.55, 7.07, 7.22, 6.75,
       6.68, 7.62, 7.4 , 7.47, 5.22, 7.89, 8.01, 6.25, 7.33, 6.4 , 6.88,
       8.11, 7.14, 7.88, 7.54, 8.18, 6.48, 7.43, 6.92, 7.34, 8.44, 5.52,
       7.87, 7.99, 6.18, 8.  , 6.51, 7.61, 7.24, 7.86, 7.42, 6.57, 7.5 ,
       8.47, 7.28, 5.11, 7.12, 6.86, 5.3 , 6.73, 7.

In [30]:
bins = np.arange(2,9)
bins

array([2, 3, 4, 5, 6, 7, 8])

In [31]:
mergeddf['binned_scores'] = pd.cut(mergeddf['score_y'], bins=bins, labels=bins[1:])
type(mergeddf['binned_scores'][0])

numpy.int64

In [32]:
mergeddf['score_x'] = mergeddf['score_x'].astype(int)


In [33]:
mergeddf.head()

Unnamed: 0,uid,profile,anime_uid,score_x,scores,title,synopsis,genre,aired,episodes,...,score_y,gender,birthday,favorites_anime,Story score,Animation score,Sound score,Character score,Enjoyment score,binned_scores
0,29323.0,Slushpuppy282,7588.0,7,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,...,5.9,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",7,6,9,6,0,6
4,30968.0,ParaParaJMo,1253.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '9...",Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,...,6.7,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",8,9,8,9,8,7
12,8603.0,ParaParaJMo,1088.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '8...",Tegami: Haikei Juugo no Kimi e,A music video from NHK's Minna no Uta featurin...,"['Music', 'Slice of Life', 'Kids', 'School']","Aug 1, 2008",1.0,...,6.44,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",9,9,9,9,9,7
20,8584.0,ParaParaJMo,81.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '1...",Future War 198X-nen,An American scientist constructs a laser–satel...,"['Military', 'Drama']","Oct 30, 1982",1.0,...,6.21,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",7,8,8,7,8,7
28,30970.0,ParaParaJMo,6171.0,8,"{'Overall': '8', 'Story': '8', 'Animation': '9...",Jidou Bungaku Library,A series of children's OVAs covering various s...,"['Demons', 'Drama', 'Fantasy', 'Historical', '...",Not available,6.0,...,6.83,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",8,8,6,9,7,7


In [46]:
mergeddf['Animation score'].value_counts()

 9     129
 8     125
 10    117
 7      86
 6      34
 5      25
 3       7
 4       6
 0       6
 1       2
 2       2
Name: Animation score, dtype: int64

In [34]:
mergeddf.head()

Unnamed: 0,uid,profile,anime_uid,score_x,scores,title,synopsis,genre,aired,episodes,...,score_y,gender,birthday,favorites_anime,Story score,Animation score,Sound score,Character score,Enjoyment score,binned_scores
0,29323.0,Slushpuppy282,7588.0,7,"{'Overall': '7', 'Story': '7', 'Animation': '6...","Oyaji no, Imo no Kamisama.",A man wanders into a liquor store and sees a f...,['Slice of Life'],"Dec 31, 2014",1.0,...,5.9,Female,"Dec 10, 1990","['407', '3588', '177', '1604', '269']",7,6,9,6,0,6
4,30968.0,ParaParaJMo,1253.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '9...",Kokoro no Catchball,An educational anime about the importance of h...,"['Kids', 'Sports']",2005,1.0,...,6.7,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",8,9,8,9,8,7
12,8603.0,ParaParaJMo,1088.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '8...",Tegami: Haikei Juugo no Kimi e,A music video from NHK's Minna no Uta featurin...,"['Music', 'Slice of Life', 'Kids', 'School']","Aug 1, 2008",1.0,...,6.44,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",9,9,9,9,9,7
20,8584.0,ParaParaJMo,81.0,9,"{'Overall': '9', 'Story': '9', 'Animation': '1...",Future War 198X-nen,An American scientist constructs a laser–satel...,"['Military', 'Drama']","Oct 30, 1982",1.0,...,6.21,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",7,8,8,7,8,7
28,30970.0,ParaParaJMo,6171.0,8,"{'Overall': '8', 'Story': '8', 'Animation': '9...",Jidou Bungaku Library,A series of children's OVAs covering various s...,"['Demons', 'Drama', 'Fantasy', 'Historical', '...",Not available,6.0,...,6.83,Male,"Jun 24, 1984","['1362', '263', '81', '2402', '85', '37991']",8,8,6,9,7,7


## Target assignment

- 0 is low y score high score x
- 1 is high score y low score x
- 2 low y low x
- 3 high y high x



In [35]:
mergeddf['target'] = np.where((mergeddf['score_y'] > 5) & (mergeddf['score_x'] > 5), 3, np.where((mergeddf['score_y'] <= 5 ) & (mergeddf['score_x'] <= 5), 2, np.where((mergeddf['score_y'] > 5) & (mergeddf['score_x'] <= 5),1, 0)))

In [36]:
mergeddf['target'].value_counts()

3    3645
1     526
0     309
2      47
Name: target, dtype: int64

## simple impute

## Smote

In [None]:
X
Y

In [38]:
numeric_features = X.select_dtypes(exclude="object").columns
numeric_transformer = Pipeline(
    steps=[("scaler", StandardScaler())]
)

categorical_features = X.select_dtypes(include="object").columns
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

NameError: name 'X' is not defined

## First simple model