In [464]:
import sys
sys.path.append('../../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from run import run
from utils import save_all_columns, save_columns

In [465]:
# Read the four raw competition tables; paths are relative to this notebook's directory.
train, test, anime, profile = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/train.csv',
        '../../data/test.csv',
        '../../data/anime.csv',
        '../../data/profile.csv',
    )
)

In [466]:
# Keep only the first row seen for every anime id (the original row index is kept as-is).
anime = anime.drop_duplicates(subset=['id'], keep='first')

In [467]:
from datetime import datetime

def len_month(date1, date2):
    """Return the signed number of calendar months from ``date1`` to ``date2``.

    Args:
        date1: ``(year, month)`` pair for the start.
        date2: ``(year, month)`` pair for the end.

    Returns:
        ``(year2 - year1) * 12 + (month2 - month1)`` as an int, or ``np.nan``
        when any of the four components is null.

    Raises:
        ValueError: if a year/month pair is not a valid calendar month
            (raised by the ``datetime`` constructor).
    """
    has_missing = any(
        pd.isnull(pair[position])
        for pair in (date1, date2)
        for position in (0, 1)
    )
    if has_missing:
        return np.nan

    start_year, start_month = date1
    end_year, end_month = date2

    # Building datetimes validates the year/month values before the arithmetic.
    start = datetime(int(start_year), int(start_month), 1)
    end = datetime(int(end_year), int(end_month), 1)

    year_gap = end.year - start.year
    return year_gap * 12 + end.month - start.month

In [468]:
def len_day(date1, date2):
    """Return the signed number of days from ``date1`` to ``date2``.

    Args:
        date1: ``(year, month, day)`` triple for the start.
        date2: ``(year, month, day)`` triple for the end.

    Returns:
        The day difference as an int, or ``np.nan`` when any of the six
        components is null. ``np.nan`` (rather than ``pd.NA``) matches
        ``len_month`` and keeps a column built from these values numeric
        instead of object-typed.

    Raises:
        ValueError: if a triple is not a valid calendar date
            (raised by the ``datetime`` constructor).
    """
    if any(pd.isnull(date[i]) for date in (date1, date2) for i in range(3)):
        return np.nan

    start = datetime(int(date1[0]), int(date1[1]), int(date1[2]))
    end = datetime(int(date2[0]), int(date2[1]), int(date2[2]))

    return (end - start).days


In [469]:
# Label-encode the anime ids up front.
from sklearn.preprocessing import LabelEncoder

In [470]:
# Encoder that is fitted on the anime ids to produce integer codes.
le = LabelEncoder()

In [471]:
# Fit the encoder on the anime ids and store the resulting integer code per row.
anime_id_codes = le.fit_transform(anime['id'])
anime['anime_label_encode'] = anime_id_codes

In [472]:
anime

Unnamed: 0,id,title,synopsis,genre,episodes,members,popularity,ranked,start_month,start_day,start_year,end_month,end_day,end_year,anime_label_encode
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...",25.0,489888,141,25.0,10.0,4.0,2015.0,3.0,27.0,2016.0,9832
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...",22.0,995473,28,24.0,10.0,10.0,2014.0,3.0,20.0,2015.0,8823
2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...",13.0,581663,98,23.0,7.0,7.0,2017.0,9.0,29.0,2017.0,12420
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","['Action', 'Military', 'Adventure', 'Comedy', ...",64.0,1615084,4,1.0,4.0,5.0,2009.0,7.0,4.0,2010.0,3946
4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"['Action', 'Mystery', 'Supernatural', 'Vampire']",1.0,214621,502,22.0,1.0,6.0,2017.0,1.0,6.0,2017.0,11031
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16363,10075,Naruto x UT,All-new animation offered throughout UNIQLO cl...,"['Action', 'Comedy', 'Super Power', 'Martial A...",1.0,34155,2382,1728.0,1.0,1.0,2011.0,1.0,1.0,2011.0,6017
16364,35828,Miira no Kaikata,High school student Sora Kashiwagi is accustom...,"['Slice of Life', 'Comedy', 'Supernatural']",12.0,61459,1648,1727.0,1.0,12.0,2018.0,3.0,30.0,2018.0,13197
16365,10378,Shinryaku!? Ika Musume,"After regaining her squid-like abilities, Ika ...","['Slice of Life', 'Comedy', 'Shounen']",12.0,67422,1547,1548.0,9.0,27.0,2011.0,12.0,25.0,2011.0,6149
16366,33082,Kingsglaive: Final Fantasy XV,"For years, the Niflheim Empire and the kingdom...",['Action'],1.0,41077,2154,1544.0,7.0,9.0,2016.0,7.0,9.0,2016.0,11627


In [473]:
def _month_span(row):
    """Months between the start and end (year, month) of one anime row."""
    return len_month((row['start_year'], row['start_month']), (row['end_year'], row['end_month']))


def _day_span(row):
    """Days between the start and end (year, month, day) of one anime row."""
    return len_day((row['start_year'], row['start_month'], row['start_day']),
                   (row['end_year'], row['end_month'], row['end_day']))


# Broadcast span expressed in years, months and days.
anime['len_year'] = anime['end_year'] - anime['start_year']
anime['len_month'] = anime.apply(_month_span, axis=1)
anime['len_day'] = anime.apply(_day_span, axis=1)

In [474]:
# Episodes per month / per day of the broadcast span.
# A zero-length span is swapped for -1 before dividing, so those rows come out as
# the negated episode count instead of a division by zero.
anime['episode_per_month'] = anime['episodes'].divide((anime['len_month']).where(anime['len_month'] != 0, -1))
# The day-based rate must be masked on len_day itself: masking on len_month (as before)
# replaced every span that starts and ends in the same calendar month with -1,
# even when it lasted several days.
anime['episode_per_day'] = anime['episodes'].divide((anime['len_day']).where(anime['len_day'] != 0, -1))

In [475]:
# Rows whose genre string is the empty list literal get a single placeholder genre.
has_no_genre = anime['genre'].apply(lambda genre_text: genre_text == '[]')
anime.loc[anime.index[has_no_genre], 'genre'] = "['None']"

In [476]:
import ast

# Parse the stringified Python list in `genre` into a real list.
# ast.literal_eval only accepts literals, so text from the CSV can never execute code
# the way a bare eval() could.
anime['genre_list'] = anime['genre'].apply(ast.literal_eval)

# One row per (anime, genre) via explode, one-hot encode the genre, then sum back to
# one row per anime (grouping on the original row index).
anime_genre_categories = pd.get_dummies(anime['genre_list'].explode()).groupby(level=0).sum()
# Index-aligned assignment: attaches each row's anime id to its genre indicator row.
anime_genre_categories['anime_id'] = anime['id']

In [477]:
# Stack the (user, anime_id) pairs of train and test; reset_index() keeps the old
# row labels in an `index` column.
user_anime_pairs = [frame[['user', 'anime_id']] for frame in (train, test)]
all_user_anime = pd.concat(user_anime_pairs, axis=0).reset_index()

In [478]:
# Attach the genre indicator columns to every (user, anime) pair, then keep only
# `user` plus the genre columns.
pairs_with_genres = all_user_anime.merge(
    anime_genre_categories,
    how='left',
    left_on='anime_id',
    right_on='anime_id',
)
all_genren_df = pairs_with_genres.drop(['index', 'anime_id'], axis=1)

In [479]:
all_genren_df

Unnamed: 0,user,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,cd931c240b,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,bf666ac921,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,24a73e9958,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
3,d45057b3ec,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,9b33e2839e,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130514,f60d21c6ff,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
130515,1efa7f7adc,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,0
130516,76b6afecea,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
130517,ae8792e8c9,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0


In [480]:
train.columns[~train.columns.isin(['user', 'anime_id_x'])]

Index(['anime_id', 'text', 'score'], dtype='object')

In [481]:
all_genren_df

Unnamed: 0,user,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,cd931c240b,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,bf666ac921,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,24a73e9958,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
3,d45057b3ec,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,9b33e2839e,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130514,f60d21c6ff,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
130515,1efa7f7adc,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,0
130516,76b6afecea,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
130517,ae8792e8c9,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0


In [482]:
# Average every genre indicator per user; `user` becomes the index of the result.
genres_by_user = all_genren_df.groupby('user')
user_df = genres_by_user.mean()

In [483]:
# TruncatedSVD was used without a visible import, so this cell only ran when the name
# was already in the kernel; import it here so Restart & Run All works.
from sklearn.decomposition import TruncatedSVD

# 8-component truncated SVD with a fixed seed for reproducible embeddings.
svd = TruncatedSVD(n_components=8, random_state=34)

In [484]:
# Fit the SVD on the per-user genre means and get one embedding row per user.
# NOTE(review): `user` is the index of user_df, so iloc[:, 1:-2] skips the first genre
# column and the last two genre columns. Confirm that excluding them is intended and
# not a leftover from a layout where other columns occupied those positions.
embed = svd.fit_transform(user_df.iloc[:, 1:-2])

In [485]:
# Writes the 8 embedding columns over user_df's columns at positions 1..8, replacing
# the genre means stored there.
# NOTE(review): this makes the notebook non-idempotent - re-running the fit_transform
# cell afterwards would fit on the overwritten values. The later visible code only uses
# user_df.index; confirm whether this cell is still needed.
user_df.iloc[:, 1:1+8] = embed

In [486]:
user_df

Unnamed: 0_level_0,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0002c10d94,1.0,0.912956,0.567997,-0.349048,0.940843,-0.733624,0.480409,0.054088,0.090305,1.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
000383d201,0.0,0.354625,0.372393,-0.266042,0.273083,-0.362231,0.260864,0.139913,0.341126,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
000580858d,0.0,1.172495,-0.100250,-0.190545,-0.594821,0.201801,0.690471,-0.650135,-0.291350,0.000000,...,0.0,0.000000,0.0,0.0,0.500000,1.000000,0.0,0.0,0.0,0.0
0006492d2e,1.0,1.080357,1.286739,-0.600679,-0.029399,-0.408168,0.046560,0.075762,-0.058861,0.000000,...,0.0,0.000000,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.0
00068c7501,0.5,1.116828,-0.159333,-0.002776,0.234110,-0.556268,0.138471,-0.176358,0.035482,0.166667,...,0.0,0.333333,0.0,0.0,0.000000,0.500000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fffd599c29,0.0,1.132534,-0.649303,-0.436521,-0.428649,-0.089786,0.235947,-0.247881,0.136083,0.000000,...,0.0,0.500000,0.0,0.0,0.000000,0.500000,0.0,0.0,0.0,0.0
ffff2d3a9c,0.0,0.563733,0.771774,-0.454854,0.587168,-0.425878,0.274495,0.254502,0.322298,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
ffff9b9e3a,0.0,0.865171,-0.215462,-0.374871,-0.244398,0.442452,-0.050902,-0.422051,0.106454,0.000000,...,0.0,0.333333,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
ffffb3bf15,0.0,1.312391,-0.647816,0.735580,0.044358,-0.601136,0.127887,0.214452,-0.517113,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,1.000000,0.0,0.0,0.0,0.0


In [487]:
# Name the 8 SVD components and index them by user so they can be joined on `user`.
# NOTE(review): the last name is "emb18" (not "emb8"); it is kept unchanged because the
# column name is carried into the saved features - confirm before renaming.
emb_columns = ["emb1", "emb2", "emb3", "emb4", "emb5", "emb6", "emb7", "emb18"]
emb = pd.DataFrame(data=embed, index=user_df.index, columns=emb_columns)

In [488]:
# Attach the per-user genre embedding to every training row.
# how='left' matches the test-side merge and guarantees no training row is dropped
# if a user were ever missing from `emb` (the default inner join would drop it silently).
train = pd.merge(
    train,
    emb,
    on='user',
    how='left'
)

In [489]:
# Attach the per-user genre embedding to every test row (left join keeps all test rows).
test = test.merge(emb, how='left', on='user')

In [490]:
# Add the genre indicator columns to the anime table. The key columns have different
# names, so both `id` and `anime_id` are present in the result.
anime = anime.merge(
    anime_genre_categories,
    how='left',
    left_on='id',
    right_on='anime_id',
)

In [491]:
anime

Unnamed: 0,id,title,synopsis,genre,episodes,members,popularity,ranked,start_month,start_day,...,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri,anime_id
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...",25.0,489888,141,25.0,10.0,4.0,...,0,0,1,0,0,0,0,0,0,28891
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...",22.0,995473,28,24.0,10.0,10.0,...,0,0,0,0,0,0,0,0,0,23273
2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...",13.0,581663,98,23.0,7.0,7.0,...,0,0,0,0,0,0,0,0,0,34599
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","['Action', 'Military', 'Adventure', 'Comedy', ...",64.0,1615084,4,1.0,4.0,5.0,...,0,0,0,0,0,0,0,0,0,5114
4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"['Action', 'Mystery', 'Supernatural', 'Vampire']",1.0,214621,502,22.0,1.0,6.0,...,0,0,0,0,1,0,1,0,0,31758
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16211,10075,Naruto x UT,All-new animation offered throughout UNIQLO cl...,"['Action', 'Comedy', 'Super Power', 'Martial A...",1.0,34155,2382,1728.0,1.0,1.0,...,0,0,0,1,0,0,0,0,0,10075
16212,35828,Miira no Kaikata,High school student Sora Kashiwagi is accustom...,"['Slice of Life', 'Comedy', 'Supernatural']",12.0,61459,1648,1727.0,1.0,12.0,...,1,0,0,0,1,0,0,0,0,35828
16213,10378,Shinryaku!? Ika Musume,"After regaining her squid-like abilities, Ika ...","['Slice of Life', 'Comedy', 'Shounen']",12.0,67422,1547,1548.0,9.0,27.0,...,1,0,0,0,0,0,0,0,0,10378
16214,33082,Kingsglaive: Final Fantasy XV,"For years, the Niflheim Empire and the kingdom...",['Action'],1.0,41077,2154,1544.0,7.0,9.0,...,0,0,0,0,0,0,0,0,0,33082


In [492]:
anime.head()

Unnamed: 0,id,title,synopsis,genre,episodes,members,popularity,ranked,start_month,start_day,...,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri,anime_id
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...",25.0,489888,141,25.0,10.0,4.0,...,0,0,1,0,0,0,0,0,0,28891
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...",22.0,995473,28,24.0,10.0,10.0,...,0,0,0,0,0,0,0,0,0,23273
2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...",13.0,581663,98,23.0,7.0,7.0,...,0,0,0,0,0,0,0,0,0,34599
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","['Action', 'Military', 'Adventure', 'Comedy', ...",64.0,1615084,4,1.0,4.0,5.0,...,0,0,0,0,0,0,0,0,0,5114
4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"['Action', 'Mystery', 'Supernatural', 'Vampire']",1.0,214621,502,22.0,1.0,6.0,...,0,0,0,0,1,0,1,0,0,31758


In [493]:
anime

Unnamed: 0,id,title,synopsis,genre,episodes,members,popularity,ranked,start_month,start_day,...,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri,anime_id
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...",25.0,489888,141,25.0,10.0,4.0,...,0,0,1,0,0,0,0,0,0,28891
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...",22.0,995473,28,24.0,10.0,10.0,...,0,0,0,0,0,0,0,0,0,23273
2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...",13.0,581663,98,23.0,7.0,7.0,...,0,0,0,0,0,0,0,0,0,34599
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","['Action', 'Military', 'Adventure', 'Comedy', ...",64.0,1615084,4,1.0,4.0,5.0,...,0,0,0,0,0,0,0,0,0,5114
4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"['Action', 'Mystery', 'Supernatural', 'Vampire']",1.0,214621,502,22.0,1.0,6.0,...,0,0,0,0,1,0,1,0,0,31758
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16211,10075,Naruto x UT,All-new animation offered throughout UNIQLO cl...,"['Action', 'Comedy', 'Super Power', 'Martial A...",1.0,34155,2382,1728.0,1.0,1.0,...,0,0,0,1,0,0,0,0,0,10075
16212,35828,Miira no Kaikata,High school student Sora Kashiwagi is accustom...,"['Slice of Life', 'Comedy', 'Supernatural']",12.0,61459,1648,1727.0,1.0,12.0,...,1,0,0,0,1,0,0,0,0,35828
16213,10378,Shinryaku!? Ika Musume,"After regaining her squid-like abilities, Ika ...","['Slice of Life', 'Comedy', 'Shounen']",12.0,67422,1547,1548.0,9.0,27.0,...,1,0,0,0,0,0,0,0,0,10378
16214,33082,Kingsglaive: Final Fantasy XV,"For years, the Niflheim Empire and the kingdom...",['Action'],1.0,41077,2154,1544.0,7.0,9.0,...,0,0,0,0,0,0,0,0,0,33082


In [494]:
# Drop the free-text columns and the raw genre string before joining onto train/test.
text_columns = ['title', 'synopsis', 'genre']
anime = anime.drop(columns=text_columns)

In [495]:
# Join the anime features onto each training row (train.anime_id -> anime.id).
train = train.merge(anime, how='left', left_on='anime_id', right_on='id')

In [496]:
# Join the anime features onto each test row (test.anime_id -> anime.id).
test = test.merge(anime, how='left', left_on='anime_id', right_on='id')

In [497]:
import re

def get_birth_year(birthday):
    """Extract the 4-digit year from a free-form birthday string.

    Args:
        birthday: the raw birthday value; anything that is not a string
            (e.g. NaN or None) is treated as missing.

    Returns:
        The year as an int, or None when the value is not a string or
        contains no standalone 4-digit number.

    Raises:
        ValueError: if more than one standalone 4-digit number is found,
            since the year would be ambiguous.
    """
    # isinstance (not an exact type comparison) so str subclasses are parsed too.
    if not isinstance(birthday, str):
        return None

    years = re.findall(r'\b\d{4}\b', birthday)
    if not years:
        return None
    if len(years) > 1:
        raise ValueError("found more than one year in birthday")
    return int(years[0])

In [498]:
# Re-read profile.csv so the profile feature steps below start from the raw table
# (the same file is also loaded with the other CSVs near the top of the notebook).
profile = pd.read_csv('../../data/profile.csv')

In [499]:
# Parse the birth year out of each raw birthday value (missing/unparseable -> null).
birth_years = profile['birthday'].apply(get_birth_year)
profile['birth_year'] = birth_years

In [500]:
# One-hot encode gender (missing values become their own 'NaN' category), append the
# indicator columns, and drop the raw birthday now that birth_year has been extracted.
gender_dummies = pd.get_dummies(profile['gender'].fillna('NaN'))
profile = pd.concat([profile, gender_dummies], axis=1).drop(['birthday'], axis=1)

In [502]:
# Replace missing gender values with the literal string "NaN" in the raw column too.
gender_filled = profile["gender"].fillna("NaN")
profile["gender"] = gender_filled

In [503]:
# Join the user profile features onto each training row.
train = train.merge(profile, how='left', on='user')

In [504]:
# Join the user profile features onto each test row.
test = test.merge(profile, how='left', on='user')

In [505]:
train

Unnamed: 0,user,anime_id_x,text,score,emb1,emb2,emb3,emb4,emb5,emb6,...,Vampire,Yaoi,Yuri,anime_id_y,gender,birth_year,Female,Male,NaN,Non-Binary
0,cd931c240b,6574,\n \n \n \n ...,9,1.012439,-0.567000,-0.450207,-0.169131,0.219546,-0.301233,...,0,0,0,6574,Male,1987.0,False,True,False,False
1,cd931c240b,696,\n \n \n \n ...,8,1.012439,-0.567000,-0.450207,-0.169131,0.219546,-0.301233,...,0,0,0,696,Male,1987.0,False,True,False,False
2,cd931c240b,18671,\n \n \n \n ...,7,1.012439,-0.567000,-0.450207,-0.169131,0.219546,-0.301233,...,0,0,0,18671,Male,1987.0,False,True,False,False
3,cd931c240b,1887,\n \n \n \n ...,9,1.012439,-0.567000,-0.450207,-0.169131,0.219546,-0.301233,...,0,0,0,1887,Male,1987.0,False,True,False,False
4,cd931c240b,1888,\n \n \n \n ...,8,1.012439,-0.567000,-0.450207,-0.169131,0.219546,-0.301233,...,0,0,0,1888,Male,1987.0,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105344,390ead30b3,15809,\n \n \n \n ...,7,1.442797,0.162488,-0.366687,-0.187019,-0.765588,0.536387,...,0,0,0,15809,,,False,False,True,False
105345,dc55396b83,110,\n \n \n \n ...,7,1.085519,0.106955,-0.177296,-0.189036,0.468580,-0.882194,...,0,0,0,110,Male,1997.0,False,True,False,False
105346,22815a50b1,9756,\n \n \n \n ...,10,0.616212,0.283331,0.876315,0.115158,-0.063936,-0.573031,...,0,0,0,9756,,,False,False,True,False
105347,52c79b3a9c,228,\n \n \n \n ...,9,0.537338,0.477994,1.021022,-1.104988,-0.422709,0.282177,...,0,0,0,228,,,False,False,True,False


In [506]:
train['start_year']

0         2010.0
1         1995.0
2         2014.0
3         2007.0
4         2007.0
           ...  
105344    2013.0
105345    1997.0
105346    2011.0
105347    2005.0
105348    2009.0
Name: start_year, Length: 105349, dtype: float64

In [509]:
# Anime start year minus the user's birth year. Column-wise subtraction gives the same
# values as the previous row-by-row apply without looping over every row in Python.
train['age_of_start'] = train['start_year'] - train['birth_year']

In [511]:
train.columns

Index(['user', 'anime_id_x', 'text', 'score', 'emb1', 'emb2', 'emb3', 'emb4',
       'emb5', 'emb6', 'emb7', 'emb18', 'id', 'episodes', 'members',
       'popularity', 'ranked', 'start_month', 'start_day', 'start_year',
       'end_month', 'end_day', 'end_year', 'anime_label_encode', 'len_year',
       'len_month', 'len_day', 'episode_per_month', 'episode_per_day',
       'genre_list', 'Action', 'Adventure', 'Cars', 'Comedy', 'Dementia',
       'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game', 'Harem', 'Hentai',
       'Historical', 'Horror', 'Josei', 'Kids', 'Magic', 'Martial Arts',
       'Mecha', 'Military', 'Music', 'Mystery', 'None', 'Parody', 'Police',
       'Psychological', 'Romance', 'Samurai', 'School', 'Sci-Fi', 'Seinen',
       'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai', 'Slice of Life',
       'Space', 'Sports', 'Super Power', 'Supernatural', 'Thriller', 'Vampire',
       'Yaoi', 'Yuri', 'anime_id_y', 'gender', 'birth_year', 'Female', 'Male',
       'NaN', 'Non-Binary', '

In [520]:
train.iloc[0, :]['start_year']

2010.0

In [523]:
# Anime start year minus the user's birth year, for both splits. Column-wise subtraction
# gives the same values as the previous row-by-row apply without a Python-level loop.
train['age_of_start'] = train['start_year'] - train['birth_year']

test['age_of_start'] = test['start_year'] - test['birth_year']

In [526]:
# Split test rows by whether their user also appears in train.
user_in_train = test["user"].isin(train["user"])
test_seen = test[user_in_train].reset_index(drop=True)
test_unseen = test[~user_in_train].reset_index(drop=True)

In [527]:
# Persist the training features through the project helper imported from utils.
# presumably one artifact per column (the progress bar below counts 82 columns) -
# verify against utils.save_all_columns.
save_all_columns(train, "train")

save columns: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [00:01<00:00, 62.24it/s, save age_of_start]


In [528]:
# Remove the anime-table `id` column from both test splits before saving.
test_seen = test_seen.drop(columns=["id"])
test_unseen = test_unseen.drop(columns=["id"])

In [529]:
# Save both test splits under "test", prefixing every column name with its split so the
# seen and unseen columns stay distinguishable.
for split_prefix, split_frame in (("test_seen_", test_seen), ("test_unseen_", test_unseen)):
    prefixed = split_frame.rename(columns=(lambda col: split_prefix + col))
    save_all_columns(prefixed, "test")

save columns: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 842.86it/s, save test_seen_age_of_start]
save columns: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 1062.56it/s, save test_unseen_age_of_start]


In [530]:
train.columns

Index(['user', 'anime_id_x', 'text', 'score', 'emb1', 'emb2', 'emb3', 'emb4',
       'emb5', 'emb6', 'emb7', 'emb18', 'id', 'episodes', 'members',
       'popularity', 'ranked', 'start_month', 'start_day', 'start_year',
       'end_month', 'end_day', 'end_year', 'anime_label_encode', 'len_year',
       'len_month', 'len_day', 'episode_per_month', 'episode_per_day',
       'genre_list', 'Action', 'Adventure', 'Cars', 'Comedy', 'Dementia',
       'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game', 'Harem', 'Hentai',
       'Historical', 'Horror', 'Josei', 'Kids', 'Magic', 'Martial Arts',
       'Mecha', 'Military', 'Music', 'Mystery', 'None', 'Parody', 'Police',
       'Psychological', 'Romance', 'Samurai', 'School', 'Sci-Fi', 'Seinen',
       'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai', 'Slice of Life',
       'Space', 'Sports', 'Super Power', 'Supernatural', 'Thriller', 'Vampire',
       'Yaoi', 'Yuri', 'anime_id_y', 'gender', 'birth_year', 'Female', 'Male',
       'NaN', 'Non-Binary', '