In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from multiprocessing import Pool

In [2]:
# Load the watch-list frame (presumably the cleaned export, judging by the file
# name). pandas infers the xz compression from the ".xz" suffix.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
watch_list_path = '../data/watch_list_clean.pkl.xz'
df = pd.read_pickle(watch_list_path)

### Normalize each user's ratings relative to their own rating behavior

In [34]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,title,url,username,status,times_watched,user_rating,num_eps,is_ongoing,duration,studio,start_year,end_year,season,avg_rating,num_votes,synopsis,tags,content_warnings,adj_user_rating1,adj_user_rating2
0,Day Break Illusion: il sole penetra le illusioni,https://www.anime-planet.com/anime/day-break-i...,Ruth,Watched,1,3.0,13,False,,AIC,2013.0,2013.0,Summer,3.158,1980,Akari has always known two things: she’s a ski...,"[Drama, Fantasy, Horror, Magical Girl, Contemp...",,-0.617182,2.777778
1,Dog Days,https://www.anime-planet.com/anime/dog-days,Ruth,Watched,1,3.5,13,False,,Seven Arcs,2011.0,2011.0,Spring,3.524,9447,"In the magical land of Flonyard, animal-eared ...","[Action, Adventure, Ecchi, Fantasy, Animal Cha...",,-0.073,3.333333
2,Dog Days',https://www.anime-planet.com/anime/dog-days-2,Ruth,Watched,1,3.0,13,False,,Seven Arcs,2012.0,2012.0,Summer,3.651,5345,Three months have passed since Shinku returned...,"[Action, Adventure, Ecchi, Fantasy, Animal Cha...",,-0.617182,2.777778
3,Fantasista Doll,https://www.anime-planet.com/anime/fantasista-...,Ruth,Watched,1,2.5,12,False,,Hoods Entertainment,2013.0,2013.0,Summer,2.918,1206,Uzume Uno was on her way to class one day when...,"[Magical Girl, Sci Fi, Seinen, Slice of Life, ...",,-1.161365,2.222222
4,Fate/Kaleid Liner Prisma Illya,https://www.anime-planet.com/anime/fate-kaleid...,Ruth,Watched,1,3.5,10,False,,SILVER LINK.,2013.0,2013.0,Summer,3.513,5655,Illya loves magical girls; but more than anyth...,"[Action, Fantasy, Magical Girl, Shounen, Conte...",,-0.073,3.333333


In [33]:
# Scratch check: Series.apply calls np.mean once per element, so each scalar
# rating comes back unchanged.
df['user_rating'].head().apply(np.mean)

0    3.0
1    3.5
2    3.0
3    2.5
4    3.5
Name: user_rating, dtype: float64

In [60]:
def applyParallel(df, group_cols, apply_cols, func, num_workers=12):
    """Apply ``func`` to every group of ``df``, optionally in worker processes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split into groups.
    group_cols : label or list of labels
        Passed to ``df.groupby``.
    apply_cols : label or list of labels
        Column(s) selected from each group before ``func`` is called.
    func : callable
        Called once per group with that group's selection. When worker
        processes are used it is sent to them by ``Pool.map``, so it must be
        picklable (e.g. a module-level function, not a lambda).
    num_workers : int or None, default 12
        Size of the process pool. ``None`` lets ``Pool`` pick its default;
        values ``<= 1`` run everything in the current process.

    Returns
    -------
    list of tuple
        ``(group_name, func(group))`` pairs in groupby iteration order.
        An empty list when there are no groups.
    """
    grouped = df.groupby(group_cols)[apply_cols]

    # Iterate the GroupBy directly instead of coercing it through
    # ``np.array(..., dtype=object).T``: numpy may build a deeper array when
    # the group keys and every group happen to have the same length, which
    # breaks (or silently corrupts) the names/groups split.
    names = []
    groups = []
    for name, group in grouped:
        names.append(name)
        groups.append(group)

    if not groups:
        return []

    if num_workers is not None and num_workers <= 1:
        # In-process path: no pickling and no process start-up cost.
        results = [func(group) for group in groups]
    else:
        with Pool(num_workers) as pool:
            results = pool.map(func, groups)

    return list(zip(names, results))

In [85]:
def _transform(args):
    data, func = args
    data = (data - data) + func(data)
    return data

In [98]:
def transformParallel(df, group_cols, transform_cols, func, num_workers=12):
    """Group-wise transform of ``df[transform_cols]``, optionally in parallel.

    Each group is handed to ``_transform`` together with ``func`` and the
    per-group results are concatenated.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split into groups.
    group_cols : label or list of labels
        Passed to ``df.groupby``.
    transform_cols : label or list of labels
        Column(s) selected from each group before ``func`` is called.
    func : callable
        Applied to each group's selection. When worker processes are used it
        is sent to them by ``Pool.map``, so it must be picklable.
    num_workers : int or None, default 12
        Size of the process pool. ``None`` lets ``Pool`` pick its default;
        values ``<= 1`` run everything in the current process.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Concatenation of the per-group results. Rows come back in group
        order, not in the original row order; call ``.sort_index()`` on the
        result if the original order is needed. An empty selection of
        ``df[transform_cols]`` is returned when there are no groups.
    """
    grouped = df.groupby(group_cols)[transform_cols]

    # Iterate the GroupBy directly; coercing it through
    # ``np.array(..., dtype=object).T`` depends on numpy's handling of ragged
    # object input and the group names are not needed here anyway.
    tasks = [(group, func) for _, group in grouped]

    if not tasks:
        # pd.concat([]) raises, so hand back an empty, correctly-labelled result.
        return df[transform_cols].iloc[0:0]

    if num_workers is not None and num_workers <= 1:
        # In-process path: no pickling and no process start-up cost.
        pieces = [_transform(task) for task in tasks]
    else:
        with Pool(num_workers) as pool:
            pieces = pool.map(_transform, tasks)

    return pd.concat(pieces)

In [99]:
def mean_normalization(x):
    """Standardise ``x`` by subtracting its mean and dividing by its std.

    ``x`` must provide ``.mean()`` and ``.std()``. For a pandas Series whose
    values are all identical (or that has a single element) the standard
    deviation is 0 or NaN, so every output value is NaN.
    """
    centre = x.mean()
    spread = x.std()
    return (x - centre) / spread

In [100]:
def max_min_normalization(x):
    """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 5.

    ``x`` must provide ``.min()`` and ``.max()``. When all values are equal
    the range is 0 and, for a pandas Series, every output value is NaN.
    """
    lowest = x.min()
    highest = x.max()
    return 5 * (x - lowest) / (highest - lowest)

In [101]:
%%time
res = transformParallel(df, 'username', 'user_rating', mean_normalization)

CPU times: user 25.4 s, sys: 1.55 s, total: 27 s
Wall time: 27.1 s


In [103]:
# transformParallel concatenates results group by group, so sort by index to
# view them in the frame's row order.
res.sort_index(ascending=True)

0         -0.617182
1         -0.073000
2         -0.617182
3         -1.161365
4         -0.073000
             ...   
9205629    0.756533
9205630    0.756533
9205631    0.756533
9205632    0.756533
9205633    0.756533
Name: user_rating, Length: 9205634, dtype: float32

In [96]:
%%time
df['adj_user_rating1'] = df.groupby(['username'])['user_rating'].transform(lambda x: (x - x.mean()) / x.std())

CPU times: user 41.6 s, sys: 148 ms, total: 41.8 s
Wall time: 41.7 s


In [4]:
# adj_user_rating1: per-user z-score of the raw rating.
# Built-in groupby aggregations broadcast through transform() replace the
# per-group Python lambda, which is called once per user and dominates the
# runtime on a large frame. Users whose ratings are all identical (or who have
# a single rating) get NaN, because their standard deviation is 0 or undefined.
raw_by_user = df.groupby('username')['user_rating']
user_mean = raw_by_user.transform('mean')
user_std = raw_by_user.transform('std')
df['adj_user_rating1'] = (df['user_rating'] - user_mean) / user_std

# adj_user_rating2: the z-score rescaled per user onto the 0-5 range
# (0 = that user's lowest rating, 5 = their highest). NaN when a user's
# z-scores have no spread.
z_by_user = df.groupby('username')['adj_user_rating1']
user_z_min = z_by_user.transform('min')
user_z_max = z_by_user.transform('max')
df['adj_user_rating2'] = 5 * (df['adj_user_rating1'] - user_z_min) / (user_z_max - user_z_min)

In [None]:
# Display the per-user 0-5 rescaled ratings (pandas truncates the output).
df.loc[:, 'adj_user_rating2']

In [None]:
# Show the raw rating next to both adjusted ratings for the last five rows.
rating_preview_cols = [
    'username',
    'title',
    'user_rating',
    'adj_user_rating1',
    'adj_user_rating2',
]
df.tail(n=5)[rating_preview_cols]

In [None]:
# Range of the per-user z-scores. The frame has no 'adj_user_rating' column
# (only 'adj_user_rating1' and 'adj_user_rating2'), so the old name raised a
# KeyError. The z-score column is used here because the 0-5 rescaled column
# spans 0 to 5 by construction.
df['adj_user_rating1'].agg(['min', 'max'])

In [None]:
# Distribution of the per-user z-scores. The frame has no 'adj_user_rating'
# column, so plot 'adj_user_rating1' (the -5..5 window matches z-scores).
# Undefined z-scores (users with no rating spread) are dropped explicitly
# rather than relying on how the histogram treats NaN.
z_scores = df['adj_user_rating1'].dropna()

fig, ax = plt.subplots()
ax.hist(z_scores, bins=100)
ax.set_xlim(-5, 5)
ax.set(
    xlabel='Per-user standardised rating (z-score)',
    ylabel='Number of ratings',
    title='Distribution of user-normalised ratings',
);