In [1]:
import os.path
import numpy as np
import pandas as pd

In [2]:
def _six_decimals(value):
    """Render ``value`` with exactly six digits after the decimal point."""
    return '%.6f' % value


# Show floats in DataFrame output with fixed six-decimal precision.
pd.set_option('display.float_format', _six_decimals)

## Load data

In [3]:
def load_data(dirname, filename):
    """Return the absolute path of ``filename`` located under ``dirname``.

    Despite its name, this helper reads nothing from disk: it resolves
    ``dirname`` against the current working directory, joins ``filename``
    onto that directory, and returns the normalized absolute path.

    Parameters
    ----------
    dirname : str
        Directory path, interpreted relative to ``os.getcwd()`` unless it is
        already absolute.
    filename : str
        File name (or relative path) inside ``dirname``.

    Returns
    -------
    str
        Absolute, normalized path to the file.
    """
    resolved_dir = os.path.abspath(os.path.join(os.getcwd(), dirname))
    candidate = os.path.join(resolved_dir, filename)
    return os.path.abspath(candidate)

In [4]:
# Output locations; relative paths are resolved against the kernel's working directory.
datadir = '../../../outputs/'
outdir = '../../../outputs/spatial-basis'

# Input CSVs, given relative to `datadir`.
file_nmf_w = 'spatial-basis/nmf_weights.csv'
file_players = 'players-fg/players-summary-retained-sorted.csv'

# Read both tables in the same order as before: NMF weights first, then players.
df_nmf_w, df_players = (
    pd.read_csv(os.path.join(datadir, relative_name))
    for relative_name in (file_nmf_w, file_players)
)

In [5]:
# Rename the columns of the weights table: the first column becomes the
# player id, and every remaining column is labelled b1, b2, ... by position.
bcols = ['p_id_retained'] + [
    'b{}'.format(position) for position in range(1, df_nmf_w.shape[1])
]
df_nmf_w.columns = bcols
df_nmf_w

Unnamed: 0,p_id_retained,b1,b2,b3,b4,b5
0,0,0.013103,0.007534,0.003139,0.000000,0.003876
1,1,0.005500,0.008615,0.006429,0.003855,0.007014
2,2,0.002609,0.013004,0.003614,0.003207,0.008969
3,3,0.009244,0.007910,0.003307,0.002830,0.006631
4,4,0.001521,0.011368,0.005784,0.012566,0.004084
...,...,...,...,...,...,...
77,77,0.011868,0.000000,0.000000,0.000000,0.017378
78,78,0.008492,0.008126,0.004654,0.012281,0.000000
79,79,0.010665,0.008539,0.000000,0.000001,0.009264
80,80,0.024498,0.000000,0.000739,0.000000,0.000000


In [6]:
# Preview the player summary table. The rename below is disabled, so the
# columns are displayed exactly as they were read from the CSV.
# df_players.columns = ['p_id', 'player', 'team', 'fg', 'fga', 'points']
df_players

Unnamed: 0,p_id_retained,player,team,fg,fga,points
0,0,A. Pasaol,UE,125,294,273
1,1,A. Melecio,DLSU,78,203,188
2,2,R. Subido,UST,62,197,166
3,3,D. Ildefonso,NU,80,195,183
4,4,J. Ahanmisi,ADU,86,194,212
...,...,...,...,...,...,...
77,77,F. Jaboneta,UP,10,31,23
78,78,I. Batalier,UST,9,31,20
79,79,C. Vito,UP,14,30,31
80,80,S. Akomo,UST,15,28,30


## Normalize

In [7]:
# NOTE: despite its name, `num_basis` is the total column count of `df_nmf_w`
# (the id column plus the basis columns), so the basis columns are
# b1 .. b{num_basis - 1}. The name and value are kept for compatibility.
num_basis = df_nmf_w.shape[1]
basis_cols = ['b{}'.format(b) for b in range(1, num_basis)]

df_nmf_w_norm = df_nmf_w.copy(deep=True)

# Labels are attached by index alignment: the row with index i in `df_players`
# is paired with the row with index i in `df_nmf_w`.
# NOTE(review): this presumes both tables list players in the same order — confirm.
df_nmf_w_norm['player'] = df_players.player
df_nmf_w_norm['team'] = df_players.team

# Express each basis weight as a share of the row's total basis weight.
# The row totals are computed once, column-wise, instead of re-summing every
# row inside a per-row `apply` for each basis column. A row whose weights sum
# to zero yields NaN/inf shares rather than raising.
row_totals = df_nmf_w[basis_cols].sum(axis=1)
for col in basis_cols:
    df_nmf_w_norm['{}_norm'.format(col)] = df_nmf_w[col] / row_totals

In [8]:
# Display the normalized frame: raw basis weights, the player/team labels,
# and the per-row *_norm share columns.
df_nmf_w_norm

Unnamed: 0,p_id_retained,b1,b2,b3,b4,b5,player,team,b1_norm,b2_norm,b3_norm,b4_norm,b5_norm
0,0,0.013103,0.007534,0.003139,0.000000,0.003876,A. Pasaol,UE,0.473871,0.272447,0.113510,0.000002,0.140169
1,1,0.005500,0.008615,0.006429,0.003855,0.007014,A. Melecio,DLSU,0.175074,0.274262,0.204652,0.122723,0.223288
2,2,0.002609,0.013004,0.003614,0.003207,0.008969,R. Subido,UST,0.083097,0.414097,0.115078,0.102125,0.285603
3,3,0.009244,0.007910,0.003307,0.002830,0.006631,D. Ildefonso,NU,0.308937,0.264363,0.110523,0.094570,0.221607
4,4,0.001521,0.011368,0.005784,0.012566,0.004084,J. Ahanmisi,ADU,0.043059,0.321835,0.163749,0.355734,0.115624
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,77,0.011868,0.000000,0.000000,0.000000,0.017378,F. Jaboneta,UP,0.405794,0.000006,0.000000,0.000000,0.594200
78,78,0.008492,0.008126,0.004654,0.012281,0.000000,I. Batalier,UST,0.253076,0.242191,0.138705,0.366028,0.000000
79,79,0.010665,0.008539,0.000000,0.000001,0.009264,C. Vito,UP,0.374604,0.299957,0.000000,0.000028,0.325411
80,80,0.024498,0.000000,0.000739,0.000000,0.000000,S. Akomo,UST,0.970732,0.000000,0.029268,0.000000,0.000000


In [9]:
# Write the full normalized table (all retained players) without the index column.
all_players_csv = '{}/nmf_weights-players-retained.csv'.format(outdir)
df_nmf_w_norm.to_csv(all_players_csv, index=False)

In [10]:
# Write one CSV per team, each holding only that team's rows of the normalized table.
teams = df_nmf_w_norm.team.unique()
team_csv_template = '{}/nmf_weights-players-retained-{}.csv'
for team in teams:
    on_this_team = df_nmf_w_norm['team'] == team
    team_csv = team_csv_template.format(outdir, team)
    df_nmf_w_norm.loc[on_this_team].to_csv(team_csv, index=False)