In [2]:
import numpy as np
import pandas as pd
import plotly.express as px

In [3]:
df = pd.read_csv('player_stats.csv')

In [4]:
df.dtypes

Unnamed: 0     int64
match_id       int64
game_id        int64
team          object
score_team     int64
opponent      object
score_opp      int64
win_lose      object
map           object
map_pick      object
player_id      int64
player        object
agent         object
rating        object
acs           object
kill           int64
death          int64
assist         int64
kast%         object
adr           object
hs%           object
fk            object
fd            object
dtype: object

In [5]:
conv_columns = ['rating','acs','kast%','adr','hs%','fk','fd']
for col in conv_columns:
    df[col] = df[col].apply(lambda x: np.nan if x.strip() == '' else float(x.strip()[:-1]) if '%' in x else float(x))
df.iloc[:10]

Unnamed: 0.1,Unnamed: 0,match_id,game_id,team,score_team,opponent,score_opp,win_lose,map,map_pick,...,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd
0,0,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0
1,1,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0
2,2,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0
3,3,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0
4,4,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0
5,5,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.67,257.0,15,9,20,83.0,172.0,30.0,1.0,2.0
6,6,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.3,275.0,18,11,4,72.0,167.0,17.0,5.0,4.0
7,7,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.27,299.0,17,15,8,83.0,190.0,28.0,2.0,1.0
8,8,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.06,168.0,11,10,3,67.0,95.0,31.0,1.0,0.0
9,9,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,0.87,125.0,10,10,0,67.0,63.0,19.0,0.0,2.0


In [6]:
# encoding binary data
df_new = df.copy()
df_new = df_new.drop(columns = ['match_id','Unnamed: 0']) # we drop match_id because we are predicting maps, not matches overall.
df_new['team_win'] = df['win_lose']=='team win'
df_new = df_new.drop(columns=['win_lose'])
df_new['map_pick'] = df_new['map_pick'] == 'team pick'
df_new.head()

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False


In [7]:
agents = {'initiator': ['sova','skye','kayo','fade','breach','gekko'],
               'sentinel': ['killjoy','cypher','sage','chamber'],
               'controller': ['omen','viper','harbor','astra','brimstone'],
               'duelist': ['jett','raze','phoenix','yoru','neon','reyna']
               }
agent_class = {}
for key,lst in agents.items():
    for a in lst:
        agent_class[a] = key

agent_class

{'sova': 'initiator',
 'skye': 'initiator',
 'kayo': 'initiator',
 'fade': 'initiator',
 'breach': 'initiator',
 'gekko': 'initiator',
 'killjoy': 'sentinel',
 'cypher': 'sentinel',
 'sage': 'sentinel',
 'chamber': 'sentinel',
 'omen': 'controller',
 'viper': 'controller',
 'harbor': 'controller',
 'astra': 'controller',
 'brimstone': 'controller',
 'jett': 'duelist',
 'raze': 'duelist',
 'phoenix': 'duelist',
 'yoru': 'duelist',
 'neon': 'duelist',
 'reyna': 'duelist'}

In [8]:
df_new['agent_class'] = df_new['agent'].apply(lambda x: agent_class[x])
df_new.head()

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,15,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,15,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,10,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,8,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,7,16,8,61.0,63.0,13.0,0.0,1.0,False,controller


In [9]:
df_new['score_diff'] = df_new['score_team'] - df_new['score_opp']
df_new

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator,-8
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel,-8
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist,-8
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator,-8
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,16,8,61.0,63.0,13.0,0.0,1.0,False,controller,-8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6225,137642,EG,13,PRX,10,Lotus,True,26171,Demon1,astra,...,13,10,83.0,162.0,46.0,0.0,1.0,True,controller,3
6226,137642,EG,13,PRX,10,Lotus,True,3993,jawgemo,raze,...,18,5,78.0,225.0,18.0,7.0,3.0,True,duelist,3
6227,137642,EG,13,PRX,10,Lotus,True,604,Boostio,killjoy,...,16,1,65.0,131.0,36.0,2.0,2.0,True,sentinel,3
6228,137642,EG,13,PRX,10,Lotus,True,11225,Ethan,skye,...,17,15,74.0,108.0,29.0,2.0,3.0,True,initiator,3


Addressing null values from data collection

In [10]:
nan_section = df_new[df_new.isna().any(axis=1)]
nan_section

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
1050,119677,ZETA,13,TS,15,Haven,False,999,Laz,sova,...,20,16,,,,,,False,initiator,-2
1051,119677,ZETA,13,TS,15,Haven,False,1000,crow,breach,...,20,9,,,,,,False,initiator,-2
1052,119677,ZETA,13,TS,15,Haven,False,424,Dep,jett,...,23,8,,,,,,False,duelist,-2
1053,119677,ZETA,13,TS,15,Haven,False,6668,SugarZ3ro,omen,...,20,10,,,,,,False,controller,-2
1054,119677,ZETA,13,TS,15,Haven,False,1395,TENNN,killjoy,...,22,2,,,,,,False,sentinel,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5725,137389,NRG,15,ZETA,13,Bind,False,4,crashies,skye,...,17,10,,,,,,True,initiator,2
5726,137389,NRG,15,ZETA,13,Bind,False,98,ardiis,chamber,...,18,16,,,,,,True,sentinel,2
5727,137389,NRG,15,ZETA,13,Bind,False,261,Victor,raze,...,23,8,,,,,,True,duelist,2
5728,137389,NRG,15,ZETA,13,Bind,False,817,FiNESSE,viper,...,21,14,,,,,,True,controller,2


In [11]:
nan_ids = nan_section['game_id'].unique()
for id in nan_ids:
    display(df_new[df_new['game_id']==id])

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
1050,119677,ZETA,13,TS,15,Haven,False,999,Laz,sova,...,20,16,,,,,,False,initiator,-2
1051,119677,ZETA,13,TS,15,Haven,False,1000,crow,breach,...,20,9,,,,,,False,initiator,-2
1052,119677,ZETA,13,TS,15,Haven,False,424,Dep,jett,...,23,8,,,,,,False,duelist,-2
1053,119677,ZETA,13,TS,15,Haven,False,6668,SugarZ3ro,omen,...,20,10,,,,,,False,controller,-2
1054,119677,ZETA,13,TS,15,Haven,False,1395,TENNN,killjoy,...,22,2,,,,,,False,sentinel,-2
1055,119677,TS,15,ZETA,13,Haven,True,2333,BORKUM,omen,...,21,17,,,,,,True,controller,2
1056,119677,TS,15,ZETA,13,Haven,True,2334,JessieVash,sova,...,15,12,,,,,,True,initiator,2
1057,119677,TS,15,ZETA,13,Haven,True,2679,DubsteP,killjoy,...,17,2,,,,,,True,sentinel,2
1058,119677,TS,15,ZETA,13,Haven,True,7405,Jremy,raze,...,21,4,,,,,,True,duelist,2
1059,119677,TS,15,ZETA,13,Haven,True,8504,invy,breach,...,21,8,,,,,,True,initiator,2


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
1060,119678,ZETA,12,TS,14,Ascent,True,999,Laz,sova,...,20,7,,,,,,False,initiator,-2
1061,119678,ZETA,12,TS,14,Ascent,True,1000,crow,kayo,...,15,18,,,,,,False,initiator,-2
1062,119678,ZETA,12,TS,14,Ascent,True,424,Dep,jett,...,21,3,,,,,,False,duelist,-2
1063,119678,ZETA,12,TS,14,Ascent,True,6668,SugarZ3ro,omen,...,18,17,,,,,,False,controller,-2
1064,119678,ZETA,12,TS,14,Ascent,True,1395,TENNN,killjoy,...,20,5,,,,,,False,sentinel,-2
1065,119678,TS,14,ZETA,12,Ascent,False,2333,BORKUM,omen,...,18,14,,,,,,True,controller,2
1066,119678,TS,14,ZETA,12,Ascent,False,2334,JessieVash,sova,...,16,7,,,,,,True,initiator,2
1067,119678,TS,14,ZETA,12,Ascent,False,2679,DubsteP,jett,...,20,5,,,,,,True,duelist,2
1068,119678,TS,14,ZETA,12,Ascent,False,7405,Jremy,killjoy,...,21,6,,,,,,True,sentinel,2
1069,119678,TS,14,ZETA,12,Ascent,False,8504,invy,kayo,...,18,17,,,,,,True,initiator,2


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
1740,126175,KOI,6,FUT,13,Icebox,False,339,koldamenta,viper,...,15,4,,,,,,False,controller,-7
1741,126175,KOI,6,FUT,13,Icebox,False,565,starxo,harbor,...,15,2,,,,,,False,controller,-7
1742,126175,KOI,6,FUT,13,Icebox,False,1428,sheydos,killjoy,...,11,2,,,,,,False,sentinel,-7
1743,126175,KOI,6,FUT,13,Icebox,False,2168,trexx,kayo,...,15,9,,,,,,False,initiator,-7
1744,126175,KOI,6,FUT,13,Icebox,False,9558,Wolfen,sova,...,15,7,,,,,,False,initiator,-7
1745,126175,FUT,13,KOI,6,Icebox,True,1139,qRaxs,fade,...,13,6,,,,,,True,initiator,7
1746,126175,FUT,13,KOI,6,Icebox,True,3604,MOJJ,killjoy,...,11,5,,,,,,True,sentinel,7
1747,126175,FUT,13,KOI,6,Icebox,True,6510,AtaKaptan,viper,...,11,7,,,,,,True,controller,7
1748,126175,FUT,13,KOI,6,Icebox,True,8044,MrFaliN,harbor,...,11,9,,,,,,True,controller,7
1749,126175,FUT,13,KOI,6,Icebox,True,10125,qw1,jett,...,10,2,,,,,,True,duelist,7


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
2190,119295,FUT,11,NAVI,13,Pearl,False,10125,qw1,jett,...,14,1,,,,,,False,duelist,-2
2191,119295,FUT,11,NAVI,13,Pearl,False,1139,qRaxs,kayo,...,19,12,,,,,,False,initiator,-2
2192,119295,FUT,11,NAVI,13,Pearl,False,8044,MrFaliN,sova,...,17,7,,,,,,False,initiator,-2
2193,119295,FUT,11,NAVI,13,Pearl,False,3604,MOJJ,viper,...,18,5,,,,,,False,controller,-2
2194,119295,FUT,11,NAVI,13,Pearl,False,6510,AtaKaptan,astra,...,19,11,,,,,,False,controller,-2
2195,119295,NAVI,13,FUT,11,Pearl,True,573,cNed,jett,...,14,4,,,,,,True,duelist,2
2196,119295,NAVI,13,FUT,11,Pearl,True,2858,SUYGETSU,killjoy,...,19,2,,,,,,True,sentinel,2
2197,119295,NAVI,13,FUT,11,Pearl,True,11,Zyppan,kayo,...,18,10,,,,,,True,initiator,2
2198,119295,NAVI,13,FUT,11,Pearl,True,109,ANGE1,gekko,...,19,4,,,,,,True,initiator,2
2199,119295,NAVI,13,FUT,11,Pearl,True,384,Shao,astra,...,19,12,,,,,,True,controller,2


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
2220,119300,BBL,11,TL,13,Split,False,556,SouhcNi,cypher,...,18,4,,,,,,False,sentinel,-2
2221,119300,BBL,11,TL,13,Split,False,568,Turko,skye,...,16,6,,,,,,False,initiator,-2
2222,119300,BBL,11,TL,13,Split,False,1481,AsLanM4shadoW,sage,...,18,6,,,,,,False,sentinel,-2
2223,119300,BBL,11,TL,13,Split,False,8041,Brave,omen,...,21,5,,,,,,False,controller,-2
2224,119300,BBL,11,TL,13,Split,False,10161,QutionerX,jett,...,15,2,,,,,,False,duelist,-2
2225,119300,TL,13,BBL,11,Split,True,101,soulcas,sage,...,14,13,,,,,,True,sentinel,2
2226,119300,TL,13,BBL,11,Split,True,312,Sayf,jett,...,13,2,,,,,,True,duelist,2
2227,119300,TL,13,BBL,11,Split,True,457,nAts,cypher,...,12,3,,,,,,True,sentinel,2
2228,119300,TL,13,BBL,11,Split,True,1427,Redgar,omen,...,18,8,,,,,,True,controller,2
2229,119300,TL,13,BBL,11,Split,True,9780,Jamppi,skye,...,15,10,,,,,,True,initiator,2


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
3140,118980,NRG,13,C9,8,Haven,False,4164,s0m,astra,...,8,1,,,,,,True,controller,5
3141,118980,NRG,13,C9,8,Haven,False,261,Victor,killjoy,...,15,6,,,,,,True,sentinel,5
3142,118980,NRG,13,C9,8,Haven,False,4,crashies,sova,...,13,4,,,,,,True,initiator,5
3143,118980,NRG,13,C9,8,Haven,False,98,ardiis,jett,...,16,3,,,,,,True,duelist,5
3144,118980,NRG,13,C9,8,Haven,False,817,FiNESSE,breach,...,11,10,,,,,,True,initiator,5
3145,118980,C9,8,NRG,13,Haven,True,7873,leaf,jett,...,15,3,,,,,,False,duelist,-5
3146,118980,C9,8,NRG,13,Haven,True,16003,runi,sova,...,14,6,,,,,,False,initiator,-5
3147,118980,C9,8,NRG,13,Haven,True,7871,Xeppaa,breach,...,15,11,,,,,,False,initiator,-5
3148,118980,C9,8,NRG,13,Haven,True,729,Zellsis,killjoy,...,13,5,,,,,,False,sentinel,-5
3149,118980,C9,8,NRG,13,Haven,True,8742,jakee,omen,...,16,7,,,,,,False,controller,-5


Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
5720,137389,ZETA,13,NRG,15,Bind,True,424,Dep,neon,...,22,1,,,,,,False,duelist,-2
5721,137389,ZETA,13,NRG,15,Bind,True,999,Laz,viper,...,19,8,,,,,,False,controller,-2
5722,137389,ZETA,13,NRG,15,Bind,True,1000,crow,skye,...,20,10,,,,,,False,initiator,-2
5723,137389,ZETA,13,NRG,15,Bind,True,1395,TENNN,raze,...,21,4,,,,,,False,duelist,-2
5724,137389,ZETA,13,NRG,15,Bind,True,6668,SugarZ3ro,brimstone,...,17,7,,,,,,False,controller,-2
5725,137389,NRG,15,ZETA,13,Bind,False,4,crashies,skye,...,17,10,,,,,,True,initiator,2
5726,137389,NRG,15,ZETA,13,Bind,False,98,ardiis,chamber,...,18,16,,,,,,True,sentinel,2
5727,137389,NRG,15,ZETA,13,Bind,False,261,Victor,raze,...,23,8,,,,,,True,duelist,2
5728,137389,NRG,15,ZETA,13,Bind,False,817,FiNESSE,viper,...,21,14,,,,,,True,controller,2
5729,137389,NRG,15,ZETA,13,Bind,False,4164,s0m,brimstone,...,16,12,,,,,,True,controller,2


Null Value Imputation

Null hypothesis: the missingness of `rating` does not depend on `team`  
Alternative hypothesis: the missingness of `rating` does depend on `team`

In [33]:
quant_cols = [x for x in df_new.dtypes.index if df_new.dtypes[x] != 'object']
na_series  = df_new.isna().any()
na_cols = [x for x in na_series.index if na_series[x]]
na_cols

['rating', 'acs', 'kast%', 'adr', 'hs%', 'fk', 'fd']

In [31]:
def significance(col):
    # hypothesis test:
    def tvd(x, y, df): #Calculate total variation distance
        return np.sum(np.abs(df[x] - df[y]))/2

    col_1 = 'rating'
    col_2 = 'team'

    df_temp = df_new.copy()
    df_temp = df_temp[quant_cols+['team']]
    df_temp['missing'] = df_temp[col_1].isna() 
    df_temp['not_missing'] = df_temp[col_1].notna()
    grouped = df_temp.groupby(col_2).mean()

    observed = tvd('missing','not_missing', grouped) #observed

    tvds = [] #tvd list

    #Permutation
    for i in range (0,1000): #permutation loop
        df_temp[col_1] = np.random.permutation(df_temp[col_1])
        df_temp['missing'] = df_temp[col_1].isna()
        df_temp['not_missing'] = df_temp[col_1].notna()
        grouped = df_temp.groupby(col_2).mean()
        tvds.append(tvd('missing','not_missing',grouped))

    p_val = (observed <= np.array(tvds)).mean()
    return p_val, tvds, observed

In [34]:
pv_dict = {}
for i,col in enumerate(na_cols):
    pv, tvd_list, obs = significance(col)
    pv_dict[col] = pv
    if i == len(na_cols) - 1:
        print('plotting histogram for: ', col)

plotting histogram for:  fd


In [35]:
pv_dict

{'rating': 0.029,
 'acs': 0.027,
 'kast%': 0.018,
 'adr': 0.02,
 'hs%': 0.025,
 'fk': 0.023,
 'fd': 0.012}

In [39]:
fig = px.histogram(tvd_list, title = 'Emperical Distribution of TVD')
fig.add_vline(x=obs, line_color='red')
fig.add_annotation(text=f'<span style="color:red">Observed TVD = {round(obs)}</span>',
                   x = obs + 0.03, showarrow=False, y = 80)
fig.show()

Since all p-values are under the 5% significance level, we can reject the null hypothesis that the columns with missing data do not depend on `team`, so we can use mean-value imputing to fill the missing data by team average

In [46]:
df_imputed = df_new.copy()
df_imputed = df_new.groupby('team').transform(lambda x: x.fillna(x.mean()))
display(df_imputed)


Dropping invalid columns in DataFrameGroupBy.transform is deprecated. In a future version, a TypeError will be raised. Before calling .transform, select only columns which should be valid for the transforming function.



Unnamed: 0,game_id,score_team,score_opp,map_pick,player_id,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,score_diff
0,119512,5,13,False,999,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False,-8
1,119512,5,13,False,1395,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False,-8
2,119512,5,13,False,424,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False,-8
3,119512,5,13,False,1000,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False,-8
4,119512,5,13,False,6668,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False,-8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6225,137642,13,10,True,26171,1.43,244.0,23,13,10,83.0,162.0,46.0,0.0,1.0,True,3
6226,137642,13,10,True,3993,1.26,338.0,26,18,5,78.0,225.0,18.0,7.0,3.0,True,3
6227,137642,13,10,True,604,1.09,198.0,16,16,1,65.0,131.0,36.0,2.0,2.0,True,3
6228,137642,13,10,True,11225,0.86,180.0,13,17,15,74.0,108.0,29.0,2.0,3.0,True,3


In [47]:
df_imputed.isna().any()

game_id       False
score_team    False
score_opp     False
map_pick      False
player_id     False
rating        False
acs           False
kill          False
death         False
assist        False
kast%         False
adr           False
hs%           False
fk            False
fd            False
team_win      False
score_diff    False
dtype: bool

null values addressed, let's export it for Tableau modeling!

In [48]:
df_imputed.to_csv('data_cleaned.csv')

# Split dataframe for teams analysis

In [8]:
team_df = df_new.copy()
team_df = team_df.drop(columns=['player','player_id','agent','agent_class','opponent'])
team_df.head()

Unnamed: 0,game_id,team,score_team,score_opp,map,map_pick,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,score_diff
0,119512,ZETA,5,13,Ascent,False,1.19,239,15,11,6,72%,146,16%,1,0,False,-8
1,119512,ZETA,5,13,Ascent,False,0.84,234,15,14,6,61%,145,32%,1,4,False,-8
2,119512,ZETA,5,13,Ascent,False,0.68,173,10,15,2,67%,123,18%,4,2,False,-8
3,119512,ZETA,5,13,Ascent,False,0.63,142,8,15,4,83%,92,29%,3,2,False,-8
4,119512,ZETA,5,13,Ascent,False,0.57,114,7,16,8,61%,63,13%,0,1,False,-8


One-hot encode opponent team, map

In [21]:
team_df = pd.get_dummies(team_df, columns=['map'])
team_df.head()

Unnamed: 0,game_id,team,score_team,score_opp,map_pick,rating,acs,kill,death,assist,...,team_win,score_diff,map_Ascent,map_Bind,map_Fracture,map_Haven,map_Icebox,map_Lotus,map_Pearl,map_Split
0,119512,ZETA,5,13,False,1.19,239.0,15,11,6,...,False,-8,True,False,False,False,False,False,False,False
1,119512,ZETA,5,13,False,0.84,234.0,15,14,6,...,False,-8,True,False,False,False,False,False,False,False
2,119512,ZETA,5,13,False,0.68,173.0,10,15,2,...,False,-8,True,False,False,False,False,False,False,False
3,119512,ZETA,5,13,False,0.63,142.0,8,15,4,...,False,-8,True,False,False,False,False,False,False,False
4,119512,ZETA,5,13,False,0.57,114.0,7,16,8,...,False,-8,True,False,False,False,False,False,False,False


In [22]:
team_df = team_df.groupby(['game_id','team'], as_index=False).mean().drop(columns=['game_id']).set_index('team')
team_df.to_csv('team_df.csv')


# Split dataframe for player analysis

In [25]:
player_df = df_new.copy()
player_df = player_df.drop(columns = ['team','opponent','game_id','player_id','agent']).set_index('player')
player_df.head()

Unnamed: 0_level_0,score_team,score_opp,map,map_pick,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Laz,5,13,Ascent,False,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator,-8
TENNN,5,13,Ascent,False,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel,-8
Dep,5,13,Ascent,False,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist,-8
crow,5,13,Ascent,False,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator,-8
SugarZ3ro,5,13,Ascent,False,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False,controller,-8


In [26]:
player_df = pd.get_dummies(player_df, columns=['map','agent_class'])
player_df.to_csv('player_df.csv')
player_df.head()

Unnamed: 0_level_0,score_team,score_opp,map_pick,rating,acs,kill,death,assist,kast%,adr,...,map_Fracture,map_Haven,map_Icebox,map_Lotus,map_Pearl,map_Split,agent_class_controller,agent_class_duelist,agent_class_initiator,agent_class_sentinel
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Laz,5,13,False,1.19,239.0,15,11,6,72.0,146.0,...,False,False,False,False,False,False,False,False,True,False
TENNN,5,13,False,0.84,234.0,15,14,6,61.0,145.0,...,False,False,False,False,False,False,False,False,False,True
Dep,5,13,False,0.68,173.0,10,15,2,67.0,123.0,...,False,False,False,False,False,False,False,True,False,False
crow,5,13,False,0.63,142.0,8,15,4,83.0,92.0,...,False,False,False,False,False,False,False,False,True,False
SugarZ3ro,5,13,False,0.57,114.0,7,16,8,61.0,63.0,...,False,False,False,False,False,False,True,False,False,False
