In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw player stats table. The dtypes listing that follows shows the file also
# carries an 'Unnamed: 0' column next to the real fields; it is dropped in a later cell.
df = pd.read_csv('player_stats.csv')

In [3]:
# Inspect the column dtypes to see which stat columns came in as object and need numeric conversion.
df.dtypes

Unnamed: 0     int64
match_id       int64
game_id        int64
team          object
score_team     int64
opponent      object
score_opp      int64
win_lose      object
map           object
map_pick      object
player_id      int64
player        object
agent         object
rating        object
acs           object
kill           int64
death          int64
assist         int64
kast%         object
adr           object
hs%           object
fk            object
fd            object
dtype: object

In [4]:
def _parse_stat(value):
    """Convert one raw stat cell to a float.

    Strings are stripped of surrounding whitespace; a blank string becomes 0.0
    and a value containing '%' has its last character removed before parsing.
    Missing values (NaN/None) come back as NaN and numeric values pass through
    unchanged, so this cell can be re-run on already-converted columns without
    raising.

    Raises:
        ValueError: if a non-blank string cannot be parsed as a number.
    """
    if isinstance(value, str):
        text = value.strip()
        if text == '':
            # Blank cells are treated as 0.0 rather than as missing.
            return 0.0
        if '%' in text:
            text = text[:-1]
        return float(text)
    if pd.isna(value):
        return float('nan')
    return float(value)


# Stat columns that were read as object dtype and must become floats.
conv_columns = ['rating','acs','kast%','adr','hs%','fk','fd']
for col in conv_columns:
    # astype(float) pins the dtype to float64 even for an empty column.
    df[col] = df[col].map(_parse_stat).astype(float)
df.iloc[:10]

Unnamed: 0.1,Unnamed: 0,match_id,game_id,team,score_team,opponent,score_opp,win_lose,map,map_pick,...,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd
0,0,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0
1,1,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0
2,2,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0
3,3,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0
4,4,53658,119512,ZETA,5,DRX,13,opponent win,Ascent,opponent pick,...,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0
5,5,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.67,257.0,15,9,20,83.0,172.0,30.0,1.0,2.0
6,6,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.3,275.0,18,11,4,72.0,167.0,17.0,5.0,4.0
7,7,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.27,299.0,17,15,8,83.0,190.0,28.0,2.0,1.0
8,8,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,1.06,168.0,11,10,3,67.0,95.0,31.0,1.0,0.0
9,9,53658,119512,DRX,13,ZETA,5,team win,Ascent,team pick,...,0.87,125.0,10,10,0,67.0,63.0,19.0,0.0,2.0


In [5]:
# Encode the two string flags as booleans:
#   team_win <- win_lose == 'team win'   (win_lose itself is then removed)
#   map_pick <- map_pick == 'team pick'
# match_id is dropped because we are predicting maps, not matches overall.
df_new = (
    df.copy()
    .drop(columns=['match_id', 'Unnamed: 0'])
    .assign(
        team_win=lambda frame: frame['win_lose'] == 'team win',
        map_pick=lambda frame: frame['map_pick'] == 'team pick',
    )
    .drop(columns=['win_lose'])
)
df_new.head()

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False


In [7]:
# Role -> list of agents assigned to that role.
agents = {
    'initiator': ['sova', 'skye', 'kayo', 'fade', 'breach', 'gekko'],
    'sentinel': ['killjoy', 'cypher', 'sage', 'chamber'],
    'controller': ['omen', 'viper', 'harbor', 'astra', 'brimstone'],
    'duelist': ['jett', 'raze', 'phoenix', 'yoru', 'neon', 'reyna'],
}

# Invert the table into a flat agent -> role lookup (insertion order follows `agents`).
agent_class = {name: role for role, names in agents.items() for name in names}

agent_class

{'sova': 'initiator',
 'skye': 'initiator',
 'kayo': 'initiator',
 'fade': 'initiator',
 'breach': 'initiator',
 'gekko': 'initiator',
 'killjoy': 'sentinel',
 'cypher': 'sentinel',
 'sage': 'sentinel',
 'chamber': 'sentinel',
 'omen': 'controller',
 'viper': 'controller',
 'harbor': 'controller',
 'astra': 'controller',
 'brimstone': 'controller',
 'jett': 'duelist',
 'raze': 'duelist',
 'phoenix': 'duelist',
 'yoru': 'duelist',
 'neon': 'duelist',
 'reyna': 'duelist'}

In [8]:
# Vectorised lookup of each row's agent in the agent -> role table.
agent_roles = df_new['agent'].map(agent_class)

# Mapping through a plain dict yields NaN for agents that are not in the table.
# Fail loudly and name every offender instead of stopping at the first bare KeyError
# (or silently writing NaN roles); df_new is left untouched when this raises.
unmapped_rows = agent_roles.isna()
if unmapped_rows.any():
    unknown_agents = sorted(df_new.loc[unmapped_rows, 'agent'].astype(str).unique())
    raise KeyError(f"agents missing from agent_class: {unknown_agents}")

df_new['agent_class'] = agent_roles
df_new.head()

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,15,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,15,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,10,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,8,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,7,16,8,61.0,63.0,13.0,0.0,1.0,False,controller


In [9]:
# List the columns present in df_new at this point in the notebook.
df_new.columns

Index(['game_id', 'team', 'score_team', 'opponent', 'score_opp', 'map',
       'map_pick', 'player_id', 'player', 'agent', 'rating', 'acs', 'kill',
       'death', 'assist', 'kast%', 'adr', 'hs%', 'fk', 'fd', 'team_win',
       'agent_class'],
      dtype='object')

In [12]:
# Score margin from the row's own team's perspective: score_team minus score_opp.
# NOTE(review): this cell carries a higher execution count than the cells that follow it,
# and the saved outputs of those later cells do not show a score_diff column. On a fresh
# Restart & Run All the column WILL flow into team_df / player_df -- confirm that is intended.
df_new['score_diff'] = df_new['score_team'].sub(df_new['score_opp'])
df_new

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,player_id,player,agent,...,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class,score_diff
0,119512,ZETA,5,DRX,13,Ascent,False,999,Laz,sova,...,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator,-8
1,119512,ZETA,5,DRX,13,Ascent,False,1395,TENNN,killjoy,...,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel,-8
2,119512,ZETA,5,DRX,13,Ascent,False,424,Dep,jett,...,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist,-8
3,119512,ZETA,5,DRX,13,Ascent,False,1000,crow,kayo,...,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator,-8
4,119512,ZETA,5,DRX,13,Ascent,False,6668,SugarZ3ro,omen,...,16,8,61.0,63.0,13.0,0.0,1.0,False,controller,-8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6225,137642,EG,13,PRX,10,Lotus,True,26171,Demon1,astra,...,13,10,83.0,162.0,46.0,0.0,1.0,True,controller,3
6226,137642,EG,13,PRX,10,Lotus,True,3993,jawgemo,raze,...,18,5,78.0,225.0,18.0,7.0,3.0,True,duelist,3
6227,137642,EG,13,PRX,10,Lotus,True,604,Boostio,killjoy,...,16,1,65.0,131.0,36.0,2.0,2.0,True,sentinel,3
6228,137642,EG,13,PRX,10,Lotus,True,11225,Ethan,skye,...,17,15,74.0,108.0,29.0,2.0,3.0,True,initiator,3


# Split DataFrame for team analysis

In [10]:
# Team-level view: work on a copy of the encoded frame with the player-specific columns removed.
player_only_cols = ['player','player_id','agent','agent_class']
team_df = df_new.copy().drop(columns=player_only_cols)
team_df.head()

Unnamed: 0,game_id,team,score_team,opponent,score_opp,map,map_pick,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win
0,119512,ZETA,5,DRX,13,Ascent,False,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False
1,119512,ZETA,5,DRX,13,Ascent,False,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False
2,119512,ZETA,5,DRX,13,Ascent,False,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False
3,119512,ZETA,5,DRX,13,Ascent,False,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False
4,119512,ZETA,5,DRX,13,Ascent,False,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False


One-hot encode the opponent team and the map.

In [11]:
# Replace the categorical 'map' and 'opponent' columns with one indicator column per value.
team_categoricals = ['map','opponent']
team_df = pd.get_dummies(data=team_df, columns=team_categoricals)
team_df.head()

Unnamed: 0,game_id,team,score_team,score_opp,map_pick,rating,acs,kill,death,assist,...,opponent_RRQ,opponent_SEN,opponent_SPB,opponent_T1,opponent_TH,opponent_TL,opponent_TLN,opponent_TS,opponent_VIT,opponent_ZETA
0,119512,ZETA,5,13,False,1.19,239.0,15,11,6,...,False,False,False,False,False,False,False,False,False,False
1,119512,ZETA,5,13,False,0.84,234.0,15,14,6,...,False,False,False,False,False,False,False,False,False,False
2,119512,ZETA,5,13,False,0.68,173.0,10,15,2,...,False,False,False,False,False,False,False,False,False,False
3,119512,ZETA,5,13,False,0.63,142.0,8,15,4,...,False,False,False,False,False,False,False,False,False,False
4,119512,ZETA,5,13,False,0.57,114.0,7,16,8,...,False,False,False,False,False,False,False,False,False,False


In [35]:
# Collapse the rows into one per (game_id, team) by averaging every remaining column,
# then discard game_id, index the result by team and write it to disk.
# Note: this rebinds team_df, so the cell cannot be re-run without re-running the cells above it.
per_game_means = team_df.groupby(['game_id','team'], as_index=False).mean()
team_df = per_game_means.drop(columns=['game_id']).set_index('team')
team_df.to_csv('team_df.csv')


# Split dataframe for player analysis

In [38]:
# Player-level view: copy the encoded frame, remove the team/match identifiers and the raw
# agent name (agent_class is kept), and index the rows by player.
non_player_cols = ['team','opponent','game_id','player_id','agent']
player_df = df_new.copy().drop(columns=non_player_cols).set_index('player')
player_df.head()

Unnamed: 0_level_0,score_team,score_opp,map,map_pick,rating,acs,kill,death,assist,kast%,adr,hs%,fk,fd,team_win,agent_class
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Laz,5,13,Ascent,False,1.19,239.0,15,11,6,72.0,146.0,16.0,1.0,0.0,False,initiator
TENNN,5,13,Ascent,False,0.84,234.0,15,14,6,61.0,145.0,32.0,1.0,4.0,False,sentinel
Dep,5,13,Ascent,False,0.68,173.0,10,15,2,67.0,123.0,18.0,4.0,2.0,False,duelist
crow,5,13,Ascent,False,0.63,142.0,8,15,4,83.0,92.0,29.0,3.0,2.0,False,initiator
SugarZ3ro,5,13,Ascent,False,0.57,114.0,7,16,8,61.0,63.0,13.0,0.0,1.0,False,controller


In [40]:
# Replace the categorical 'map' and 'agent_class' columns with indicator columns, then save.
# Note: this rebinds player_df, so re-running the cell requires re-running the cell above first.
player_categoricals = ['map','agent_class']
player_df = pd.get_dummies(data=player_df, columns=player_categoricals)
player_df.to_csv('player_df.csv')