Libs

In [25]:
import pandas as pd
from datetime import datetime
import pathlib
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

In [26]:
# Project root: the directory one level above the current working directory.
abs_path = pathlib.Path.cwd().parents[0]

Matches

In [27]:
# Location of the processed matches CSV: <abs_path>/data/matches/matches_processed.csv
file_read = pathlib.Path(abs_path) / 'data' / 'matches' / 'matches_processed.csv'

In [28]:
# Read the processed matches file and discard its 'Unnamed: 0' column
# (presumably an index that was written out with the CSV -- TODO confirm).
df_matches_raw = pd.read_csv(file_read).drop(columns=['Unnamed: 0'])

In [29]:
# Summary statistics of the numeric columns, laid out one row per column.
# `describe()` returns the statistics as rows, so a plain transpose produces
# this layout directly and keeps the statistics in their natural order
# (count, mean, std, min, 25%, 50%, 75%, max). Rows are sorted by column name
# so a given feature is easy to find.
df_matches_raw_describe = df_matches_raw.describe().T.sort_index()
df_matches_raw_describe

Unnamed: 0,25%,50%,75%,count,max,mean,min,std
ability1CastsValue,6.0,10.0,16.0,333679.0,87.0,11.532671,0.0,7.280749
ability2CastsValue,13.0,20.0,26.0,333679.0,109.0,20.509598,0.0,11.271582
acesValue,0.0,0.0,0.0,333835.0,3.0,0.027571,0.0,0.167447
assistsValue,3.0,4.0,7.0,333835.0,26.0,4.992239,0.0,3.220758
clutchesLostValue,1.0,2.0,3.0,277273.0,15.0,2.479892,1.0,1.433286
clutchesValue,1.0,1.0,2.0,161109.0,8.0,1.353978,0.0,0.921639
competitiveTier,26.0,27.0,27.0,333835.0,27.0,26.663373,25.0,0.567392
damagePerRoundValue,119.153846,145.111111,173.25,333835.0,461.0,147.668696,0.0,41.148931
damageReceivedValue,2497.0,2930.0,3354.0,333835.0,8020.0,2918.106265,0.0,711.47083
damageValue,2438.0,3083.0,3784.0,333835.0,11217.0,3144.430006,0.0,1023.790909


In [30]:
# Size of the raw frame: number of rows and number of columns.
row_num, col_num = df_matches_raw.shape
print(f'Total lines is: {row_num} \nTotal columns is: {col_num}')

Total lines is: 333835 
Total columns is: 57


In [31]:
# Count of missing values in each column of the raw frame.
nan_num = df_matches_raw.isnull().sum()
nan_num


matchId                          0
timestamp                        0
dateMatch                        0
hourMatch                        0
weekDay                          0
mapName                          0
mapImageUrl                      0
seasonName                       0
userId                           0
server                           0
leaderboardRank                  0
competitiveTier                  0
rankedRating                     0
hasWon                           0
result                           0
agentName                        0
playtimeValue                    0
roundsPlayedValue                0
roundsWonValue                   0
roundsLostValue                  0
roundsDisconnectedValue      74380
scoreValue                       0
killsValue                       0
deathsValue                      0
assistsValue                     0
damageValue                      0
damageReceivedValue              0
headshotsValue                   0
grenadeCastsValue   

In [54]:
# Build the cleaned frame from the raw one (taken before NaNs are filled):
#   1. drop columns that contain only NaN,
#   2. keep the first row found for each matchId,
#   3. renumber the index from 0.
# Written as a single chain without inplace=True, so the cell always produces
# a fresh frame and never mutates an intermediate result.
# NOTE(review): de-duplicating on matchId alone keeps one row per match, so
# rows for any other userId in the same match are discarded -- confirm this
# is intended before computing per-player figures from this frame.
df_matches_clean = (
    df_matches_raw
    .dropna(how='all', axis=1)
    .drop_duplicates(subset='matchId')
    .reset_index(drop=True)
)

# NOTE(review): this rebinds the raw frame with NaNs replaced by 0, so every
# later cell that reads df_matches_raw sees the filled version, while
# df_matches_clean keeps its NaNs. Kept as-is because later cells may rely on it.
df_matches_raw = df_matches_raw.fillna(0)

In [55]:
# Size of the cleaned frame: number of rows and number of columns.
row_num, col_num = df_matches_clean.shape
print(f'Total lines is: {row_num} \nTotal columns is: {col_num}')

Total lines is: 209428 
Total columns is: 57


In [56]:
# Number of distinct matchId values in the cleaned frame.
num_matches = df_matches_clean.loc[:, 'matchId'].nunique()
num_matches

209428

In [57]:
# Number of distinct userId values in the cleaned frame.
num_players = df_matches_clean['userId'].nunique()
# The original, misspelled name is kept as an alias so any later cell that
# still refers to it keeps working.
num_playes = num_players
num_players

1869

In [58]:
# Identifier / categorical columns of the cleaned frame.
# Each column is listed exactly once: repeating a label in the selection list
# would repeat the column in the result, and `df_cat_features['userId']` would
# then return a two-column frame instead of a Series.
df_cat_features = df_matches_clean[[
                                    'matchId', 'userId', 'dateMatch',
                                    'timestamp', 'mapName', 'mapImageUrl',
                                    'seasonName', 'hasWon', 'result',
                                    'agentName', 'weekDay', 'server',
                                    ]]

In [59]:
# Names of the numeric per-row statistics, in the order they are selected.
num_feature_cols = [
    'leaderboardRank', 'competitiveTier', 'rankedRating',
    'playtimeValue', 'roundsPlayedValue', 'roundsWonValue',
    'roundsLostValue', 'roundsDisconnectedValue', 'scoreValue',
    'killsValue', 'deathsValue', 'assistsValue', 'damageValue',
    'damageReceivedValue', 'headshotsValue', 'grenadeCastsValue',
    'ability1CastsValue', 'ability2CastsValue', 'ultimateCastsValue',
    'dealtHeadshotsValue', 'dealtBodyshotsValue', 'dealtLegshotsValue',
    'econRatingValue', 'suicidesValue', 'revivedValue', 'firstBloodsValue',
    'firstDeathsValue', 'lastDeathsValue', 'survivedValue',
    'tradedValue', 'kASTValue', 'flawlessValue',
]

# Numeric view of the cleaned frame.
df_num_features = df_matches_clean[num_feature_cols]

In [60]:
# Number of rows for every (agentName, result) pair, listed by agent name in
# descending order and re-indexed from 0.
df_matches_agents_result = (
    df_matches_raw
    .groupby(['agentName', 'result'])
    .size()
    .reset_index(name='count')
    .sort_values(['agentName'], ascending=False)
    .reset_index(drop=True)
)

In [45]:
# Grouped bars: one group per agent, one bar per result, bar height = count.
# Columns are referenced by name (not passed as Series) so axis and legend
# labels always come from the column names, and the figure carries a title so
# it can be read on its own.
fig = px.histogram(
    df_matches_agents_result,
    x='agentName',
    y='count',
    color='result',
    barmode='group',
    height=400,
    title='Match records per agent, split by result',
)
fig.show()

In [46]:
# Per agent: 100 * (sum of hasWon) / (number of non-null hasWon values).
# The grouping is built once and reused for both the numerator and denominator.
has_won_by_agent = df_matches_raw.groupby('agentName')[['hasWon']]
df_matches_win_percentage_agents = 100 * has_won_by_agent.sum() / has_won_by_agent.count()
df_matches_win_percentage_agents

Unnamed: 0_level_0,hasWon
agentName,Unnamed: 1_level_1
Astra,52.399695
Breach,51.651885
Brimstone,54.171356
Chamber,52.957708
Cypher,51.493711
Fade,52.926122
Harbor,48.28722
Jett,53.172429
KAY/O,48.045564
Killjoy,55.926048


In [47]:
# Sum of hasWon per agent, returned as a flat frame with the columns
# agentName and hasWon (as_index=False keeps the key as a regular column).
df_matches_wins_per_agent = (
    df_matches_raw
    .groupby(['agentName'], as_index=False)['hasWon']
    .sum()
)
df_matches_wins_per_agent


Unnamed: 0,agentName,hasWon
0,Astra,4127
1,Breach,4659
2,Brimstone,7701
3,Chamber,34485
4,Cypher,1310
5,Fade,12408
6,Harbor,733
7,Jett,24454
8,KAY/O,6158
9,Killjoy,1694


In [48]:
# Number of rows per dateMatch value, as a two-column frame (dateMatch, count)
# with a fresh 0..n-1 index.
df_matches_date_count = (
    df_matches_raw
    .groupby('dateMatch')
    .size()
    .reset_index(name='count')
)

# Same object under a second name (not a copy); used by the plotting cell.
df_matches_date_count_plot = df_matches_date_count

In [49]:
# Line chart of the row count for each dateMatch value.
# The frame is passed together with column names so the axes and hover text
# are labelled 'dateMatch' / 'count' rather than the generic 'x' / 'y', and
# the figure carries a title so it can be read on its own.
fig = px.line(
    df_matches_date_count_plot,
    x='dateMatch',
    y='count',
    title='Match records per date',
)
fig.show()

In [50]:
# How many rows each agent appears in, most frequent first, re-indexed from 0.
df_matches_agents_selected = (
    df_matches_raw
    .groupby('agentName')
    .size()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
    .reset_index(drop=True)
)
df_matches_agents_selected

Unnamed: 0,agentName,count
0,Chamber,65118
1,Jett,45990
2,Reyna,26302
3,Sova,26105
4,Fade,23444
5,Viper,22749
6,Raze,21971
7,Omen,15574
8,Brimstone,14216
9,KAY/O,12817


In [51]:
# Vertical bar chart of the per-agent counts, with each value printed above
# its bar ('.4s' number format).
fig = px.bar(
    df_matches_agents_selected,
    x='agentName',
    y='count',
    orientation='v',
    text_auto='.4s',
    title="Number of matches an agent has been selected",
)
# Small, horizontal labels placed outside the bars; cliponaxis=False keeps
# labels that extend past the plot area from being cut off.
fig.update_traces(
    textfont_size=10,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
)

fig.show()

In [52]:
# Number of rows per map, largest first. The index is intentionally not reset,
# so each row keeps the position it had in the alphabetical grouping.
df_matches_maps = (
    df_matches_raw
    .groupby('mapName')
    .size()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)
df_matches_maps

Unnamed: 0,mapName,count
4,Haven,48369
1,Bind,48175
0,Ascent,48022
5,Icebox,48003
2,Breeze,47766
3,Fracture,47178
6,Pearl,44508
7,Split,1814


In [53]:
# Bar chart of the per-map counts, with each value printed above its bar
# ('.5s' number format).
fig = px.bar(
    df_matches_maps,
    x='mapName',
    y='count',
    text_auto='.5s',
    title="Total number of matches per maps",
)
# Small, horizontal labels placed outside the bars; cliponaxis=False keeps
# labels that extend past the plot area from being cut off.
fig.update_traces(
    textfont_size=10,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
)
fig.show()