# Exploratory Data Analysis - Valorant

## Libs

In [1]:
# Data manipulation
import pandas as pd
from datetime import datetime
import pathlib

In [2]:
# Charts
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

## Initial Settings and Loads

In [3]:
abs_path = pathlib.Path.cwd().parent

### Load Matches

In [4]:
file_read = pathlib.Path(abs_path).joinpath('data').joinpath('matches').joinpath('matches_processed.csv')
df_matches_raw = pd.read_csv(file_read)
del df_matches_raw['Unnamed: 0']

### Load Players Rank

In [5]:
file_read = pathlib.Path(abs_path).joinpath('data').joinpath('players').joinpath('players_processed.csv')
df_players_raw = pd.read_csv(file_read)

### Load Weapons

In [6]:
file_read = pathlib.Path(abs_path).joinpath('data').joinpath('weapons').joinpath('weapons_processed.csv')
df_weapons_raw = pd.read_csv(file_read)

## Main Dataset Dimenssions

In [7]:
min_data = df_matches_raw.dateMatch.min()
max_data = df_matches_raw.dateMatch.max()
unique_userId = df_matches_raw.userId.nunique()
unique_mapId = df_matches_raw.mapName.nunique()
unique_sever = df_matches_raw.server.nunique()
row_number = df_matches_raw.shape[0]
col_number = df_matches_raw.shape[1]

print('The dataset has a start date at : {} \nEnd date at : {} Match \nWith: unique players : {} \nMaps : {} \nGame servers : {} \nRow number : {} \nColumn number : {}'. format(min_data, max_data, unique_userId, unique_mapId, unique_sever, row_number, col_number))

The dataset has a start date at : 2020-06-28 
End date at : 2022-11-10 Match 
With: unique players : 1871 
Maps : 8 
Game servers : 6 
Row number : 333835 
Column number : 57


In [8]:
df_matches_raw_describe = df_matches_raw.describe()
df_matches_raw_describe_pivot = pd.pivot_table(df_matches_raw_describe, columns=["count", 'mean', 'std', 'min', '25%', '50%', '75%', 'max'])
df_matches_raw_describe_pivot

Unnamed: 0,25%,50%,75%,count,max,mean,min,std
ability1CastsValue,6.0,10.0,16.0,333679.0,87.0,11.532671,0.0,7.280749
ability2CastsValue,13.0,20.0,26.0,333679.0,109.0,20.509598,0.0,11.271582
acesValue,0.0,0.0,0.0,333835.0,3.0,0.027571,0.0,0.167447
assistsValue,3.0,4.0,7.0,333835.0,26.0,4.992239,0.0,3.220758
clutchesLostValue,1.0,2.0,3.0,277273.0,15.0,2.479892,1.0,1.433286
clutchesValue,1.0,1.0,2.0,161109.0,8.0,1.353978,0.0,0.921639
competitiveTier,26.0,27.0,27.0,333835.0,27.0,26.663373,25.0,0.567392
damagePerRoundValue,119.153846,145.111111,173.25,333835.0,461.0,147.668696,0.0,41.148931
damageReceivedValue,2497.0,2930.0,3354.0,333835.0,8020.0,2918.106265,0.0,711.47083
damageValue,2438.0,3083.0,3784.0,333835.0,11217.0,3144.430006,0.0,1023.790909


In [9]:
df_matches_raw_mode = (
                        df_matches_raw[[
                        
                         
                            'competitiveTier', 'rankedRating', 'hasWon', 'result',
                            'agentName', 'playtimeValue', 'roundsPlayedValue',
                            'roundsWonValue', 'roundsLostValue', 'roundsDisconnectedValue',
                            'scoreValue', 'killsValue', 'deathsValue', 'assistsValue', 'damageValue',
                            'damageReceivedValue', 'headshotsValue', 'grenadeCastsValue', 'ability1CastsValue',
                            'ability2CastsValue', 'ultimateCastsValue', 'dealtHeadshotsValue', 'headshotsPercentageValue',
                            'dealtBodyshotsValue', 'dealtLegshotsValue', 'econRatingValue', 'suicidesValue', 'revivedValue', 'firstBloodsValue',
                            'firstDeathsValue', 'lastDeathsValue', 'survivedValue', 'tradedValue', 'kastedValue', 'kASTValue',
                            'flawlessValue', 'thriftyValue', 'acesValue', 'teamAcesValue', 'clutchesValue', 'clutchesLostValue',
                            'plantsValue', 'defusesValue', 'kdRatioValue', 'scorePerRoundValue', 'damagePerRoundValue',]]
                        .mode()
                        .reset_index(drop = True)
)
df_matches_raw_mode

Unnamed: 0,competitiveTier,rankedRating,hasWon,result,agentName,playtimeValue,roundsPlayedValue,roundsWonValue,roundsLostValue,roundsDisconnectedValue,...,thriftyValue,acesValue,teamAcesValue,clutchesValue,clutchesLostValue,plantsValue,defusesValue,kdRatioValue,scorePerRoundValue,damagePerRoundValue
0,27,404,True,victory,Chamber,2095,24,13,13,0.0,...,,0,0,1.0,1.0,0,0,1.0,214.0,140.0


## EDA Matches

### Days

In [10]:
matches_per_day = (
    
                    df_matches_raw.groupby(df_matches_raw.dateMatch)['matchId']
                    .count()
                    .reset_index(name='count')
                    .reset_index(drop = True)
                    )


In [11]:
fig = px.line(x = matches_per_day['dateMatch'], y = matches_per_day['count'], title= 'Number of matches per day')
fig.show()

In [70]:
weekDayOrdered = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
mean_matches_per_days_week = (
                                matches_per_day.groupby(matches_per_day.weekDay)['count']
                                .mean()
                                .reindex(weekDayOrdered) 
                                .reset_index(name = 'mean')
                                .reset_index(drop = True)
                                .round(3)
                                )

Unnamed: 0,weekDay,mean
0,Monday,491.258
1,Tuesday,489.913
2,Wednesday,547.624
3,Thursday,507.958
4,Friday,523.363
5,Saturday,522.174
6,Sunday,497.052


In [69]:
fig = px.line(mean_matches_per_days_week, x = 'weekDay', y = 'mean', title = 'Means of matches per days of week')
fig.show()

In [14]:
means_time_matches = df_matches_raw[['matchId', 'weekDay', 'playtimeValue']]
means_time_matches['playtimeValueMinutes'] = means_time_matches.playtimeValue/60

means_time_matches = (
                        means_time_matches.groupby('weekDay')['playtimeValueMinutes']
                        .mean()
                        .reindex(weekDayOrdered)
                        .reset_index(name = 'mean')
                        .reset_index(drop = True)                     
                        .round(2)
)

In [15]:
fig = px.line(means_time_matches, x = 'weekDay', y = 'mean', title = 'Means time per days of week')
fig.show()

In [16]:
top_seven_days_matches = (
                            df_matches_raw.groupby(['dateMatch', 'weekDay'])['weekDay']
                            .count()
                            .reset_index(name = 'count')
                            .reset_index(drop = True)
                            .sort_values('count', ascending = False)
                            .head(7)
                            )

In [17]:
fig = px.bar(top_seven_days_matches, x='weekDay', y='count',
             hover_data=['dateMatch', 'count'], color='count', height=400, title = 'Top seven days with most matches played')
fig.show()

### Plants and Defuses

In [18]:
matches_plat_defuse = (
                        df_matches_raw.groupby(['result'])[['plantsValue', 'defusesValue']]
                        .sum()
                        .sort_values('plantsValue', ascending = False)
                        .reset_index()
)

In [53]:
fig = px.bar(matches_plat_defuse, x = 'result', y = 'plantsValue', text_auto='.2s',
            title="Default: various text sizes, positions and angles", color = 'result')
fig.show()

fig = px.bar(matches_plat_defuse, x = 'result', y = 'defusesValue', text_auto='.2s',
            title="Default: various text sizes, positions and angles", color = 'result')
fig.show()

In [64]:
fig = go.Figure()
fig.add_trace(go.Box(y=matches_plat_defuse['plantsValue'],
                    name='plantsValue'))
fig.add_trace(go.Box(y=matches_plat_defuse['defusesValue'],
                    name='defusesValue'))
fig.show()

## EDA Players

### Matches

In [22]:
matches_per_player = (
                        df_matches_raw.groupby(['userId'])['userId']
                        .count()
                        .reset_index(name = 'count')
                        .reset_index(drop = True)
                        )

In [23]:
fig = px.line(matches_per_player, x = 'userId', y = 'count', title = 'Number of matches per player')
fig.show()

In [73]:
fig = px.box(matches_per_player, y = 'count', title = 'Matches in intervals between quartiles')
fig.show()

### Time

In [24]:
total_time_player = df_matches_raw.groupby(['userId'])[['playtimeValue']].sum().reset_index()
total_time_player['playtimeValueMinutes'] = (df_matches_raw['playtimeValue']/60).round(2)

In [25]:
fig = px.line(total_time_player, x = 'userId', y = 'playtimeValueMinutes', title = 'Means time per days of week')
fig.show()

In [74]:
fig = px.box(total_time_player, y="playtimeValueMinutes", title = 'Time in intervals between quartiles')
fig.show()

### Results

In [97]:
number_wins_player = (
                        df_matches_raw.groupby(['userId', 'result'])['result']
                        .count()
                        .reset_index(name = 'count')
                        .sort_values(['count'], ascending = False)
                        .reset_index(drop = True)
                        )

In [96]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=number_wins_player.query('result == "victory"')['count'],
                    name='Victory'))
fig.add_trace(go.Scatter(y=number_wins_player.query('result == "defeat"')['count'],
                    name='Victory'))
fig.add_trace(go.Scatter(y=number_wins_player.query('result == "tied"')['count'],
                    name='Victory'))                    

fig.show();

In [28]:
fig = px.box(number_wins_player, x = "result", y = "count", color = "result")
fig.update_traces(quartilemethod="exclusive") # or "inclusive", or "linear" by default
fig.show()

### Headshots

In [98]:
number_headshot_per_player = df_matches_raw.groupby(['userId'])['headshotsValue'].sum().reset_index(name = 'sum').sort_values('sum', ascending=False).reset_index(drop = True)

In [99]:
fig = px.line(number_headshot_per_player, x = 'userId', y = 'sum', title = 'Means time per days of week')
fig.show()

In [31]:
fig = px.box(number_headshot_per_player, y="sum", title = 'Interval of time played between quartiles by players')
fig.show()

### KDA

In [32]:
number_kda_player = (
                        df_matches_raw.groupby(['userId'])[['killsValue', 'deathsValue', 'assistsValue']]
                        .sum()
                        .sort_values(['killsValue', 'deathsValue', 'assistsValue'], ascending = False)
                        .reset_index(drop = True)
                        )

In [35]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=number_kda_player['userId'], y=number_kda_player['killsValue'],
                    mode='lines',
                    name='killsValue'))
fig.add_trace(go.Scatter(x=number_kda_player['userId'], y=number_kda_player['deathsValue'],
                    mode='lines',
                    name='deathsValue'))
fig.add_trace(go.Scatter(x=number_kda_player['userId'], y=number_kda_player['assistsValue'],
                    mode='lines',
                    name='assistsValue'))

fig.show()

In [33]:
fig = go.Figure()

for col in number_kda_player:
  fig.add_trace(go.Box(y=number_kda_player[col].values, name=number_kda_player[col].name))
  
fig.show()

### Damage

In [36]:
number_damage_player = df_matches_raw.groupby(['userId'])['damageValue','damageReceivedValue'].sum().reset_index()
number_damage_player

Unnamed: 0,userId,damageValue,damageReceivedValue
0,Z%232000,591960,489766
1,00xampa%23HNDEK,570882,529516
2,010 farewell%23RRR,625194,562953
3,06 cAstAwAy 06%23YARGI,509785,483893
4,06 발로 프로지망%23USW,503640,480782
...,...,...,...
1866,한석봉%23blebe,492563,507864
1867,한섭새끼들다썰고아섭으로도망감%23이지까까,542942,538871
1868,허 민 규%235841,551367,536824
1869,헬스장덤벨연쇄절도범%233대500,607158,553900


In [37]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=number_damage_player['userId'], y=number_damage_player['damageValue'],
                    mode='lines',
                    name='damageValue'))
fig.add_trace(go.Scatter(x=number_damage_player['userId'], y=number_damage_player['damageReceivedValue'],
                    mode='lines',
                    name='damageReceivedValue'))


fig.show()

In [38]:
fig = go.Figure()
fig.add_trace(go.Box(y=number_damage_player['damageValue'], name = 'damageValue'))
fig.add_trace(go.Box(y=number_damage_player['damageReceivedValue'], name = 'damageReceivedValue'))

fig.show()


### Economy efficiency per player

In [39]:
number_economy_player = df_matches_raw.groupby(['userId'])['econRatingValue'].mean().sort_values(ascending = False).reset_index()
number_economy_player

Unnamed: 0,userId,econRatingValue
0,felipedm%23tiger,109.777778
1,vlzkcb%231030,104.118812
2,xogglefn%23dwish,92.611111
3,eu amo a isa%23dela,89.151515
4,GansTerw0w%23Ali,89.105556
...,...,...
1866,FrosttKinggTTV%23LFT,47.394444
1867,Masaru%23SKYE,46.961111
1868,ODK Otsukel%23peso,46.727778
1869,KIPER COMPRESSAO%23ANAU,46.416667


In [40]:
fig = go.Figure()
fig.add_trace(go.Box(y=number_economy_player['econRatingValue'], name = 'econRatingValue' ))


fig.show()

In [41]:
fig = go.Figure()
fig.add_trace(go.Bar(x=number_economy_player['userId'], y=number_economy_player['econRatingValue'],
                    name='econRatingValue'))



fig.show()

In [42]:
df_matches_raw.columns

Index(['matchId', 'timestamp', 'dateMatch', 'hourMatch', 'weekDay', 'mapName',
       'mapImageUrl', 'seasonName', 'userId', 'server', 'leaderboardRank',
       'competitiveTier', 'rankedRating', 'hasWon', 'result', 'agentName',
       'playtimeValue', 'roundsPlayedValue', 'roundsWonValue',
       'roundsLostValue', 'roundsDisconnectedValue', 'scoreValue',
       'killsValue', 'deathsValue', 'assistsValue', 'damageValue',
       'damageReceivedValue', 'headshotsValue', 'grenadeCastsValue',
       'ability1CastsValue', 'ability2CastsValue', 'ultimateCastsValue',
       'dealtHeadshotsValue', 'dealtBodyshotsValue', 'dealtLegshotsValue',
       'econRatingValue', 'suicidesValue', 'revivedValue', 'firstBloodsValue',
       'firstDeathsValue', 'lastDeathsValue', 'survivedValue', 'tradedValue',
       'kastedValue', 'kASTValue', 'flawlessValue', 'thriftyValue',
       'acesValue', 'teamAcesValue', 'clutchesValue', 'clutchesLostValue',
       'plantsValue', 'defusesValue', 'kdRatioValue', 'sc

## EDA Agents

### Number of matches per agents

In [43]:
number_matches_agents = (
                             df_matches_raw.groupby(['agentName'])['agentName']
                            .count() 
                            .reset_index(name='count') 
                            .sort_values(['count'], ascending=False) 
                            .reset_index(drop = True)
)

number_matches_agents.head(3)

Unnamed: 0,agentName,count
0,Chamber,65118
1,Jett,45990
2,Reyna,26302


In [44]:
fig = px.bar(number_matches_agents, y='count', x='agentName', text_auto='.4s', orientation='v',
            title="Number of matches an agent has been selected")
fig.update_traces(textfont_size=10, textangle=0, textposition="outside", cliponaxis=False)

fig.show()

### Results per agent

In [45]:
matches_agents_result = df_matches_raw[['agentName','result']].groupby(['agentName','result',])['result'] \
                             .count() \
                             .reset_index(name='count') \
                             .sort_values(['agentName'], ascending=False) \
                             .reset_index(drop = True)

matches_agents_result.head(3)

Unnamed: 0,agentName,result,count
0,Yoru,victory,1315
1,Yoru,tied,29
2,Yoru,defeat,1233


In [46]:
fig = px.histogram(matches_agents_result, x='agentName', y='count', color='result', barmode='group',
             height=400)
fig.show()

In [47]:
fig = px.box(matches_agents_result, x = "result", y = "count", color = "result")
fig.update_traces(quartilemethod="exclusive") # or "inclusive", or "linear" by default
fig.show()

### Number ultimate per agent

In [48]:
ultimate_agente = df_matches_raw.groupby(['agentName'])['ultimateCastsValue'].sum().sort_values(ascending = False).reset_index()
ultimate_agente.head(3)

Unnamed: 0,agentName,ultimateCastsValue
0,Chamber,160399.0
1,Jett,132604.0
2,Fade,71138.0


In [49]:
fig = px.histogram(ultimate_agente, x='agentName', y='ultimateCastsValue')
fig.show()

### Number KDA per agent

In [50]:
number_kda_agent = (
                        df_matches_raw.groupby(['agentName'])[['killsValue', 'deathsValue', 'assistsValue']]
                        .sum()
                        # .sort_values(['agentName'], ascending = False)
                        .reset_index()
                        )

number_kda_agent.head(3)

Unnamed: 0,agentName,killsValue,deathsValue,assistsValue
0,Astra,124800,114246,47912
1,Breach,134974,137818,67801
2,Brimstone,221443,210565,115844


In [51]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=number_kda_agent['agentName'], y=number_kda_player['killsValue'],
                    mode='lines',
                    name='killsValue'))
fig.add_trace(go.Scatter(x=number_kda_agent['agentName'], y=number_kda_player['deathsValue'],
                    mode='lines',
                    name='deathsValue'))
fig.add_trace(go.Scatter(x=number_kda_agent['agentName'], y=number_kda_player['assistsValue'],
                    mode='lines',
                    name='assistsValue'))

fig.show()