# Exploratory Data Analysis - Valorant

## Libs

In [None]:
# Data manipulation
import pandas as pd
from datetime import datetime
import pathlib

In [None]:
# Charts
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import warnings
# NOTE(review): this silences every warning for the rest of the session, so
# deprecation warnings emitted by pandas or plotly will not be shown.
warnings.filterwarnings("ignore")

## Initial Settings and Loads

In [None]:
# Base directory for the data files: the parent of the current working
# directory. NOTE(review): presumably the notebook is launched from a
# sub-folder of the project -- confirm.
abs_path = pathlib.Path.cwd().parent

### Load Matches

In [None]:
# Read the processed matches table from <abs_path>/data/matches/.
file_read = pathlib.Path(abs_path) / 'data' / 'matches' / 'matches_processed.csv'
df_matches_raw = pd.read_csv(file_read)
# Discard the 'Unnamed: 0' column that comes with the CSV.
df_matches_raw.drop(columns='Unnamed: 0', inplace=True)

### Load Players Rank

In [None]:
# Read the processed players table from <abs_path>/data/players/.
file_read = pathlib.Path(abs_path) / 'data' / 'players' / 'players_processed.csv'
df_players_raw = pd.read_csv(file_read)

### Load Weapons

In [None]:
# Read the processed weapons table from <abs_path>/data/weapons/.
file_read = pathlib.Path(abs_path) / 'data' / 'weapons' / 'weapons_processed.csv'
df_weapons_raw = pd.read_csv(file_read)
# Discard the 'Unnamed: 0' column that comes with the CSV.
df_weapons_raw.drop(columns='Unnamed: 0', inplace=True)

## Main Dataset Dimensions

In [None]:
# Headline figures of the matches table: date span, distinct counts and shape.
min_data = df_matches_raw['dateMatch'].min()
max_data = df_matches_raw['dateMatch'].max()
unique_userId = df_matches_raw['userId'].nunique()
unique_mapId = df_matches_raw['mapName'].nunique()
unique_sever = df_matches_raw['server'].nunique()
unique_matches = df_matches_raw['matchId'].nunique()

# shape is (rows, columns)
row_number, col_number = df_matches_raw.shape

print(
    f'The dataset has a start date at : {min_data} \n'
    f'End date at : {max_data} Match \n'
    f'With: unique players : {unique_userId} \n'
    f'Maps : {unique_mapId} \n'
    f'Game servers : {unique_sever} \n'
    f'Unique Matches: {unique_matches} \n'
    f'Row number : {row_number} \n'
    f'Column number : {col_number}'
)

In [None]:
# Summary statistics of the matches table, shown with one row per dataset
# column and one column per statistic. describe() puts the statistics
# (count, mean, std, ...) on the index, so a transpose gives that layout
# directly; they are index labels, not columns a pivot could group on.
df_matches_raw_describe = df_matches_raw.describe()
df_matches_raw_describe_pivot = df_matches_raw_describe.T
df_matches_raw_describe_pivot

In [None]:
# Most frequent value(s) of each selected match column. mode() returns one
# row per tied value, so the result may have more than one row.
mode_columns = [
    'competitiveTier', 'rankedRating', 'hasWon', 'result',
    'agentName', 'playtimeValue', 'roundsPlayedValue',
    'roundsWonValue', 'roundsLostValue', 'roundsDisconnectedValue',
    'scoreValue', 'killsValue', 'deathsValue', 'assistsValue', 'damageValue',
    'damageReceivedValue', 'headshotsValue', 'grenadeCastsValue', 'ability1CastsValue',
    'ability2CastsValue', 'ultimateCastsValue', 'dealtHeadshotsValue', 'headshotsPercentageValue',
    'dealtBodyshotsValue', 'dealtLegshotsValue', 'econRatingValue', 'suicidesValue', 'revivedValue', 'firstBloodsValue',
    'firstDeathsValue', 'lastDeathsValue', 'survivedValue', 'tradedValue', 'kastedValue', 'kASTValue',
    'flawlessValue', 'thriftyValue', 'acesValue', 'teamAcesValue', 'clutchesValue', 'clutchesLostValue',
    'plantsValue', 'defusesValue', 'kdRatioValue', 'scorePerRoundValue', 'damagePerRoundValue',
]
df_matches_raw_mode = df_matches_raw[mode_columns].mode().reset_index(drop=True)
df_matches_raw_mode

## EDA Matches

### Days

In [None]:
# Number of match rows per calendar date; weekDay is carried along as a
# second grouping key so it stays available as a column.
matches_per_day = (
    df_matches_raw
    .groupby(['dateMatch', 'weekDay'])['matchId']
    .count()
    .reset_index(name='count')
)


In [None]:
# Daily match volume. The frame and column names are passed (as in the other
# charts of this notebook) so the axes are labelled 'dateMatch' and 'count'
# instead of the generic 'x' and 'y'.
fig = px.line(
    matches_per_day,
    x='dateMatch',
    y='count',
    title='Number of matches per day',
)
fig.show()

In [None]:
# Average number of matches per weekday. reindex() puts the weekdays in
# Monday-to-Sunday order; a weekday absent from the data comes out as NaN.
weekDayOrdered = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
mean_matches_per_days_week = (
    matches_per_day
    .groupby('weekDay')['count']
    .mean()
    .reindex(weekDayOrdered)
    .reset_index(name='mean')
    .round(3)
)

In [None]:
# Line chart of the weekday averages computed above.
fig = px.line(
    mean_matches_per_days_week,
    x='weekDay',
    y='mean',
    title='Means of matches per days of week',
)
fig.show()

In [None]:
# Average match duration per weekday, in Monday-to-Sunday order.
# playtimeValue is divided by 60 to obtain playtimeValueMinutes
# (presumably the raw value is in seconds -- TODO confirm).
# The derived column is added with assign() rather than written into a
# column subset of df_matches_raw, which pandas flags with
# SettingWithCopyWarning.
means_time_matches = (
    df_matches_raw[['matchId', 'weekDay', 'playtimeValue']]
    .assign(playtimeValueMinutes=lambda frame: frame['playtimeValue'] / 60)
    .groupby('weekDay')['playtimeValueMinutes']
    .mean()
    .reindex(weekDayOrdered)
    .reset_index(name='mean')
    .round(2)
)

In [None]:
# Line chart of the average match duration per weekday.
fig = px.line(
    means_time_matches,
    x='weekDay',
    y='mean',
    title='Means time per days of week',
)
fig.show()

In [None]:
# The seven (date, weekday) pairs with the highest number of match rows.
top_seven_days_matches = (
    df_matches_raw
    .groupby(['dateMatch', 'weekDay'])['weekDay']
    .count()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(7)
)

In [None]:
# Bar chart of the busiest days; the date is exposed through the hover box.
fig = px.bar(
    top_seven_days_matches,
    x='weekDay',
    y='count',
    hover_data=['dateMatch', 'count'],
    color='count',
    height=400,
    title='Top seven days with most matches played',
)
fig.show()

### Plants and Defuses

In [None]:
# Total plants and defuses per match result, largest plant total first.
matches_plat_defuse = (
    df_matches_raw
    .groupby(['result'])[['plantsValue', 'defusesValue']]
    .sum()
    .sort_values('plantsValue', ascending=False)
    .reset_index()
)

In [None]:
# One bar chart per metric, split by match result. The titles name the metric
# shown; the earlier text was a placeholder unrelated to the data.
fig = px.bar(
    matches_plat_defuse,
    x='result',
    y='plantsValue',
    text_auto='.2s',
    title='Total plants by match result',
    color='result',
)
fig.show()

fig = px.bar(
    matches_plat_defuse,
    x='result',
    y='defusesValue',
    text_auto='.2s',
    title='Total defuses by match result',
    color='result',
)
fig.show()

In [None]:
# Box plots of the per-result plant and defuse totals, one box per metric.
fig = go.Figure()
for metric in ('plantsValue', 'defusesValue'):
    fig.add_trace(go.Box(y=matches_plat_defuse[metric], name=metric))
fig.show()

## EDA Players

### Matches

In [None]:
# Number of match rows recorded for each player.
matches_per_player = (
    df_matches_raw
    .groupby(['userId'])['userId']
    .count()
    .reset_index(name='count')
)

In [None]:
# Line chart of the match count of every player.
fig = px.line(
    matches_per_player,
    x='userId',
    y='count',
    title='Number of matches per player',
)
fig.show()

In [None]:
# Distribution of the per-player match counts.
fig = px.box(
    matches_per_player,
    y='count',
    title='Matches in intervals between quartiles',
)
fig.show()

### Time

In [None]:
# Total playtime per player.
total_time_player = (
    df_matches_raw
    .groupby(['userId'])[['playtimeValue']]
    .sum()
    .reset_index()
)
# Convert the per-player totals to minutes. The conversion has to read the
# aggregated frame: taking the column from df_matches_raw would align
# individual match rows to the player rows by index position.
# (playtimeValue / 60 -> minutes presumes the raw unit is seconds -- TODO confirm.)
total_time_player['playtimeValueMinutes'] = (total_time_player['playtimeValue'] / 60).round(2)

In [None]:
# Line chart of the playtimeValueMinutes column of every player; the title
# describes this chart rather than the weekday chart it was copied from.
fig = px.line(
    total_time_player,
    x='userId',
    y='playtimeValueMinutes',
    title='Total time played per player (minutes)',
)
fig.show()

In [None]:
# Distribution of the per-player playtimeValueMinutes values.
fig = px.box(
    total_time_player,
    y='playtimeValueMinutes',
    title='Time in intervals between quartiles',
)
fig.show()

### Results

In [None]:
# Number of matches per (player, result) pair, largest count first.
number_wins_player = (
    df_matches_raw
    .groupby(['userId', 'result'])['result']
    .count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# One trace per match result. Each trace carries its own legend label so the
# three series can be told apart.
fig = go.Figure()
for result_value, legend_label in (
    ('victory', 'Victory'),
    ('defeat', 'Defeat'),
    ('tied', 'Tied'),
):
    fig.add_trace(
        go.Scatter(
            y=number_wins_player.query('result == @result_value')['count'],
            name=legend_label,
        )
    )

fig.show();

In [None]:
# Distribution of the per-player counts, one box per match result.
fig = px.box(
    number_wins_player,
    x='result',
    y='count',
    color='result',
)
# Alternatives are "inclusive" or "linear" (the default).
fig.update_traces(quartilemethod='exclusive')
fig.show()

### Headshots

In [None]:
# Total headshotsValue per player, highest total first.
number_headshot_per_player = (
    df_matches_raw
    .groupby(['userId'])['headshotsValue']
    .sum()
    .reset_index(name='sum')
    .sort_values('sum', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Line chart of the headshot total of every player; the title names the
# plotted metric instead of repeating the weekday-duration title.
fig = px.line(
    number_headshot_per_player,
    x='userId',
    y='sum',
    title='Total headshots per player',
)
fig.show()

In [None]:
# Distribution of the per-player headshot totals; the title refers to
# headshots, which is what the 'sum' column holds here.
fig = px.box(
    number_headshot_per_player,
    y='sum',
    title='Headshots in intervals between quartiles',
)
fig.show()

### KDA

In [None]:
# Total kills, deaths and assists per player, sorted by kills, then deaths,
# then assists (all descending).
# Only the three metrics are selected: reset_index() already brings userId
# back as a column, and listing the grouping key in the selection as well
# would ask pandas to aggregate the key itself.
number_kda_player = (
    df_matches_raw
    .groupby('userId')[['killsValue', 'deathsValue', 'assistsValue']]
    .sum()
    .sort_values(['killsValue', 'deathsValue', 'assistsValue'], ascending=False)
    .reset_index()
)

In [None]:
# One line per metric (kills, deaths, assists) across players.
fig = go.Figure()

for metric in ('killsValue', 'deathsValue', 'assistsValue'):
    fig.add_trace(
        go.Scatter(
            x=number_kda_player['userId'],
            y=number_kda_player[metric],
            mode='lines',
            name=metric,
        )
    )

fig.show()

In [None]:
# Box plot of every remaining column of number_kda_player.
# userId is dropped so that only the metric columns are iterated;
# errors='ignore' keeps the cell re-runnable once the column is already gone.
number_kda_player = number_kda_player.drop(columns='userId', errors='ignore')

fig = go.Figure()

for col in number_kda_player:
    fig.add_trace(go.Box(y=number_kda_player[col].values, name=number_kda_player[col].name))

fig.show()

### Damage

In [None]:
# Total damage dealt and received per player.
# The two columns are selected with a list: selecting several groupby columns
# with a bare tuple (['a', 'b'] without the inner brackets) is rejected by
# pandas 2.x.
number_damage_player = (
    df_matches_raw
    .groupby(['userId'])[['damageValue', 'damageReceivedValue']]
    .sum()
    .reset_index()
)

In [None]:
# One line per damage metric across players.
fig = go.Figure()
for metric in ('damageValue', 'damageReceivedValue'):
    fig.add_trace(
        go.Scatter(
            x=number_damage_player['userId'],
            y=number_damage_player[metric],
            mode='lines',
            name=metric,
        )
    )

fig.show()

In [None]:
# Distribution of the per-player damage totals, one box per metric.
fig = go.Figure()
for metric in ('damageValue', 'damageReceivedValue'):
    fig.add_trace(go.Box(y=number_damage_player[metric], name=metric))

fig.show()


### Economy

In [None]:
# Mean econRatingValue per player, highest mean first.
number_economy_player = (
    df_matches_raw
    .groupby(['userId'])['econRatingValue']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
number_economy_player

In [None]:
# Distribution of the per-player mean econRatingValue.
fig = go.Figure()
econ_box = go.Box(y=number_economy_player['econRatingValue'], name='econRatingValue')
fig.add_trace(econ_box)

fig.show()

In [None]:
# Bar chart of the mean econRatingValue of every player.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=number_economy_player['userId'],
        y=number_economy_player['econRatingValue'],
        name='econRatingValue',
    )
)

fig.show()

### Weapons

In [None]:
# Display the weapons table (no per-player weapon analysis is done here yet).
df_weapons_raw

## EDA Agents

### Matches

In [None]:
# Number of match rows per agent, most selected agent first.
number_matches_agents = (
    df_matches_raw
    .groupby(['agentName'])['agentName']
    .count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Vertical bar chart of the selection count of every agent, with the value
# printed above each bar.
fig = px.bar(
    number_matches_agents,
    x='agentName',
    y='count',
    text_auto='.4s',
    orientation='v',
    title='Number of matches an agent has been selected',
)
fig.update_traces(
    textfont_size=10,
    textangle=0,
    textposition='outside',
    cliponaxis=False,
)

fig.show()

In [None]:
# Distribution of the per-agent selection counts.
fig = go.Figure()
agent_count_box = go.Box(y=number_matches_agents['count'], name='count')
fig.add_trace(agent_count_box)

fig.show()

### Results

In [None]:
# Number of matches per (agent, result) pair, agents in descending name order.
matches_agents_result = (
    df_matches_raw[['agentName', 'result']]
    .groupby(['agentName', 'result'])['result']
    .count()
    .reset_index(name='count')
    .sort_values(['agentName'], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Grouped bars: for every agent, one bar per match result.
fig = px.histogram(
    matches_agents_result,
    x='agentName',
    y='count',
    color='result',
    barmode='group',
    height=400,
)
fig.show()

In [None]:
# Distribution of the per-agent counts, one box per match result.
fig = px.box(
    matches_agents_result,
    x='result',
    y='count',
    color='result',
)
# Alternatives are "inclusive" or "linear" (the default).
fig.update_traces(quartilemethod='exclusive')
fig.show()

### Ultimate

In [None]:
# Total ultimate / ability 1 / ability 2 casts per agent, ordered by the
# ultimate total (descending).
ability_agente = (
    df_matches_raw
    .groupby(['agentName'])[['ultimateCastsValue', 'ability1CastsValue', 'ability2CastsValue']]
    .sum()
    .sort_values('ultimateCastsValue', ascending=False)
    .reset_index()
)
ability_agente

In [None]:
# Ultimate casts per agent.
fig = px.histogram(
    ability_agente,
    x='agentName',
    y='ultimateCastsValue',
)
fig.show()

In [None]:
# Distribution of the per-agent cast totals, one box per cast type.
_ability_agente = ability_agente[['ultimateCastsValue', 'ability1CastsValue', 'ability2CastsValue']]

fig = go.Figure()

for cast_name, cast_totals in _ability_agente.items():
    fig.add_trace(go.Box(y=cast_totals.values, name=cast_name))

fig.show()

### KDA

In [None]:
# Total kills, deaths and assists per agent. No explicit sort is applied, so
# the rows keep the order produced by groupby.
number_kda_agent = (
    df_matches_raw
    .groupby(['agentName'])[['killsValue', 'deathsValue', 'assistsValue']]
    .sum()
    .reset_index()
)

In [None]:
# One line per metric (kills, deaths, assists) across agents.
fig = go.Figure()
for metric in ('killsValue', 'deathsValue', 'assistsValue'):
    fig.add_trace(
        go.Scatter(
            x=number_kda_agent['agentName'],
            y=number_kda_agent[metric],
            mode='lines',
            name=metric,
        )
    )

fig.show()

In [None]:
# Distribution of the per-agent totals, one box per metric.
_kda_agente = number_kda_agent[['killsValue', 'deathsValue', 'assistsValue']]

fig = go.Figure()

for metric_name, metric_totals in _kda_agente.items():
    fig.add_trace(go.Box(y=metric_totals.values, name=metric_name))

fig.show()

## EDA Weapons

### Weapon

In [None]:
# Summary statistics of the weapons table, shown with one row per dataset
# column and one column per statistic. describe() puts the statistics
# (count, mean, std, ...) on the index, so a transpose gives that layout
# directly; they are index labels, not columns a pivot could group on.
df_weapons_raw_describe = df_weapons_raw.describe()
df_weapons_raw_describe_pivot = df_weapons_raw_describe.T
# Extra column with the maximum rounded to two decimals.
df_weapons_raw_describe_pivot['maxRounded'] = df_weapons_raw_describe_pivot['max'].round(2)
df_weapons_raw_describe_pivot

In [None]:
# Sum of matchesPlayedValue per weapon (stored under the name 'count'),
# largest total first.
most_used_weapons = (
    df_weapons_raw
    .groupby('weaponName')['matchesPlayedValue']
    .sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

In [None]:
# Bar chart of the usage total of every weapon, coloured by the same value.
fig = px.bar(
    most_used_weapons,
    x='weaponName',
    y='count',
    color='count',
    title='Most Used Weapons',
)
fig.show()

### Result

In [None]:
# Total matches won / lost / tied per weapon, ordered by wins (descending).
weapons_result = (
    df_weapons_raw
    .groupby('weaponName')[['matchesWonValue', 'matchesLostValue', 'matchesTiedValue']]
    .sum()
    .sort_values('matchesWonValue', ascending=False)
    .reset_index()
)

In [None]:
# One line per outcome (won, lost, tied) across weapons.
fig = go.Figure()
for outcome in ('matchesWonValue', 'matchesLostValue', 'matchesTiedValue'):
    fig.add_trace(
        go.Scatter(
            x=weapons_result['weaponName'],
            y=weapons_result[outcome],
            mode='lines',
            name=outcome,
        )
    )

fig.show()

In [None]:
# Distribution of the per-weapon totals, one box per outcome.
_weapons_result = weapons_result[['matchesWonValue', 'matchesLostValue', 'matchesTiedValue']]

fig = go.Figure()

for outcome_name, outcome_totals in _weapons_result.items():
    fig.add_trace(go.Box(y=outcome_totals.values, name=outcome_name))

fig.show()

### Damage

In [None]:
# Mean damage figures per weapon, rounded to three decimals and listed in
# reverse row order (sort_index on the fresh 0..n-1 index, descending).
# Only the numeric damage columns are selected: weaponName is the grouping
# key, reset_index() restores it as a column, and including it in the
# selection would make mean() try to average the key itself.
weapons_damage_avg = (
    df_weapons_raw
    .groupby('weaponName')[['damageValue', 'damagePerRoundValue', 'damagePerMatchValue']]
    .mean()
    .reset_index()
    .round(3)
    .sort_index(ascending=False)
)
weapons_damage_avg

In [None]:
# Mean damageValue per weapon.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=weapons_damage_avg['weaponName'],
        y=weapons_damage_avg['damageValue'],
        mode='lines',
        name='damageValue',
    )
)

fig.show()

In [None]:
# Mean damage per round and per match, one line each, across weapons.
fig = go.Figure()

for metric in ('damagePerRoundValue', 'damagePerMatchValue'):
    fig.add_trace(
        go.Scatter(
            x=weapons_damage_avg['weaponName'],
            y=weapons_damage_avg[metric],
            mode='lines',
            name=metric,
        )
    )

fig.show()

### Accuracy

In [None]:
# Total head, body and leg shots per weapon, listed in reverse row order
# (sort_index on the fresh 0..n-1 index, descending).
# weaponName is not part of the column selection: it is the grouping key and
# reset_index() already brings it back as a column, so selecting it too would
# ask pandas to sum the key itself.
weapons_accuracy = (
    df_weapons_raw
    .groupby('weaponName')[['dealtHeadshotsValue', 'dealtBodyshotsValue', 'dealtLegshotsValue']]
    .sum()
    .reset_index()
    .sort_index(ascending=False)
)
weapons_accuracy

In [None]:
# Display the weapons table.
df_weapons_raw