# Data processing to book

## Libs

In [71]:
import pandas as pd
import pathlib
from datetime import date

In [72]:
# Base directory for every data path built below: the parent of the current
# working directory.
# NOTE(review): this depends on where the kernel was started — presumably the
# notebook sits one level below the project root; confirm.
abs_path = pathlib.Path.cwd().parent

## Dataset Dimensions

### Players

In [73]:
# Source and destination CSV files for the players dataset, both under
# <abs_path>/data/players/.
file_read = pathlib.Path(abs_path, 'data', 'players', 'players.csv')
file_write = pathlib.Path(abs_path, 'data', 'players', 'players_processed.csv')

In [74]:
# Load the raw players CSV into a DataFrame (pandas defaults: all columns,
# inferred dtypes).
df_players_raw = pd.read_csv(file_read)

In [75]:
# Narrow the raw frame to the columns used further down, in this order.
# A column missing from the CSV raises KeyError here.
# Note: this rebinds df_players_raw to the narrowed frame.
df_players_raw = df_players_raw.loc[:, [
    'meta.shard',
    'meta.updatedAt',
    'leaderboards.gameName',
    'leaderboards.tagLine',
    'leaderboards.full_nickname',
    'leaderboards.leaderboardRank',
    'leaderboards.rankedRating',
    'leaderboards.numberOfWins',
    'leaderboards.competitiveTier',
    'leaderboards.stat.winRatio',
    'leaderboards.stat.avgScore',
    'leaderboards.stat.headshotRatio',
]]

In [76]:
# Replace the dotted source column names with short names.
# 'leaderboards.full_nickname' becomes 'userId', the key later used to join
# players onto matches.
df_players_renamed = df_players_raw.rename(
    {
        'meta.shard': 'server',
        'meta.updatedAt': 'updatedAt',
        'leaderboards.gameName': 'gameName',
        'leaderboards.tagLine': 'tagLine',
        'leaderboards.full_nickname': 'userId',
        'leaderboards.leaderboardRank': 'leaderboardRank',
        'leaderboards.rankedRating': 'rankedRating',
        'leaderboards.numberOfWins': 'numberOfWins',
        'leaderboards.competitiveTier': 'competitiveTier',
        'leaderboards.stat.winRatio': 'winRatio',
        'leaderboards.stat.avgScore': 'avgScore',
        'leaderboards.stat.headshotRatio': 'headshotRatio',
    },
    axis='columns',
)

In [77]:
# Discard every row that has at least one missing value, then renumber the
# index from 0 so it is contiguous again.
df_players = (
    df_players_renamed
    .dropna()
    .reset_index(drop=True)
)

In [78]:
# Persist the cleaned players table. to_csv is called with its default
# index=True, so the row index is written as an unnamed first column.
# NOTE(review): confirm downstream readers expect that extra column.
df_players.to_csv(file_write)

### Matches

In [79]:
# Source and destination CSV files for the matches dataset, both under
# <abs_path>/data/matches/. These rebind the file_read / file_write names.
file_read = pathlib.Path(abs_path, 'data', 'matches', 'matches.csv')
file_write = pathlib.Path(abs_path, 'data', 'matches', 'matches_processed.csv')

In [80]:
# Load the raw matches CSV into a DataFrame (pandas defaults: all columns,
# inferred dtypes).
df_matches_raw = pd.read_csv(file_read)

In [81]:
# Derive calendar and duration columns; they are added to df_matches_raw itself.

# Match date as 'YYYY-MM-DD' and hour of day as a two-digit string, both taken
# from the 'timestamp' column.
df_matches_raw['dateMatch'] = df_matches_raw['timestamp'].apply(lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
df_matches_raw['hourMatch'] = df_matches_raw['timestamp'].apply(lambda x: pd.Timestamp(x).strftime('%H'))

# playtimeValue divided by 60 and by 3600, rounded to 2 decimals
# (presumably the raw value is in seconds — confirm against the data source).
df_matches_raw['playtimeMinutesValue'] = (df_matches_raw['playtimeValue']/60).round(2)
df_matches_raw['playtimeHoursValue'] = (df_matches_raw['playtimeValue']/3600).round(2)

# English weekday name of the match date.
# pd.to_datetime is used instead of .astype('datetime64'): casting to a
# unit-less datetime64 dtype raises TypeError on pandas >= 2.0, while
# to_datetime parses the 'YYYY-MM-DD' strings on every pandas version.
df_matches_raw['weekDay'] = pd.to_datetime(df_matches_raw['dateMatch']).dt.day_name()

In [82]:
# Drop the existing 'userId' column, then promote 'player' to be the new
# 'userId' so matches use the same key name as df_players.
df_matches_cleaned = df_matches_raw.drop(labels='userId', axis='columns')
df_matches_renamed = df_matches_cleaned.rename({'player': 'userId'}, axis='columns')

In [83]:
# Attach each player's leaderboard attributes to the match rows.
# No `on=` is passed, so pandas joins on every column name shared by both
# frames (presumably just 'userId' — confirm). how='inner' is the pandas
# default, so match rows without a counterpart in df_players are dropped.
df_matches_merged = pd.merge(
    df_matches_renamed,
    df_players[['userId', 'leaderboardRank', 'competitiveTier', 'rankedRating', 'server']],
    how='inner',
)

In [84]:
# Select and order the columns written to the processed matches file.
# A column missing from the merged frame raises KeyError here.
df_matches_clean = df_matches_merged.loc[:, [
    'matchId', 'timestamp', 'dateMatch', 'hourMatch', 'weekDay',
    'mapName', 'mapImageUrl', 'seasonName',
    'userId', 'server', 'leaderboardRank', 'competitiveTier', 'rankedRating',
    'hasWon', 'result', 'agentName',
    'playtimeValue', 'playtimeMinutesValue', 'playtimeHoursValue',
    'roundsPlayedValue', 'roundsWonValue', 'roundsLostValue', 'roundsDisconnectedValue',
    'scoreValue', 'killsValue', 'deathsValue', 'assistsValue',
    'damageValue', 'damageReceivedValue', 'headshotsValue',
    'grenadeCastsValue', 'ability1CastsValue', 'ability2CastsValue', 'ultimateCastsValue',
    'dealtHeadshotsValue', 'dealtBodyshotsValue', 'dealtLegshotsValue',
    'econRatingValue', 'suicidesValue', 'revivedValue',
    'firstBloodsValue', 'firstDeathsValue', 'lastDeathsValue',
    'survivedValue', 'tradedValue', 'kastedValue', 'kASTValue',
    'flawlessValue', 'thriftyValue', 'acesValue', 'teamAcesValue',
    'clutchesValue', 'clutchesLostValue', 'plantsValue', 'defusesValue',
    'kdRatioValue', 'scorePerRoundValue', 'damagePerRoundValue', 'headshotsPercentageValue',
]]

In [85]:
# Replace every remaining missing value with 0, across all columns.
# NOTE(review): this also zero-fills any non-numeric column that contains
# NaN — confirm that is intended.
df_matches_clean = df_matches_clean.fillna(value=0)

In [86]:
# Persist the processed matches table. to_csv is called with its default
# index=True, so the row index is written as an unnamed first column.
# NOTE(review): confirm downstream readers expect that extra column.
df_matches_clean.to_csv(file_write)

### Weapons

In [87]:
# Source and destination CSV files for the weapons dataset, both under
# <abs_path>/data/weapons/. These rebind the file_read / file_write names.
file_read = pathlib.Path(abs_path, 'data', 'weapons', 'weapons.csv')
file_write = pathlib.Path(abs_path, 'data', 'weapons', 'weapons_processed.csv')

In [88]:
# Load the raw weapons CSV. low_memory=False makes pandas parse the file in a
# single pass rather than in chunks, so each column gets one consistently
# inferred dtype (at the cost of more memory while reading).
df_weapons_raw = pd.read_csv(file_read, low_memory=False)

In [89]:
# Rename 'player' to 'userId' so weapons use the same key name as the other
# processed tables.
df_weapons_renamed = df_weapons_raw.rename({'player': 'userId'}, axis='columns')

In [90]:
# Select and order the columns written to the processed weapons file.
# A column missing from the renamed frame raises KeyError here.
df_weapons_clean = df_weapons_renamed.loc[:, [
    'userId', 'weaponName',
    'matchesPlayedValue', 'matchesWonValue', 'matchesLostValue',
    'matchesTiedValue', 'matchesWinPctValue',
    'roundsPlayedValue',
    'killsValue', 'killsPerRoundValue', 'secondaryKillsValue',
    'headshotsValue', 'deathsValue', 'kDRatioValue', 'headshotsPercentageValue',
    'damageValue', 'damagePerRoundValue', 'damagePerMatchValue', 'damageReceivedValue',
    'dealtHeadshotsValue', 'dealtBodyshotsValue', 'dealtLegshotsValue',
    'killDistanceValue', 'avgKillDistanceValue', 'longestKillDistanceValue',
]]

In [91]:
# Persist the processed weapons table. to_csv is called with its default
# index=True, so the row index is written as an unnamed first column.
# NOTE(review): confirm downstream readers expect that extra column.
df_weapons_clean.to_csv(file_write)