# Data Analisys Valorant

## Initial Configs

### Imports

In [73]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, col, expr, count, sum
# from ..src.aws.aws import Aws
import sys
sys.path.append('../src/')
from aws.aws import Aws
import io
import pandas as pd 

### Creating a Spark Session

In [2]:
spark = SparkSession.builder.appName("CreateDataFrame").getOrCreate()

### Instantiating used classes

In [3]:
aws = Aws()

## Load Dataframes

### Utils

In [4]:
def get_files(bucket_name : str, folder_path : str) -> list:
    """"""
    # objects = aws.list_objetcs_s3(bucket_name = 's3-tcc-fia-valorant', folder_path = 'valorant/raw/summary/matches/')
    objects = aws.list_objetcs_s3(bucket_name, folder_path)


    return objects

In [5]:
def concat_files_s3(objects):
    """"""

    json_files = [obj['Key'] for obj in objects]

    for file in json_files:

        response = aws.read_s3_v2(bucket_name='s3-tcc-fia-valorant', folder_path=file)
        json_data = response['Body'].read().decode('utf-8')

    return io.StringIO(json_data)

In [6]:
def read_spark(data_io):
    """"""
    data_io = pd.read_csv(data_io)
    return spark.createDataFrame(data_io)

In [7]:
def create_dataframe(bucket_name : str, folder_path : str):
    """"""
    objects = get_files(bucket_name, folder_path)
    data_io = concat_files_s3(objects)
    df = read_spark(data_io)

    return df

### df_matches_summary_data

In [8]:
df_matches_summary_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/raw/summary/matches/')
df_matches_summary_data.show(5)

+---------------+--------------------+--------------------+--------------------+-------+-----------+--------------------+-------------+-----------+--------------------+--------------+--------------------+-------+--------------------+----------+------+-------+---------+-------------+--------------------+-----------------+------------------------+--------------+---------------------+---------------+----------------------+-----------------------+------------------------------+--------------+---------------------+----------+-----------------+----------+-----------------+-----------+------------------+------------+-------------------+-----------+------------------+-------------------+--------------------------+--------------+---------------------+-----------------+------------------------+------------------+-------------------------+------------------+-------------------------+------------------+-------------------------+-------------------+--------------------------+-------------------+---

### df_match_metadata_data

In [9]:
df_match_metadata_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/metadata/')
df_match_metadata_data.show(5)

+--------------------+--------------------+-------+-----------+--------------------+-------------+--------+--------------------+------+--------+-----------+--------------------+--------------------+-------+--------------------+
|             matchId|          expiryDate|modeKey|   modeName|        modeImageUrl|modeMaxRounds|duration|         dateStarted|rounds|isRanked|    queueId|            seasonId|                 map|mapName|         mapImageUrl|
+--------------------+--------------------+-------+-----------+--------------------+-------------+--------+--------------------+------+--------+-----------+--------------------+--------------------+-------+--------------------+
|01a8725d-fed5-468...|2023-05-23T19:06:...|   bomb|Competitive|https://trackercd...|           25| 2260290|2022-06-11T03:01:...|    22|    true|competitive|3e47230a-463c-a30...|2fb9a4fd-47b8-4e7...| Breeze|https://trackercd...|
|01d95487-8f75-47e...|2023-05-23T19:05:...|   bomb|Competitive|https://trackercd...|    

### df_round_summary_data

In [10]:
df_round_summary_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/round_summary/')
df_round_summary_data.show(5)

+--------------------+-----+----------------+-----------------------+----------------+-----------------------+
|             matchId|Round|RoundresultValue|RoundresultDisplayValue|WinningteamValue|WinningteamDisplayValue|
+--------------------+-----+----------------+-----------------------+----------------+-----------------------+
|01a8725d-fed5-468...|    1|     Elimination|            Elimination|             Red|                    Red|
|01a8725d-fed5-468...|    2|          Defuse|                 Defuse|            Blue|                   Blue|
|01a8725d-fed5-468...|    3|     Elimination|            Elimination|            Blue|                   Blue|
|01a8725d-fed5-468...|    4|        Detonate|               Detonate|             Red|                    Red|
|01a8725d-fed5-468...|    5|     Elimination|            Elimination|             Red|                    Red|
+--------------------+-----+----------------+-----------------------+----------------+-----------------------+
o

### df_player_summary_data

In [11]:
df_player_summary_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/player_summary/')
df_player_summary_data.show(5)

+--------------------+----------------------+---------+----------------+-------------+--------------------+----------+-----------------+------------------+-------------------------+------------------+-------------------------+----------+-----------------+-----------+------------------+------------+-------------------+------------------+-------------------+-----------+------------------+-------------------+--------------------------+------------------------+-------------------------------+----------------+-----------------------+----------------+-----------------------+----------------+-----------------------+----------------+-----------------------+---------------+----------------------+---------------+----------------------+-----------------+------------------------+------------------+-------------------------+------------------+-------------------------+------------------+-------------------------+-------------------------+--------------------------------+--------------------------+-

### df_player_loadout_data

In [12]:
df_player_loadout_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/player_loadout/')
df_player_loadout_data.show(5)

+--------------------+----------------------+-------+------------+---------+----------------+
|             MatchId|PlatformUserIdentifier|Loadout|    StatName|StatValue|StatDisplayValue|
+--------------------+----------------------+-------+------------+---------+----------------+
|01a8725d-fed5-468...|             noumu#yoi| pistol|       Kills|      0.0|               0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|      Deaths|      2.0|               2|
|01a8725d-fed5-468...|             noumu#yoi| pistol|     Kdratio|      0.0|            0.00|
|01a8725d-fed5-468...|             noumu#yoi| pistol|     Assists|      0.0|               0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|Roundsplayed|      2.0|               2|
+--------------------+----------------------+-------+------------+---------+----------------+
only showing top 5 rows



### df_player_round_damage_data

In [13]:
df_player_round_damage_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/player_round_damage/')
df_player_round_damage_data.show(5)

+--------------------+----------------------+------------------------------+-----+-----------+------------------+-------------+--------------------+--------------+---------------------+--------------+---------------------+
|             MatchId|platformUserIdentifier|opponentPlatformUserIdentifier|Round|DamageValue|DamageDisplayValue|LegshotsValue|LegshotsDisplayValue|BodyshotsValue|BodyshotsDisplayValue|HeadshotsValue|HeadshotsDisplayValue|
+--------------------+----------------------+------------------------------+-----+-----------+------------------+-------------+--------------------+--------------+---------------------+--------------+---------------------+
|01a8725d-fed5-468...|           Brunno#9168|                     X4TO#8453|    1|       78.0|                78|          NaN|                 NaN|           NaN|                  NaN|           NaN|                  NaN|
|01a8725d-fed5-468...|           Brunno#9168|                     X4TO#8453|    1|        NaN|              

### df_player_round_kills_data

In [14]:
df_player_round_kills_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/player_round_kills/')
df_player_round_kills_data.show(5)

+--------------------+----------------------+------------------------------+-----+-----------+------------------+-------------------------+---------------------------+-------------------------------+-----------------------------------+----------------------+---------------------------------+---------------------------------+-----------------------------------+---------------------------------------+-------------------------------------------+------------------------------+-----------------------------------------+------------------+------------------+--------------------+--------------------------+--------------------------+-----------------------------------+--------------------+----------+--------------+--------+---------+--------------------+---+---+---+
|             MatchId|platformUserIdentifier|opponentPlatformUserIdentifier|Round|DamageValue|DamageDisplayValue|Platforminfo_Platformslug|Platforminfo_Platformuserid|Platforminfo_Platformuserhandle|Platforminfo_Platformuseridentifi

### df_player_round_data

In [15]:
df_player_round_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/player_round/')
df_player_round_data.show(5)

+--------------------+----------------------+-----+------------------------+-----------------------+-----------------------+------------------------+----------------------+-----------------------------+---------------------------------+----------------------+-----------------------------+-----------------+----------------+----------------+-----------------+---------------+----------------------+--------------------------+---------------+----------------------+
|             MatchId|platformUserIdentifier|Round|StatDisplayValue_Assists|StatDisplayValue_Damage|StatDisplayValue_Deaths|StatDisplayValue_Kdratio|StatDisplayValue_Kills|StatDisplayValue_Loadoutvalue|StatDisplayValue_Remainingcredits|StatDisplayValue_Score|StatDisplayValue_Spentcredits|StatValue_Assists|StatValue_Damage|StatValue_Deaths|StatValue_Kdratio|StatValue_Kills|StatValue_Loadoutvalue|StatValue_Remainingcredits|StatValue_Score|StatValue_Spentcredits|
+--------------------+----------------------+-----+-------------------

### df_team_summary_data

In [16]:
df_team_summary_data = create_dataframe('s3-tcc-fia-valorant', 'valorant/cleaned/details/team_summary/')
df_team_summary_data.show(5)

+--------------------+------+------+--------------+---------------------+---------------+----------------------+----------+-----------------+----------+-----------------+-----------+------------------+------------+-------------------+-----------+------------------+
|             matchId|teamId|hasWon|RoundswonValue|RoundswonDisplayValue|RoundslostValue|RoundslostDisplayValue|ScoreValue|ScoreDisplayValue|KillsValue|KillsDisplayValue|DeathsValue|DeathsDisplayValue|AssistsValue|AssistsDisplayValue|DamageValue|DamageDisplayValue|
+--------------------+------+------+--------------+---------------------+---------------+----------------------+----------+-----------------+----------+-----------------+-----------+------------------+------------+-------------------+-----------+------------------+
|01a8725d-fed5-468...|   Red| false|             9|                    9|             13|                    13|     22114|           22,114|        76|               76|         81|                81| 

## Books

### Rename Columns

#### df_team_summary_data

In [17]:
df_team_summary_data = df_team_summary_data.withColumnRenamed('RoundswonValue', 'roundsWonValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('RoundslostValue', 'roundslostValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('ScoreValue', 'scoreValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('KillsValue', 'killsValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('DeathsValue', 'deathsValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('AssistsValue', 'assistsValue')
df_team_summary_data = df_team_summary_data.withColumnRenamed('DamageValue', 'damageValue')

In [18]:
df_team_summary_data.columns

['matchId',
 'teamId',
 'hasWon',
 'roundsWonValue',
 'RoundswonDisplayValue',
 'roundslostValue',
 'RoundslostDisplayValue',
 'scoreValue',
 'ScoreDisplayValue',
 'killsValue',
 'KillsDisplayValue',
 'deathsValue',
 'DeathsDisplayValue',
 'assistsValue',
 'AssistsDisplayValue',
 'damageValue',
 'DamageDisplayValue']

#### df_player_round_data

In [19]:
df_player_round_data = df_player_round_data.withColumnRenamed('MatchId', 'matchId')
df_player_round_data = df_player_round_data.withColumnRenamed('Round', 'round')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Assists', 'assistsValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Damage', 'damageValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Deaths', 'deathsValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Deaths', 'deathsValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Kdratio', 'kdRatio')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Kills', 'killsValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Loadoutvalue', 'loadoutValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Remainingcredits', 'remainingCredits')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Score', 'scoreValue')
df_player_round_data = df_player_round_data.withColumnRenamed('StatValue_Spentcredits', 'spentCredits')

In [20]:
df_player_round_data.columns

['matchId',
 'platformUserIdentifier',
 'round',
 'StatDisplayValue_Assists',
 'StatDisplayValue_Damage',
 'StatDisplayValue_Deaths',
 'StatDisplayValue_Kdratio',
 'StatDisplayValue_Kills',
 'StatDisplayValue_Loadoutvalue',
 'StatDisplayValue_Remainingcredits',
 'StatDisplayValue_Score',
 'StatDisplayValue_Spentcredits',
 'assistsValue',
 'damageValue',
 'deathsValue',
 'kdRatio',
 'killsValue',
 'loadoutValue',
 'remainingCredits',
 'scoreValue',
 'spentCredits']

#### df_player_loadout_data

In [21]:
df_player_loadout_data = df_player_loadout_data.withColumnRenamed('MatchId', 'matchId')
df_player_loadout_data = df_player_loadout_data.withColumnRenamed('latformUserIdentifier', 'platformUserIdentifier')
df_player_loadout_data = df_player_loadout_data.withColumnRenamed('Loadout', 'loadout')
df_player_loadout_data = df_player_loadout_data.withColumnRenamed('StatName', 'statName')
df_player_loadout_data = df_player_loadout_data.withColumnRenamed('StatValue', 'statValue')

In [22]:
df_player_loadout_data.columns

['matchId',
 'PlatformUserIdentifier',
 'loadout',
 'statName',
 'statValue',
 'StatDisplayValue']

#### df_player_round_kills_data

In [23]:
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('MatchId', 'matchId')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Round', 'round')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('DamageValue', 'damageValue')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Platformslug', 'platformInfoSlug')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Platformuserid', 'platformInfoUserId')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Platformuserhandle', 'platformInfoUserHandle')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Platformuseridentifier', 'platformInfoUserIdentifier')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Avatarurl', 'avatarUrl')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Platforminfo_Additionalparameters', 'additionalParameters')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Platformslug', 'opponentPlatformInfoSlug')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Platformuserid', 'opponentPlatformInfoUserId')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Platformuserhandle', 'opponentPlatformInfoUserHandle')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Platformuseridentifier', 'opponentPlatformInfoUserIdentifier')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Avatarurl', 'opponentAvatarUrl')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentplatforminfo_Additionalparameters', 'opponentAdditionalParameters')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentlocation_X', 'opponentLocationX')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Opponentlocation_Y', 'opponentLocationY')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Playerlocations', 'playerLocations')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Finishingdamage_Damagetype', 'finishingDamageType')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Finishingdamage_Damageitem', 'finishingDamageItem')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Finishingdamage_Issecondaryfiremode', 'finishingDamageIsSecondaryFireMode')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Weaponimageurl', 'weaponImageUrl')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Weaponname', 'weaponName')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Weaponcategory', 'weaponCategory')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Gametime', 'gameTime')
df_player_round_kills_data = df_player_round_kills_data.withColumnRenamed('Roundtime', 'roundTime')

In [24]:
df_player_round_kills_data.columns

['matchId',
 'platformUserIdentifier',
 'opponentPlatformUserIdentifier',
 'round',
 'damageValue',
 'DamageDisplayValue',
 'platformInfoSlug',
 'platformInfoUserId',
 'platformInfoUserHandle',
 'platformInfoUserIdentifier',
 'avatarUrl',
 'additionalParameters',
 'opponentPlatformInfoSlug',
 'opponentPlatformInfoUserId',
 'opponentPlatformInfoUserHandle',
 'opponentPlatformInfoUserIdentifier',
 'opponentAvatarUrl',
 'opponentAdditionalParameters',
 'opponentLocationX',
 'opponentLocationY',
 'playerLocations',
 'finishingDamageType',
 'finishingDamageItem',
 'finishingDamageIsSecondaryFireMode',
 'weaponImageUrl',
 'weaponName',
 'weaponCategory',
 'gameTime',
 'roundTime',
 '0',
 '1',
 '2',
 '3']

#### df_player_round_damage_data

In [25]:
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('MatchId', 'matchId')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('Round', 'round')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('DamageValue', 'damageValue')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('LegshotsValue', 'legShotsValue')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('BodyshotsValue', 'bodyShotsValue')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('HeadshotsValue', 'headShotsValue')
df_player_round_damage_data = df_player_round_damage_data.withColumnRenamed('HeadshotsValue', 'headShotsValue')

In [26]:
df_player_round_damage_data.columns

['matchId',
 'platformUserIdentifier',
 'opponentPlatformUserIdentifier',
 'round',
 'damageValue',
 'DamageDisplayValue',
 'legShotsValue',
 'LegshotsDisplayValue',
 'bodyShotsValue',
 'BodyshotsDisplayValue',
 'headShotsValue',
 'HeadshotsDisplayValue']

#### df_round_summary_data

In [27]:
df_round_summary_data = df_round_summary_data.withColumnRenamed('Round', 'round')
df_round_summary_data = df_round_summary_data.withColumnRenamed('RoundresultValue', 'roundResultValue')
df_round_summary_data = df_round_summary_data.withColumnRenamed('WinningteamValue', 'winningTeamValue')

In [28]:
df_round_summary_data.columns

['matchId',
 'round',
 'roundResultValue',
 'RoundresultDisplayValue',
 'winningTeamValue',
 'WinningteamDisplayValue']

#### df_player_summary_data

In [29]:
df_player_summary_data = df_player_summary_data.withColumnRenamed('MatchId', 'matchId')
df_player_summary_data = df_player_summary_data.withColumnRenamed('RankValue', 'rankValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('CurrrankValue', 'currRankValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('ScoreValue', 'scoreValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('ScoreperroundValue', 'scorePerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('KillsperroundValue', 'killsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('KillsValue', 'killsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DeathsValue', 'deathsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('AssistsValue', 'assistsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('KdratioValue', 'kdRatioValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DamageValue', 'damageValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DamageperroundValue', 'damagePerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DamagedeltaperroundValue', 'damageDeltaPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('SinglekillsValue', 'singleKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DoublekillsValue', 'doubleKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('TriplekillsValue', 'tripleKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('QuadrakillsValue', 'quadraKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('PentakillsValue', 'pentaKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('MultikillsValue', 'multiKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('GrenadecastsValue', 'grenadeCastsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Ability1CastsValue', 'ability1CastsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Ability2CastsValue', 'ability2CastsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('UltimatecastsValue', 'ultimateCastsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('GrenadecastsperroundValue', 'grenadeCastsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Ability1CastsperroundValue', 'ability1CastsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Ability2CastsperroundValue', 'ability2CastsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('UltimatecastsperroundValue', 'ultimateCastsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('PlantsValue', 'plantsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('DefusesValue', 'defusesValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('FirstkillsValue', 'firstKillsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('FirstdeathsValue', 'firstDeathsValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('EsrValue', 'esrValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('FirstkillsperroundValue', 'firstKillsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('FirstdeathsperroundValue', 'firstDeathsPerRoundValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('EconratingValue', 'econRatingValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('HsaccuracyValue', 'hsAccuracyValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('KastValue', 'KastValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('ClutchesValue', 'clutchesValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('RoundswinpctValue', 'roundsWinPctValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('TrnperformancescoreValue', 'trnPerformanceScoreValue')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Partyid', 'partyId')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Teamid', 'teamId')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Agentkey', 'agentKey')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Agentname', 'agentName')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Agentcolor', 'agentColor')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Agentimageurl', 'agentImageUrl')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Agentportraiturl', 'agentPortraitUrl')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Countrycode', 'countryCode')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Platforminfo_Platformslug', 'platformInfoSlug')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Platforminfo_Platformuserid', 'PlatformInfoUserId')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Platforminfo_Platformuserhandle', 'platformInfoUserHandle')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Platforminfo_Avatarurl', 'avatarUrl')
df_player_summary_data = df_player_summary_data.withColumnRenamed('Platforminfo_Additionalparameters', 'additionalInfoParameters')

In [30]:
df_player_summary_data.columns

['matchId',
 'platformUserIdentifier',
 'rankValue',
 'RankDisplayValue',
 'currRankValue',
 'CurrrankDisplayValue',
 'scoreValue',
 'ScoreDisplayValue',
 'scorePerRoundValue',
 'ScoreperroundDisplayValue',
 'killsPerRoundValue',
 'KillsperroundDisplayValue',
 'killsValue',
 'KillsDisplayValue',
 'deathsValue',
 'DeathsDisplayValue',
 'assistsValue',
 'AssistsDisplayValue',
 'kdRatioValue',
 'KdratioDisplayValue',
 'damageValue',
 'DamageDisplayValue',
 'damagePerRoundValue',
 'DamageperroundDisplayValue',
 'damageDeltaPerRoundValue',
 'DamagedeltaperroundDisplayValue',
 'singleKillsValue',
 'SinglekillsDisplayValue',
 'doubleKillsValue',
 'DoublekillsDisplayValue',
 'tripleKillsValue',
 'TriplekillsDisplayValue',
 'quadraKillsValue',
 'QuadrakillsDisplayValue',
 'pentaKillsValue',
 'PentakillsDisplayValue',
 'multiKillsValue',
 'MultikillsDisplayValue',
 'grenadeCastsValue',
 'GrenadecastsDisplayValue',
 'ability1CastsValue',
 'Ability1CastsDisplayValue',
 'ability2CastsValue',
 'Abil

### Matches Summary

In [31]:
df_matches_summary_book = (
    df_matches_summary_data
    .select(
        'userId', 'matchId', 'agentName', 'modeKey', 'modeName', 'modeMaxRounds', 'isAvailable',
        'timestamp', 'metadataResult', 'hasWon', 'result', 'mapName', 'seasonName', 'playtimeValue',
        'roundsPlayedValue', 'roundsWonValue', 'roundsLostValue', 'roundsDisconnectedValue',
        'placementValue', 'scoreValue', 'killsValue', 'deathsValue', 'assistsValue', 'damageValue',
        'damageReceivedValue', 'headshotsValue', 'grenadeCastsValue', 'ability1CastsValue',
        'ability2CastsValue', 'ultimateCastsValue', 'dealtHeadshotsValue', 'dealtBodyshotsValue',
        'dealtLegshotsValue', 'econRatingValue', 'suicidesValue', 'revivedValue', 'firstBloodsValue',
        'firstDeathsValue', 'lastDeathsValue', 'survivedValue', 'tradedValue', 'kastedValue', 'kASTValue',
        'flawlessValue', 'thriftyValue', 'acesValue', 'teamAcesValue', 'clutchesValue', 'clutchesLostValue',
        'plantsValue', 'defusesValue', 'kdRatioValue', 'scorePerRoundValue', 'damagePerRoundValue',
        'headshotsPercentageValue',  'damageDeltaValue', 'damageDeltaPerRoundValue', 'rankValue',
        'trnPerformanceScoreValue',
        )
)

In [32]:
df_matches_summary_book.show(2)

+---------------+--------------------+---------+-------+-----------+-------------+-----------+--------------------+--------------+------+-------+-------+----------+-------------+-----------------+--------------+---------------+-----------------------+--------------+----------+----------+-----------+------------+-----------+-------------------+--------------+-----------------+------------------+------------------+------------------+-------------------+-------------------+------------------+---------------+-------------+------------+----------------+----------------+---------------+-------------+-----------+-----------+---------+-------------+------------+---------+-------------+-------------+-----------------+-----------+------------+------------------+------------------+-------------------+------------------------+----------------+------------------------+---------+------------------------+
|         userId|             matchId|agentName|modeKey|   modeName|modeMaxRounds|isAvailable|  

### Match Metadata

In [33]:
df_match_metadata_book = (
    df_match_metadata_data
    .select(
        'matchId', 'expiryDate', 'modeKey',
        'modeName', 'modeMaxRounds', 'duration',
        'dateStarted', 'rounds', 'isRanked',
        'queueId', 'mapName', 
    )
)

In [34]:
df_match_metadata_book.show(2)

+--------------------+--------------------+-------+-----------+-------------+--------+--------------------+------+--------+-----------+-------+
|             matchId|          expiryDate|modeKey|   modeName|modeMaxRounds|duration|         dateStarted|rounds|isRanked|    queueId|mapName|
+--------------------+--------------------+-------+-----------+-------------+--------+--------------------+------+--------+-----------+-------+
|01a8725d-fed5-468...|2023-05-23T19:06:...|   bomb|Competitive|           25| 2260290|2022-06-11T03:01:...|    22|    true|competitive| Breeze|
|01d95487-8f75-47e...|2023-05-23T19:05:...|   bomb|Competitive|           25| 2760162|2022-09-24T02:56:...|    28|    true|competitive|  Pearl|
+--------------------+--------------------+-------+-----------+-------------+--------+--------------------+------+--------+-----------+-------+
only showing top 2 rows



### Round Summary

In [35]:
df_round_summary_book = (
    df_round_summary_data
    .select(
        'matchId', 'round', 'roundResultValue', 'winningTeamValue', 
    )
)

In [36]:
df_round_summary_book.show(2)

+--------------------+-----+----------------+----------------+
|             matchId|round|roundResultValue|winningTeamValue|
+--------------------+-----+----------------+----------------+
|01a8725d-fed5-468...|    1|     Elimination|             Red|
|01a8725d-fed5-468...|    2|          Defuse|            Blue|
+--------------------+-----+----------------+----------------+
only showing top 2 rows



### Player Summary

In [37]:
df_player_summary_book = (
    df_player_summary_data
    .select(
        'matchId', 'platformUserIdentifier', 'rankValue', 'currRankValue',
        'scoreValue', 'scorePerRoundValue', 'killsPerRoundValue', 'killsValue',
        'deathsValue', 'assistsValue', 'kdRatioValue', 'damageValue', 'damagePerRoundValue',
        'damageDeltaPerRoundValue', 'singleKillsValue', 'doubleKillsValue', 'tripleKillsValue',
        'quadraKillsValue', 'pentaKillsValue', 'multiKillsValue', 'grenadeCastsValue', 'ability1CastsValue',
        'ability2CastsValue', 'ultimateCastsValue', 'grenadeCastsPerRoundValue', 'ability1CastsPerRoundValue',
        'ability2CastsPerRoundValue', 'ultimateCastsPerRoundValue', 'plantsValue', 'defusesValue', 'firstKillsValue',
        'firstDeathsValue', 'esrValue', 'firstKillsPerRoundValue', 'firstDeathsPerRoundValue', 'econRatingValue',
        'hsAccuracyValue', 'KastValue', 'clutchesValue', 'roundsWinPctValue', 'trnPerformanceScoreValue', 'partyId',
        'teamId', 'agentName'
    )
)


In [38]:
df_player_summary_book.show(2)

+--------------------+----------------------+---------+-------------+----------+------------------+------------------+----------+-----------+------------+------------------+-----------+-------------------+------------------------+----------------+----------------+----------------+----------------+---------------+---------------+-----------------+------------------+------------------+------------------+-------------------------+--------------------------+--------------------------+--------------------------+-----------+------------+---------------+----------------+-----------------+-----------------------+------------------------+---------------+-----------------+---------+-------------+-----------------+------------------------+--------------------+------+---------+
|             matchId|platformUserIdentifier|rankValue|currRankValue|scoreValue|scorePerRoundValue|killsPerRoundValue|killsValue|deathsValue|assistsValue|      kdRatioValue|damageValue|damagePerRoundValue|damageDeltaPerRoun

### Player Loadout

In [39]:
df_player_loadout_book = (
    df_player_loadout_data
    .select(
        'matchId', 'PlatformUserIdentifier', 'loadout',
        'statName', 'statValue'
    )
)

In [40]:
df_player_loadout_book.show(5)

+--------------------+----------------------+-------+------------+---------+
|             matchId|PlatformUserIdentifier|loadout|    statName|statValue|
+--------------------+----------------------+-------+------------+---------+
|01a8725d-fed5-468...|             noumu#yoi| pistol|       Kills|      0.0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|      Deaths|      2.0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|     Kdratio|      0.0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|     Assists|      0.0|
|01a8725d-fed5-468...|             noumu#yoi| pistol|Roundsplayed|      2.0|
+--------------------+----------------------+-------+------------+---------+
only showing top 5 rows



### Player Round Damage

In [41]:
df_player_round_damage_book = (
    df_player_round_damage_data
    .select(
        'matchId', 'platformUserIdentifier', 'opponentPlatformUserIdentifier', 'round',
        'damageValue', 'legShotsValue', 'bodyShotsValue', 
    )
)

In [42]:
df_player_round_damage_book.show(2)

+--------------------+----------------------+------------------------------+-----+-----------+-------------+--------------+
|             matchId|platformUserIdentifier|opponentPlatformUserIdentifier|round|damageValue|legShotsValue|bodyShotsValue|
+--------------------+----------------------+------------------------------+-----+-----------+-------------+--------------+
|01a8725d-fed5-468...|           Brunno#9168|                     X4TO#8453|    1|       78.0|          NaN|           NaN|
|01a8725d-fed5-468...|           Brunno#9168|                     X4TO#8453|    1|        NaN|          0.0|           NaN|
+--------------------+----------------------+------------------------------+-----+-----------+-------------+--------------+
only showing top 2 rows



### Player Round Kills

In [69]:
df_player_round_kills_book = (
    df_player_round_kills_data
    .select(
        'matchId', 'platformUserIdentifier', 'opponentPlatformUserIdentifier', 'round', 
        'damageValue', 'opponentLocationX', 'opponentLocationY', 'finishingDamageType', 'finishingDamageIsSecondaryFireMode',
        'weaponName', 'weaponCategory', 'gameTime', 'roundTime'
    )
)

In [70]:
df_player_round_kills_book.show(2)

+--------------------+----------------------+------------------------------+-----+-----------+-----------------+-----------------+-------------------+----------------------------------+----------+--------------+--------+---------+
|             matchId|platformUserIdentifier|opponentPlatformUserIdentifier|round|damageValue|opponentLocationX|opponentLocationY|finishingDamageType|finishingDamageIsSecondaryFireMode|weaponName|weaponCategory|gameTime|roundTime|
+--------------------+----------------------+------------------------------+-----+-----------+-----------------+-----------------+-------------------+----------------------------------+----------+--------------+--------+---------+
|01a8725d-fed5-468...|           Brunno#9168|                     X4TO#8453|    1|        100|             8994|            -5381|             Weapon|                             false|   Classic|      Sidearms|  116569|    50202|
|01a8725d-fed5-468...|      Blackwilson#8380|                     noumu#yoi|

### Player Round

In [45]:
df_player_round_book = (
    df_player_round_data
    .select(
        'matchId', 'platformUserIdentifier', 'round', 
        'assistsValue', 'damageValue', 'deathsValue', 
        'kdRatio', 'killsValue', 'loadoutValue', 'remainingCredits',
        'scoreValue', 'spentCredits'
    )
)

In [46]:
df_player_round_book.show(2)

+--------------------+----------------------+-----+------------+-----------+-----------+-------+----------+------------+----------------+----------+------------+
|             matchId|platformUserIdentifier|round|assistsValue|damageValue|deathsValue|kdRatio|killsValue|loadoutValue|remainingCredits|scoreValue|spentCredits|
+--------------------+----------------------+-----+------------+-----------+-----------+-------+----------+------------+----------------+----------+------------+
|01a8725d-fed5-468...|      Blackwilson#8380|    1|         0.0|      324.0|        0.0|    3.0|       3.0|       800.0|           200.0|     733.0|       600.0|
|01a8725d-fed5-468...|      Blackwilson#8380|    2|         0.0|      114.0|        1.0|    1.0|       1.0|      4800.0|             0.0|     250.0|      4100.0|
+--------------------+----------------------+-----+------------+-----------+-----------+-------+----------+------------+----------------+----------+------------+
only showing top 2 rows



### Team Summary

In [47]:
df_team_summary_book = (
    df_team_summary_data
    .select(
        'matchId', 'teamId', 'hasWon', 'roundsWonValue', 'roundslostValue',
        'scoreValue', 'killsValue', 'deathsValue', 'assistsValue', 'damageValue'
    )
)

In [48]:
df_team_summary_book.show(2)

+--------------------+------+------+--------------+---------------+----------+----------+-----------+------------+-----------+
|             matchId|teamId|hasWon|roundsWonValue|roundslostValue|scoreValue|killsValue|deathsValue|assistsValue|damageValue|
+--------------------+------+------+--------------+---------------+----------+----------+-----------+------------+-----------+
|01a8725d-fed5-468...|   Red| false|             9|             13|     22114|        76|         81|          17|      13954|
|01a8725d-fed5-468...|  Blue|  true|            13|              9|     23601|        81|         76|          23|      15051|
+--------------------+------+------+--------------+---------------+----------+----------+-----------+------------+-----------+
only showing top 2 rows



## ABT Player

In [49]:
df_player_abt = df_player_summary_book

In [67]:
df_player_abt = (
    df_player_abt.alias("p")
    .join(df_team_summary_book.alias("t"), 
          (col("p.matchId") == col("t.matchId")) & 
          (col("p.teamId") == col("t.teamId")),
          "inner")
    .select(
        col("p.*"),
        col("t.hasWon"),
        col("t.roundsWonValue"),
        col("t.roundslostValue"),
        col("t.scoreValue").alias("teamScoreValue"),
        col("t.killsValue").alias("teamKillsValue"),
        col("t.deathsValue").alias("teamDeathsValue"),
        col("t.assistsValue").alias("teamAssistsValue"),
        col("t.damageValue").alias("teamDamageValue")
    )
)

In [77]:
df_player_abt = (
    df_player_abt.alias("p")
    .join(df_player_round_book.alias("t"), 
          (col("p.matchId") == col("t.matchId")) & 
          (col("p.platformUserIdentifier") == col("t.platformUserIdentifier")),
          "inner")
    .groupBy("p.matchId", "p.platformUserIdentifier")
    .agg(
        sum("t.spentCredits").alias("totalSpentCredits"),
        count("t.round").alias("totalRounds")

    )
    .join(df_player_abt, ["matchId", "platformUserIdentifier"], "inner")
)

df_player_abt.show()


+--------------------+----------------------+-----------------+-----------+-----------------+-----------+-----------------+---------+-------------+----------+------------------+------------------+----------+-----------+------------+------------------+-----------+-------------------+------------------------+----------------+----------------+----------------+----------------+---------------+---------------+-----------------+------------------+------------------+------------------+-------------------------+--------------------------+--------------------------+--------------------------+-----------+------------+---------------+----------------+------------------+-----------------------+------------------------+---------------+------------------+---------+-------------+------------------+------------------------+--------------------+------+---------+------+--------------+---------------+--------------+--------------+---------------+----------------+---------------+
|             matchId|pla

In [68]:
df_player_abt.show(2)

+--------------------+----------------------+---------+-------------+----------+------------------+------------------+----------+-----------+------------+------------------+-----------+-------------------+------------------------+----------------+----------------+----------------+----------------+---------------+---------------+-----------------+------------------+------------------+------------------+-------------------------+--------------------------+--------------------------+--------------------------+-----------+------------+---------------+----------------+-----------------+-----------------------+------------------------+---------------+------------------+---------+-------------+-----------------+------------------------+--------------------+------+---------+------+--------------+---------------+--------------+--------------+---------------+----------------+---------------+
|             matchId|platformUserIdentifier|rankValue|currRankValue|scoreValue|scorePerRoundValue|killsP