# Create ABT

## Initial Configs

### Imports

In [34]:
import os
import sys
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, col, expr, count, sum, max, udf, dayofweek, date_format, when, mean, median
from pyspark.sql.types import StringType
sys.path.append('../src/')
from aws.aws import Aws
import io
import pandas as pd 
import boto3
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

### Creating Spark session

In [35]:
# Build (or reuse) the Spark session used by the rest of the notebook.
session_builder = SparkSession.builder.appName("ValorantDataAnalysis")
spark = session_builder.getOrCreate()

# S3A connector settings. The credentials are read from the environment so
# that no secret is written into the notebook itself.
s3a_settings = (
    ("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem"),
    ("spark.hadoop.fs.s3a.access.key", os.getenv('AWS_ACCESS_KEY_ID')),
    ("spark.hadoop.fs.s3a.secret.key", os.getenv('AWS_SECRET_ACCESS_KEY')),
)
for conf_key, conf_value in s3a_settings:
    spark.conf.set(conf_key, conf_value)

### Instantiating helper classes

In [36]:
# Project-local AWS helper (imported from aws.aws); the loader functions below
# call it to list and read S3 objects.
aws = Aws()

## Load dataframes

### Utils

In [37]:
def get_files(bucket_name : str, folder_path : str) -> list:
    """Return the S3 object listing for ``folder_path`` inside ``bucket_name``.

    Thin wrapper around ``aws.list_objetcs_s3``: whatever that helper returns
    is handed back unchanged.
    """
    return aws.list_objetcs_s3(bucket_name, folder_path)

def concat_files_s3(objects, bucket_name='s3-tcc-fia-valorant'):
    """Fetch the text of the last object in an S3 listing.

    Despite its name, this function does not concatenate anything: the value
    returned is the decoded body of the last entry in ``objects``. Only that
    entry is downloaded.

    Args:
        objects: iterable of dicts that each carry a ``'Key'`` entry (the value
            produced by ``get_files``).
        bucket_name: bucket to read from. Defaults to the bucket this function
            has always read from, so existing one-argument calls are unchanged.

    Returns:
        io.StringIO holding the UTF-8 decoded body of the last listed object.

    Raises:
        ValueError: if ``objects`` is empty or ``None``, i.e. there is nothing
            to read.
    """
    keys = [obj['Key'] for obj in (objects or [])]
    if not keys:
        raise ValueError("No S3 objects to read: the object listing is empty.")

    # NOTE(review): the name suggests every file should be merged. Merging CSV
    # bodies would need header handling and may duplicate rows if the prefix
    # holds several snapshots of the same data, so the long-standing
    # "last object wins" result is kept here — confirm the intended behaviour.
    last_key = keys[-1]
    response = aws.read_s3_v2(bucket_name=bucket_name, folder_path=last_key)
    body_text = response['Body'].read().decode('utf-8')

    return io.StringIO(body_text)

def read_spark(data_io):
    """Parse CSV input with pandas and convert the result to a Spark DataFrame.

    ``data_io`` is passed straight to ``pd.read_csv``; the resulting pandas
    frame is then given to ``spark.createDataFrame``.
    """
    pandas_frame = pd.read_csv(data_io)
    spark_frame = spark.createDataFrame(pandas_frame)
    return spark_frame

def create_dataframe(bucket_name : str, folder_path : str):
    """Load the CSV data stored under an S3 prefix as a Spark DataFrame.

    Chains the three helpers in order: ``get_files`` lists the objects,
    ``concat_files_s3`` fetches the text, ``read_spark`` parses it.
    """
    # concat_files_s3 is not handed ``bucket_name``; it reads from the bucket
    # it selects itself.
    return read_spark(concat_files_s3(get_files(bucket_name, folder_path)))

def save_dataframe_csv(bucket_name, folder_path, file_name, data, file_format):
    """Serialise a DataFrame to CSV and upload it to S3 under a timestamped key.

    The object key is
    ``folder_path + file_name + '_' + <YYYYmmdd_HHMMSS> + file_format``.

    Args:
        bucket_name: destination S3 bucket.
        folder_path: key prefix. It is concatenated as-is, so it must already
            end with ``'/'`` if a folder-like key is wanted.
        file_name: base name of the object, without timestamp or suffix.
        data: object exposing ``toPandas()`` (a Spark DataFrame); the whole
            frame is converted to pandas before being written.
        file_format: suffix appended after the timestamp, e.g. ``'.csv'``.
            The content is always CSV regardless of this value.

    Returns:
        True when the upload succeeded, False when it raised. A failure is
        printed, not propagated.
    """
    # With no target given, to_csv returns the CSV text directly.
    csv_payload = data.toPandas().to_csv(index=False)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = f"{folder_path}{file_name}_{timestamp}{file_format}"

    s3 = boto3.resource('s3')

    try:
        s3.Object(bucket_name, file_path).put(Body=csv_payload)
    except Exception as e:
        # Best-effort upload: report the failure instead of aborting the run.
        print(f"Error: {e}")
        return False

    print(f"Data was written to S3://{bucket_name}/{folder_path}")
    # Explicit success value so callers can tell success from failure.
    return True

### df_player_book

In [38]:
# Load the CSV stored under the refined player-book prefix into a Spark
# DataFrame and preview the first two rows.
df_player_book = create_dataframe('s3-tcc-fia-valorant', 'valorant/refined/player-book/')
df_player_book.show(2)

+--------------------+------------------------+-------------------+------------+----------+------------------+-----------+---------------------+---------------------+-----------+------------+-------------+--------------+------------+----------------------+----------------------------+------------------+------------------+------------------+------------------+-----------------+-----------------+-------------------+--------------------+--------------------+--------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------+-------------+-----------------+------------------+---------+---------------------------+----------------------------+-----------------+-----------------+----------+--------------+--------------------+---------------------------+--------------------+-------+----------+-------+----------------+-----------------+----------------+----------------+-----------------+------------------+-

In [39]:
# Columns kept for the player ABT, in output order.
# Each name must appear exactly once: selecting a column twice yields two
# identically named columns in the DataFrame and in the exported CSV header.
# NOTE(review): 'kills_per_round_value' is listed once here; a plain
# 'kills_value' column may also have been intended — confirm against the
# player-book schema before adding it.
player_abt_columns = [
    'result_number',
    'avg_spent_credits',
    'avg_score_value',
    'avg_deaths_value',
    'avg_kills_value',
    'avg_assists_value',
    'avg_damage_value',
    'avg_grenade_casts_value',
    'avg_ability1_casts_value',
    'avg_ability2_casts_value',
    'avg_ultimate_casts_value',
    'avg_plants_value',
    'avg_defuses_value',
    'avg_first_kills_value',
    'avg_first_deaths_value',
    'avg_esr_value',
    'avg_econ_rating_value',
    'avg_kast_value',
    'avg_clutches_value',
    'avg_trn_performance_score_value',
    'avg_team_score_value',
    'avg_team_kills_value',
    'avg_team_deaths_value',
    'avg_team_assists_value',
    'avg_team_damage_value',
    'mode_name_number',
    'mode_key_number',
    'map_name_number',
    'is_available_number',
    'weapon_name_number',
    'ultimate_casts_per_round_value',
    'plants_value',
    'hour_match',
    'minutes_match',
    'playtime_minutes_value',
    'playtime_hours_value',
    'total_players_match',
    'total_players_team',
    'week_day_number',
    'agent_name_number',
    'rank_number_value',
    'current_rank_number_value',
    'team_number_id',
    'defuses_value',
    'first_kills_value',
    'first_deaths_value',
    'esr_value',
    'first_kills_per_round_value',
    'first_deaths_per_round_value',
    'econ_rating_value',
    'hs_accuracy_value',
    'kast_value',
    'clutches_value',
    'rounds_win_pct_value',
    'trn_performance_score_value',
    'rounds_won_value',
    'rounds_lost_value',
    'team_score_value',
    'team_kills_value',
    'team_deaths_value',
    'team_assists_value',
    'team_damage_value',
    'max_total_kills_weapon_name',
    'total_spent_credits',
    'total_rounds',
    'score_value',
    'score_per_round_value',
    'kills_per_round_value',
    'deaths_value',
    'assists_value',
    'kd_ratio_value',
    'damage_value',
    'damage_per_round_value',
    'damage_delta_per_round_value',
    'single_kills_value',
    'double_kills_value',
    'triple_kills_value',
    'quadra_kills_value',
    'penta_kills_value',
    'multi_kills_value',
    'grenade_casts_value',
    'ability1_casts_value',
    'ability2_casts_value',
    'ultimate_casts_value',
    'grenade_casts_per_round_value',
    'ability1_casts_per_round_value',
    'ability2_casts_per_round_value',
]

# Guard against a repeated name slipping back into the list.
assert len(player_abt_columns) == len(set(player_abt_columns)), \
    "duplicate column name in player_abt_columns"

df_player_abt = df_player_book.select(*player_abt_columns)

In [40]:
# Preview the first two rows of the selected columns.
df_player_abt.show(2)

+-------------+-----------------+------------------+------------------+------------------+------------------+------------------+-----------------------+------------------------+------------------------+------------------------+------------------+------------------+---------------------+----------------------+-----------------+---------------------+-----------------+------------------+-------------------------------+--------------------+--------------------+---------------------+----------------------+---------------------+----------------+---------------+---------------+-------------------+------------------+------------------------------+------------+----------+-------------+----------------------+--------------------+-------------------+------------------+---------------+-----------------+-----------------+-------------------------+--------------+-------------+-----------------+------------------+---------+---------------------------+----------------------------+-----------------+---

In [41]:
# Row count of the selection, before any rows are dropped or filtered.
df_player_abt.count()

2992

In [42]:
# Drop every row that has a null in any selected column (dropna's default is
# how='any'), re-bind the name to the result, and recount.
df_player_abt = df_player_abt.dropna()
df_player_abt.count()

2193

In [43]:
# Keep only rows where total_players_team is at least 5 and
# total_players_match is at least 10, then recount.
team_size_ok = df_player_abt['total_players_team'] >= 5
match_size_ok = df_player_abt['total_players_match'] >= 10
df_player_abt = df_player_abt.filter(team_size_ok & match_size_ok)
df_player_abt.count()

2159

In [44]:
# Upload the filtered ABT as a timestamped CSV under the player-abt prefix.
# NOTE(review): the file base name is 'player-book' although it is written
# under 'player-abt/' — confirm this naming is intended.
save_dataframe_csv('s3-tcc-fia-valorant', 'valorant/refined/player-abt/', 'player-book', df_player_abt, '.csv')

Data was written to S3://s3-tcc-fia-valorant/valorant/refined/player-abt/
