In [1]:
import boto3
import os
from dotenv import load_dotenv
from datetime import datetime
from botocore.exceptions import ClientError
import logging

In [2]:
# Load variables from the dotenv file (if any) into the process environment
# before they are looked up below.
load_dotenv()

# S3 credentials and target bucket; each one is None when the variable is unset.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_S3_BUCKET = os.environ.get("AWS_S3_BUCKET")

In [3]:
class AwsS3():
    """Small helper around the S3 bucket named by ``AWS_S3_BUCKET``.

    Every method is static and is meant to be called on the class itself,
    e.g. ``AwsS3.upload_file(...)``.
    """

    @staticmethod
    def _build_key(path : str, file_name : str) -> str:
        """Join a folder-like prefix and a file name into an S3 object key.

        A '/' is inserted when ``path`` is non-empty and does not already end
        with one, so objects land *inside* the prefix instead of having the
        file name glued onto the last path segment.
        """
        if path and not path.endswith('/'):
            return path + '/' + file_name
        return path + file_name

    @staticmethod
    def upload_file(data : object, path : str, file_format) -> bool:

        """
            Upload a payload to the S3 bucket under a timestamped key
            :param data: Object body handed to put_object (text or bytes)
            :param path: Key prefix ("folder") to write under
            :param file_format: File extension including the dot, e.g. '.txt'
            :return: True if the object was uploaded, else False
        """

        date = datetime.now().strftime("_%Y%m%d_%H%M%S")
        file_name = 'valorant_reports{}{}'.format(date, file_format)
        key = AwsS3._build_key(path, file_name)

        s3 = boto3.client("s3", aws_access_key_id = AWS_ACCESS_KEY_ID, aws_secret_access_key = AWS_SECRET_ACCESS_KEY)

        try:
            s3.put_object(Bucket = AWS_S3_BUCKET, Body = data, Key = key)

        except ClientError as e:
            logging.error(e)

            return False

        return True

    @staticmethod
    def get_file(path : str, file_name : str) -> str:

        """
            Read an object from the S3 bucket and return it as text
            :param path: Not used; kept so existing callers keep working
            :param file_name: Full S3 object key to read
            :return: The object's body decoded as UTF-8
            :raises ClientError: When S3 rejects the request (logged, then re-raised)
        """
        s3 = boto3.client('s3')

        try:
            response = s3.get_object(Bucket = AWS_S3_BUCKET, Key = file_name)

        except ClientError as e:
            # Re-raise after logging: there is no text to return, and falling
            # through used to end in an UnboundLocalError instead.
            logging.error(e)
            raise

        return response['Body'].read().decode('utf-8')

    @staticmethod
    def get_files_list(path_read : str) -> list:

        """
            List the objects stored under a key prefix
            :param path_read: Key prefix to list
            :return: List of object summaries (each exposes ``.key``), without folder placeholders
        """
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(AWS_S3_BUCKET)

        # Skip zero-content "folder" placeholder keys (those ending in '/').
        # Previously the first listed object was dropped unconditionally, which
        # discarded a real file whenever the prefix had no placeholder object.
        return [
            obj for obj in bucket.objects.filter(Prefix = path_read)
            if not obj.key.endswith('/')
        ]

In [4]:
from aws_s3 import AwsS3
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd
import requests
import json
import io


In [21]:
from pandas import read_csv



class Crawler():
    """Collects raw Valorant data: leaderboards, match reports and weapon stats.

    Every method is static and is meant to be called on the class itself,
    e.g. ``Crawler.get_gun_report()``.
    """

    @staticmethod
    def _remote_driver():
        """Open a Chrome session on the Selenium hub at 127.0.0.1:4444."""
        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        return webdriver.Remote('http://127.0.0.1:4444/wd/hub', options = options)

    @staticmethod
    def _page_text(driver, url) -> str:
        """Navigate ``driver`` to ``url`` and return the text of its ``<pre>`` element."""
        driver.get(url)
        return driver.find_element('xpath', '//pre').text

    @staticmethod
    def get_top500_all_servers(servers_list) -> "pd.DataFrame":
        '''
            Get every player ranked in the top 500 of each server in a list.

            Input:
                [list] servers_list: Server codes to crawl. For example: 'kr', 'eu', 'na', 'br', 'latam', 'ap'.

            Output:
                [DataFrame] data: One row per leaderboard entry, including a
                'leaderboards.full_nickname' column ("name%23tag"). The frame is
                written to data/top500_all_servers.csv and read back from it.

            Raises:
                requests.HTTPError: When the leaderboard API answers with an error status.
        '''
        pages = []

        for server in servers_list:

            # The API serves 100 players per page, so five pages cover the
            # 500 entries (range(1, 5) stopped at 400).
            for page in range(1, 6):
                response = requests.get(
                    'https://val.dakgg.io/api/v1/leaderboards/{}/aca29595-40e4-01f5-3f35-b1b3d304c96e?page={}&tier=top500'.format(server, page),
                    timeout = 30,
                )
                # Fail loudly instead of feeding an error payload into the frame.
                response.raise_for_status()
                pages.append(response.json())

        df = pd.DataFrame(pages)
        # First pass flattens the page envelope, explode() gives one row per
        # player, second pass flattens each player's nested fields.
        df = pd.json_normalize(json.loads(df.to_json(orient='records'))).explode('leaderboards')
        df = pd.json_normalize(json.loads(df.to_json(orient='records')))

        # '%23' is the URL-encoded '#' that separates game name and tag line.
        df['leaderboards.full_nickname'] = (df['leaderboards.gameName'].map(str) + '%23' + df['leaderboards.tagLine'].map(str))

        df.to_csv('data/top500_all_servers.csv')

        # Callers receive the CSV round-trip (including its saved index column).
        data = pd.read_csv('data/top500_all_servers.csv')

        # TODO: Fix the S3 upload. ERROR: expected string or bytes-like object
        # (a DataFrame was passed as the body; presumably CSV text is needed - verify).
        # AwsS3.upload_file(data, 'raw/trackergg/rank/top/all_servers', '.csv')

        return data

    @staticmethod
    def get_player_matches_report(player_name_tag) -> str:
        '''
            Get the competitive match summary pages (next=0..9) of one player
            and upload each page to S3.

            Input:
                [str] player_name_tag: A player's nickname. For example: RayzenSama%236999 .
            Output:
                [str] data_pre: JSON text of the last page that was fetched.
        '''

        driver = Crawler._remote_driver()

        path_write = 'raw/trackergg/matches_report/player'
        file_format = '.txt'
        data_pre = ''

        try:
            for page in range(0, 10):

                data_pre = Crawler._page_text(
                    driver,
                    'https://api.tracker.gg/api/v2/valorant/standard/matches/riot/{}?type=competitive&next={}'.format(player_name_tag, page),
                )
                time.sleep(5)

                AwsS3.upload_file(data_pre, path_write, file_format)
        finally:
            # Close the session once, after the last page. Quitting inside the
            # loop killed the browser before page 2 could be requested.
            driver.quit()

        return data_pre

    @staticmethod
    def get_top500_players_matches_report(players_list) -> list:
        '''
            Get the competitive match summary pages (next=0..9) of every player in a list.

            Input:
                [list] players_list: Player nicknames. For example: ['NaraKa%232299','NakaRa%233265','RayzenSama%236999'] .
            Output:
                [list] data: JSON text of every fetched page, in crawl order.
        '''

        driver = Crawler._remote_driver()

        data = []

        try:
            for player in players_list:
                for page in range(0, 10):

                    data_pre = Crawler._page_text(
                        driver,
                        'https://api.tracker.gg/api/v2/valorant/standard/matches/riot/{}?type=competitive&next={}'.format(player, page),
                    )
                    data.append(data_pre)

                # AwsS3.upload_file(data_pre, 'raw/trackergg/matches_report/top500/all_servers', '.txt')

                # Pause between players.
                time.sleep(5)
        finally:
            # Always release the browser session, even when a page fails.
            driver.quit()

        return data

    @staticmethod
    def get_matches_report_detail(matches_list) -> str:
        '''
            Get the detail report of each match in a list and upload it to S3.

            Input:
                [list] matches_list: Match ids. For example: 2bee0dc9-4ffe-519b-1cbd-7fbe763a6047.
            Output:
                [str] data_pre: JSON text of the last match that was fetched
                ('' when matches_list is empty).
        '''

        driver = Crawler._remote_driver()

        path_write = 'raw/trackergg/matches_report_details/top500/all_servers'
        file_format = '.txt'
        data_pre = ''

        try:
            for matche in matches_list:

                data_pre = Crawler._page_text(
                    driver,
                    'https://api.tracker.gg/api/v2/valorant/standard/matches/{}'.format(matche),
                )
                time.sleep(5)

                AwsS3.upload_file(data_pre, path_write, file_format)
        finally:
            # Always release the browser session, even when a page fails.
            driver.quit()

        return data_pre

    @staticmethod
    def get_gun_report() -> str:
        '''
            Get the competitive weapon segment report of the hard-coded profile
            RayzenSama%236999 and upload it to S3.

            Output:
                [str] data_pre: JSON text of the report.
        '''

        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        # NOTE(review): passing the driver path positionally is only accepted by
        # older Selenium releases - confirm against the installed version.
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)

        path_write = 'raw/trackergg/gun_report/'
        file_format = '.txt'

        try:
            data_pre = Crawler._page_text(
                driver,
                'https://api.tracker.gg/api/v2/valorant/standard/profile/riot/RayzenSama%236999/segments/weapon?playlist=competitive&seasonId=',
            )

            AwsS3.upload_file(data_pre, path_write, file_format)
        finally:
            # Always release the local browser, even when the page fails.
            driver.quit()

        return data_pre

In [1]:
from aws_s3 import AwsS3
import pandas as pd
import json

In [63]:
a = {'platformSlug': 'riot', 'platformUserIdentifier': 'lireco on point#111', 'modeKey': 'bomb', 'modeName': 'Competitive', 'modeImageUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/modes/normal.png', 'modeMaxRounds': 25, 'isAvailable': True, 'timestamp': '2022-06-08T04:26:47.369+00:00', 'result': 'victory', 'map': 'b529448b-4d60-346e-e89e-00a4c527a405', 'mapName': 'Fracture', 'mapImageUrl': 'https://titles.trackercdn.com/valorant-api/maps/b529448b-4d60-346e-e89e-00a4c527a405/splash.png', 'seasonName': 'E4: A3', 'expiryDate': '0001-01-01T00:00:00+00:00', 'segment_type': 'overview', 'platformUserHandle': 'lireco on point#111', 'hasWon': True, 'agent': '22697a3d-45bf-8dd7-4fec-84a9e28c69d7', 'agentName': 'Chamber', 'agentColor': '#d3564f', 'agentImageUrl': 'https://titles.trackercdn.com/valorant-api/agents/22697a3d-45bf-8dd7-4fec-84a9e28c69d7/displayicon.png', 'match_id': 'bc926bc8-2625-463c-834f-4d32be7c2dd1', 'playtime_rank': None, 'playtime_percentile': None, 'playtime_displayName': 'Playtime', 'playtime_displayCategory': 'Combat', 'playtime_category': 'combat', 'playtime_metadata': {}, 'playtime_value': 2408, 'playtime_displayValue': '40m 08s', 'playtime_displayType': 'TimeSeconds', 'roundsPlayed_rank': None, 'roundsPlayed_percentile': None, 'roundsPlayed_displayName': 'rounds_played', 'roundsPlayed_displayCategory': 'Combat', 'roundsPlayed_category': 'combat', 'roundsPlayed_metadata': {}, 'roundsPlayed_value': 24, 'roundsPlayed_displayValue': '24', 'roundsPlayed_displayType': 'Number', 'roundsWon_rank': None, 'roundsWon_percentile': None, 'roundsWon_displayName': 'Rounds Won', 'roundsWon_displayCategory': 'Combat', 'roundsWon_category': 'combat', 'roundsWon_metadata': {}, 'roundsWon_value': 13, 'roundsWon_displayValue': '13', 'roundsWon_displayType': 'Number', 'roundsLost_rank': None, 'roundsLost_percentile': None, 'roundsLost_displayName': 'Rounds Lost', 'roundsLost_displayCategory': 'Combat', 'roundsLost_category': 'combat', 'roundsLost_metadata': 
{}, 'roundsLost_value': 11, 'roundsLost_displayValue': '11', 'roundsLost_displayType': 'Number', 'roundsDisconnected_rank': None, 'roundsDisconnected_percentile': None, 'roundsDisconnected_displayName': 'Rounds Disconnected', 'roundsDisconnected_displayCategory': 'Combat', 'roundsDisconnected_category': 'combat', 'roundsDisconnected_metadata': {}, 'roundsDisconnected_value': None, 'roundsDisconnected_displayValue': None, 'roundsDisconnected_displayType': 'Number', 'placement_rank': None, 'placement_percentile': None, 'placement_displayName': 'placement', 'placement_displayCategory': 'Combat', 'placement_category': 'combat', 'placement_metadata': {}, 'placement_value': 3, 'placement_displayValue': '3', 'placement_displayType': 'Number', 'score_rank': None, 'score_percentile': None, 'score_displayName': 'Score', 'score_displayCategory': 'Combat', 'score_category': 'combat', 'score_metadata': {}, 'score_value': 6976, 'score_displayValue': '6,976', 'score_displayType': 'Number', 'kills_rank': None, 'kills_percentile': None, 'kills_displayName': 'Kills', 'kills_displayCategory': 'Combat', 'kills_category': 'combat', 'kills_metadata': {}, 'kills_value': 24, 'kills_displayValue': '24', 'kills_displayType': 'Number', 'deaths_rank': None, 'deaths_percentile': None, 'deaths_displayName': 'Deaths', 'deaths_displayCategory': 'Combat', 'deaths_category': 'combat', 'deaths_metadata': {}, 'deaths_value': 16, 'deaths_displayValue': '16', 'deaths_displayType': 'Number', 'assists_rank': None, 'assists_percentile': None, 'assists_displayName': 'Assists', 'assists_displayCategory': 'Combat', 'assists_category': 'combat', 'assists_metadata': {}, 'assists_value': 4, 'assists_displayValue': '4', 'assists_displayType': 'Number', 'damage_rank': None, 'damage_percentile': None, 'damage_displayName': 'damage', 'damage_displayCategory': 'Combat', 'damage_category': 'combat', 'damage_metadata': {}, 'damage_value': 4667, 'damage_displayValue': '4,667', 'damage_displayType': 'Number', 
'damageReceived_rank': None, 'damageReceived_percentile': None, 'damageReceived_displayName': 'Damaged Received', 'damageReceived_displayCategory': 'Combat', 'damageReceived_category': 'combat', 'damageReceived_metadata': {}, 'damageReceived_value': 3256, 'damageReceived_displayValue': '3,256', 'damageReceived_displayType': 'Number', 'headshots_rank': None, 'headshots_percentile': None, 'headshots_displayName': 'Headshots', 'headshots_displayCategory': 'Combat', 'headshots_category': 'combat', 'headshots_metadata': {}, 'headshots_value': 14, 'headshots_displayValue': '14', 'headshots_displayType': 'Number', 'grenadeCasts_rank': None, 'grenadeCasts_percentile': None, 'grenadeCasts_displayName': 'kills', 'grenadeCasts_displayCategory': 'Combat', 'grenadeCasts_category': 'combat', 'grenadeCasts_metadata': {}, 'grenadeCasts_value': 16, 'grenadeCasts_displayValue': '16', 'grenadeCasts_displayType': 'Number', 'ability1Casts_rank': None, 'ability1Casts_percentile': None, 'ability1Casts_displayName': 'kills', 'ability1Casts_displayCategory': 'Combat', 'ability1Casts_category': 'combat', 'ability1Casts_metadata': {}, 'ability1Casts_value': 5, 'ability1Casts_displayValue': '5', 'ability1Casts_displayType': 'Number', 'ability2Casts_rank': None, 'ability2Casts_percentile': None, 'ability2Casts_displayName': 'kills', 'ability2Casts_displayCategory': 'Combat', 'ability2Casts_category': 'combat', 'ability2Casts_metadata': {}, 'ability2Casts_value': 16, 'ability2Casts_displayValue': '16', 'ability2Casts_displayType': 'Number', 'ultimateCasts_rank': None, 'ultimateCasts_percentile': None, 'ultimateCasts_displayName': 'Ultimate Casts', 'ultimateCasts_displayCategory': 'Combat', 'ultimateCasts_category': 'combat', 'ultimateCasts_metadata': {}, 'ultimateCasts_value': 3, 'ultimateCasts_displayValue': '3', 'ultimateCasts_displayType': 'Number', 'dealtHeadshots_rank': None, 'dealtHeadshots_percentile': None, 'dealtHeadshots_displayName': 'Dealt Headshots', 
'dealtHeadshots_displayCategory': 'Combat', 'dealtHeadshots_category': 'combat', 'dealtHeadshots_metadata': {}, 'dealtHeadshots_value': 15, 'dealtHeadshots_displayValue': '15', 'dealtHeadshots_displayType': 'Number', 'dealtBodyshots_rank': None, 'dealtBodyshots_percentile': None, 'dealtBodyshots_displayName': 'Dealt Bodyshots', 'dealtBodyshots_displayCategory': 'Combat', 'dealtBodyshots_category': 'combat', 'dealtBodyshots_metadata': {}, 'dealtBodyshots_value': 34, 'dealtBodyshots_displayValue': '34', 'dealtBodyshots_displayType': 'Number', 'dealtLegshots_rank': None, 'dealtLegshots_percentile': None, 'dealtLegshots_displayName': 'Dealt Legshots', 'dealtLegshots_displayCategory': 'Combat', 'dealtLegshots_category': 'combat', 'dealtLegshots_metadata': {}, 'dealtLegshots_value': 1, 'dealtLegshots_displayValue': '1', 'dealtLegshots_displayType': 'Number', 'econRating_rank': None, 'econRating_percentile': None, 'econRating_displayName': 'Econ Rating', 'econRating_displayCategory': 'Combat', 'econRating_category': 'combat', 'econRating_metadata': {}, 'econRating_value': 81, 'econRating_displayValue': '81', 'econRating_displayType': 'Number', 'suicides_rank': None, 'suicides_percentile': None, 'suicides_displayName': 'Suicides', 'suicides_displayCategory': 'Combat', 'suicides_category': 'combat', 'suicides_metadata': {}, 'suicides_value': 0, 'suicides_displayValue': '0', 'suicides_displayType': 'Number', 'revived_rank': None, 'revived_percentile': None, 'revived_displayName': 'revived', 'revived_displayCategory': 'Combat', 'revived_category': 'combat', 'revived_metadata': {}, 'revived_value': None, 'revived_displayValue': None, 'revived_displayType': 'Number', 'firstBloods_rank': None, 'firstBloods_percentile': None, 'firstBloods_displayName': 'First Bloods', 'firstBloods_displayCategory': 'Combat', 'firstBloods_category': 'combat', 'firstBloods_metadata': {}, 'firstBloods_value': 5, 'firstBloods_displayValue': '5', 'firstBloods_displayType': 'Number', 
'firstDeaths_rank': None, 'firstDeaths_percentile': None, 'firstDeaths_displayName': 'First Deaths', 'firstDeaths_displayCategory': 'Combat', 'firstDeaths_category': 'combat', 'firstDeaths_metadata': {}, 'firstDeaths_value': 4, 'firstDeaths_displayValue': '4', 'firstDeaths_displayType': 'Number', 'lastDeaths_rank': None, 'lastDeaths_percentile': None, 'lastDeaths_displayName': 'Last Deaths', 'lastDeaths_displayCategory': 'Combat', 'lastDeaths_category': 'combat', 'lastDeaths_metadata': {}, 'lastDeaths_value': 3, 'lastDeaths_displayValue': '3', 'lastDeaths_displayType': 'Number', 'survived_rank': None, 'survived_percentile': None, 'survived_displayName': 'Survived', 'survived_displayCategory': 'Combat', 'survived_category': 'combat', 'survived_metadata': {}, 'survived_value': 3, 'survived_displayValue': '3', 'survived_displayType': 'Number', 'traded_rank': None, 'traded_percentile': None, 'traded_displayName': 'Traded', 'traded_displayCategory': 'Combat', 'traded_category': 'combat', 'traded_metadata': {}, 'traded_value': 1, 'traded_displayValue': '1', 'traded_displayType': 'Number', 'kasted_rank': None, 'kasted_percentile': None, 'kasted_displayName': 'Kasted', 'kasted_displayCategory': 'Combat', 'kasted_category': 'combat', 'kasted_metadata': {}, 'kasted_value': None, 'kasted_displayValue': None, 'kasted_displayType': 'Number', 'kAST_rank': None, 'kAST_percentile': None, 'kAST_displayName': 'KAST', 'kAST_displayCategory': 'Combat', 'kAST_category': 'combat', 'kAST_metadata': {}, 'kAST_value': 83.0, 'kAST_displayValue': '83.0%', 'kAST_displayType': 'NumberPercentage', 'flawless_rank': None, 'flawless_percentile': None, 'flawless_displayName': 'Flawless', 'flawless_displayCategory': 'Combat', 'flawless_category': 'combat', 'flawless_metadata': {}, 'flawless_value': 2, 'flawless_displayValue': '2', 'flawless_displayType': 'Number', 'thrifty_rank': None, 'thrifty_percentile': None, 'thrifty_displayName': 'Thrifty', 'thrifty_displayCategory': 'Combat', 
'thrifty_category': 'combat', 'thrifty_metadata': {}, 'thrifty_value': None, 'thrifty_displayValue': None, 'thrifty_displayType': 'Number', 'aces_rank': None, 'aces_percentile': None, 'aces_displayName': 'Aces', 'aces_displayCategory': 'Combat', 'aces_category': 'combat', 'aces_metadata': {}, 'aces_value': 0, 'aces_displayValue': '0', 'aces_displayType': 'Number', 'teamAces_rank': None, 'teamAces_percentile': None, 'teamAces_displayName': 'Team Aces', 'teamAces_displayCategory': 'Combat', 'teamAces_category': 'combat', 'teamAces_metadata': {}, 'teamAces_value': 0, 'teamAces_displayValue': '0', 'teamAces_displayType': 'Number', 'clutches_rank': None, 'clutches_percentile': None, 'clutches_displayName': 'Clutches', 'clutches_displayCategory': 'Combat', 'clutches_category': 'combat', 'clutches_metadata': {}, 'clutches_value': 2, 'clutches_displayValue': '2', 'clutches_displayType': 'Number', 'clutchesLost_rank': None, 'clutchesLost_percentile': None, 'clutchesLost_displayName': 'Clutches Lost', 'clutchesLost_displayCategory': 'Combat', 'clutchesLost_category': 'combat', 'clutchesLost_metadata': {}, 'clutchesLost_value': 3, 'clutchesLost_displayValue': '3', 'clutchesLost_displayType': 'Number', 'plants_rank': None, 'plants_percentile': None, 'plants_displayName': 'Plants', 'plants_displayCategory': 'Combat', 'plants_category': 'combat', 'plants_metadata': {}, 'plants_value': 0, 'plants_displayValue': '0', 'plants_displayType': 'Number', 'defuses_rank': None, 'defuses_percentile': None, 'defuses_displayName': 'Defuses', 'defuses_displayCategory': 'Combat', 'defuses_category': 'combat', 'defuses_metadata': {}, 'defuses_value': 2, 'defuses_displayValue': '2', 'defuses_displayType': 'Number', 'kdRatio_rank': None, 'kdRatio_percentile': None, 'kdRatio_displayName': 'K/D Ratio', 'kdRatio_displayCategory': None, 'kdRatio_category': None, 'kdRatio_metadata': {}, 'kdRatio_value': 1.5, 'kdRatio_displayValue': '1.5', 'kdRatio_displayType': 'NumberPrecision1', 
'scorePerRound_rank': None, 'scorePerRound_percentile': None, 'scorePerRound_displayName': 'Avg. Score', 'scorePerRound_displayCategory': None, 'scorePerRound_category': None, 'scorePerRound_metadata': {}, 'scorePerRound_value': 290.6666666666667, 'scorePerRound_displayValue': '290.7', 'scorePerRound_displayType': 'NumberPrecision1', 'damagePerRound_rank': None, 'damagePerRound_percentile': None, 'damagePerRound_displayName': 'Dmg/round', 'damagePerRound_displayCategory': None, 'damagePerRound_category': None, 'damagePerRound_metadata': {}, 'damagePerRound_value': 194.45833333333334, 'damagePerRound_displayValue': '194.5', 'damagePerRound_displayType': 'NumberPrecision1', 'headshotsPercentage_rank': None, 'headshotsPercentage_percentile': None, 'headshotsPercentage_displayName': 'Headshot%', 'headshotsPercentage_displayCategory': None, 'headshotsPercentage_category': None, 'headshotsPercentage_metadata': {}, 'headshotsPercentage_value': 30.0, 'headshotsPercentage_displayValue': '30', 'headshotsPercentage_displayType': 'Number', 'rank_rank': None, 'rank_percentile': None, 'rank_displayName': 'Rating', 'rank_displayCategory': None, 'rank_category': 'mmr', 'rank_metadata': {'iconUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/tiersv2/24.png', 'tierName': 'Immortal 1'}, 'rank_value': None, 'rank_displayValue': '', 'rank_displayType': 'String'}

# Key view of the sample record `a`; it stays a live dict_keys view, not a copied list.
# NOTE(review): DataCleaner.data_cleaner_matches reads this notebook-level name as
# its DataFrame column list, so this cell has to run before that method is called.
columns = a.keys()
columns

dict_keys(['platformSlug', 'platformUserIdentifier', 'modeKey', 'modeName', 'modeImageUrl', 'modeMaxRounds', 'isAvailable', 'timestamp', 'result', 'map', 'mapName', 'mapImageUrl', 'seasonName', 'expiryDate', 'segment_type', 'platformUserHandle', 'hasWon', 'agent', 'agentName', 'agentColor', 'agentImageUrl', 'match_id', 'playtime_rank', 'playtime_percentile', 'playtime_displayName', 'playtime_displayCategory', 'playtime_category', 'playtime_metadata', 'playtime_value', 'playtime_displayValue', 'playtime_displayType', 'roundsPlayed_rank', 'roundsPlayed_percentile', 'roundsPlayed_displayName', 'roundsPlayed_displayCategory', 'roundsPlayed_category', 'roundsPlayed_metadata', 'roundsPlayed_value', 'roundsPlayed_displayValue', 'roundsPlayed_displayType', 'roundsWon_rank', 'roundsWon_percentile', 'roundsWon_displayName', 'roundsWon_displayCategory', 'roundsWon_category', 'roundsWon_metadata', 'roundsWon_value', 'roundsWon_displayValue', 'roundsWon_displayType', 'roundsLost_rank', 'roundsLost_

In [66]:
class DataCleaner():
    """Flattens the raw tracker.gg JSON stored in S3 into tabular records.

    Every method is static and is meant to be called on the class itself,
    e.g. ``DataCleaner.data_cleaner_guns()``.
    """

    @staticmethod
    def _flatten_stats(stats) -> dict:
        """Flatten ``{stat: {field: value}}`` into ``{'stat_field': value}``.

        Entries whose value is not a dict (for example ``None``) are skipped.
        """
        flat = {}
        for stat, stat_data in (stats or {}).items():
            if isinstance(stat_data, dict):
                for col, value in stat_data.items():
                    flat[f'{stat}_{col}'] = value
        return flat

    @staticmethod
    def data_cleaner_matches():
        """Build one row per match segment from the raw match reports in S3.

        Reads every object under the raw matches prefix, flattens each segment
        into a row, writes the table to data/matches.csv and uploads the CSV
        text to the cleaned prefix.

        Relies on the notebook-level name ``columns`` for the output columns.

        :return: DataFrame with the cleaned rows plus the expanded 'rank_metadata' fields
        """

        path_read = 'raw/trackergg/matches_report/top500/all_servers/'
        path_write = 'cleaned/trackergg/matches_report/top500/all_servers/'

        # Rows from every page of every file. The list used to be reset per page
        # while the frame was built per file, so only the last page survived.
        rows = []

        for file in AwsS3.get_files_list(path_read):
            data_s3 = AwsS3.get_file(path_read, file.key)

            # SECURITY: eval() executes whatever text the S3 object holds. If the
            # payload is a plain list literal, ast.literal_eval is the safe
            # equivalent - left unchanged here pending confirmation of the format.
            pages = list(eval(data_s3))

            for page in pages:

                data_json = json.loads(str(page))

                for match in data_json["data"]["matches"]:
                    # Kept under its own name: it used to be overwritten by the
                    # segment attributes before it was ever written to a row.
                    match_attributes = match["attributes"]
                    match_metadata = match["metadata"]

                    for segment in match["segments"]:
                        row = {}
                        row.update(match_attributes)
                        row.update(match_metadata)
                        row["segment_type"] = segment["type"]
                        row.update(segment["attributes"])
                        row.update(segment["metadata"])
                        row["match_id"] = match_attributes["id"]
                        row.update(DataCleaner._flatten_stats(segment["stats"]))

                        rows.append(row)

        # Build the frame once; keys outside `columns` are dropped.
        df_aux = pd.DataFrame(rows, columns=list(columns))

        # Expand the 'rank_metadata' dicts into their own columns.
        df_final = pd.concat([df_aux, df_aux['rank_metadata'].apply(pd.Series)], axis=1)

        df_final.to_csv('data/matches.csv')

        data_final_csv = df_final.to_csv()

        file_format = '.csv'

        AwsS3.upload_file(data_final_csv, path_write, file_format)

        return df_final

    @staticmethod
    def data_cleaner_matches_details():
        """Flatten the raw match detail reports in S3 into per-type record lists.

        :return: Tuple of five lists of dicts, in this order: match metadata,
                 'player-round' segments, 'player-round-damage' segments,
                 'player-summary' segments and 'player-round-kills' segments
        """

        # Match *detail* reports live under matches_report_details (that is where
        # Crawler.get_matches_report_detail uploads); the match-summary prefix
        # used before holds a different payload format.
        path_read = 'raw/trackergg/matches_report_details/top500/all_servers/'

        files = AwsS3.get_files_list(path_read)

        metadata_dict_list = []
        player_round_dict_list = []
        player_round_damage_dict_list = []
        player_summary_dict_list = []
        player_round_kills = []

        for file in files:
            data_json = json.loads(AwsS3.get_file(path_read, file.key))
            match_id = data_json['data']["attributes"]["id"]

            match_metadata : dict = data_json["data"]["metadata"]
            match_metadata['match_id'] = match_id
            metadata_dict_list.append(match_metadata)

            for segment in data_json['data']['segments']:
                segment_type = segment['type']

                if segment_type == 'player-round':
                    segment_dict = {}
                    segment_dict.update(segment['attributes'])
                    segment_dict.update(segment['metadata'])
                    segment_dict.update(DataCleaner._flatten_stats(segment['stats']))
                    segment_dict['match_id'] = match_id
                    player_round_dict_list.append(segment_dict)

                elif segment_type == 'player-round-damage':
                    segment_dict = {}
                    segment_dict.update(segment['attributes'])
                    segment_dict.update(DataCleaner._flatten_stats(segment['stats']))
                    segment_dict['match_id'] = match_id
                    player_round_damage_dict_list.append(segment_dict)

                elif segment_type == 'player-summary':
                    segment_dict = {}
                    segment_dict.update(segment['attributes'])
                    segment_dict.update(segment['metadata'])
                    # Stats without nested fields are skipped by the helper.
                    segment_dict.update(DataCleaner._flatten_stats(segment['stats']))
                    segment_dict['match_id'] = match_id
                    player_summary_dict_list.append(segment_dict)

                elif segment_type == 'player-round-kills':
                    segment_dict = {}
                    segment_dict.update(segment['attributes'])

                    kill_metadata = segment['metadata']
                    # Nested metadata entries become '<entry>_<field>' columns.
                    segment_metadata_dict = DataCleaner._flatten_stats(kill_metadata)
                    # Plain values (weapon name, game time, ...) are kept under
                    # their own key; they used to be read and then discarded.
                    for metadata_key, metadata_value in kill_metadata.items():
                        if not isinstance(metadata_value, dict):
                            segment_metadata_dict[metadata_key] = metadata_value
                    # Applied once per segment instead of once per metadata entry.
                    segment_metadata_dict.update(segment['stats']['damage'])

                    segment_dict.update(segment_metadata_dict)
                    segment_dict['match_id'] = match_id
                    player_round_kills.append(segment_dict)

        return (metadata_dict_list, player_round_dict_list,
                player_round_damage_dict_list, player_summary_dict_list,
                player_round_kills)

    @staticmethod
    def data_cleaner_guns():
        """Flatten the raw weapon reports in S3 into one row per weapon.

        Writes the table to guns.csv in the working directory and uploads the
        CSV text to the cleaned gun report prefix.

        :return: DataFrame with one row per weapon entry
        """

        path_read = 'raw/trackergg/gun_report/'
        path_write = 'cleaned/trackergg/gun_report/'

        data = []

        for file in AwsS3.get_files_list(path_read):

            data_json = json.loads(AwsS3.get_file(path_read, file.key))

            for weapon in data_json['data']:
                # Metadata first, then the stats as they are; json_normalize
                # below expands the nested stat fields into dotted columns.
                # (The old per-stat loop rebuilt this same mapping every pass.)
                row = {}
                row.update(weapon["metadata"])
                row.update(weapon["stats"])

                data.append(row)

        df_final = pd.json_normalize(data)

        df_final.to_csv('guns.csv')

        data_final_csv = df_final.to_csv()

        file_format = '.csv'

        AwsS3.upload_file(data_final_csv, path_write, file_format)

        return df_final

In [8]:
# Crawl the leaderboard of every listed server, then preview the first rows.
servers_list = ['br', 'latam', 'kr', 'na', 'eu', 'ap']
top500_all_servers = Crawler.get_top500_all_servers(servers_list)
df_top500_all_servers = pd.DataFrame(data = top500_all_servers)
df_top500_all_servers.head()

Unnamed: 0.1,Unnamed: 0,meta.shard,meta.season,meta.page,meta.perPage,meta.totalCount,meta.updatedAt,leaderboards.puuid,leaderboards.gameName,leaderboards.tagLine,leaderboards.leaderboardRank,leaderboards.rankedRating,leaderboards.numberOfWins,leaderboards.competitiveTier,leaderboards.stat.winRatio,leaderboards.stat.avgScore,leaderboards.stat.headshotRatio,leaderboards.stat.mostCharacters,leaderboards.full_nickname
0,0,br,aca29595-40e4-01f5-3f35-b1b3d304c96e,1,100,500,2022-10-24T23:05:33.000Z,1q93nnIL_LTjr-30olVrL1T5PBFkTEe-UFL90_Nv__3L1h...,NIP cauanzin,2233,1,893,41,27,0.75,236.0,0.275,[{'characterId': 'dade69b4-4f5a-8528-247b-219e...,NIP cauanzin%232233
1,1,br,aca29595-40e4-01f5-3f35-b1b3d304c96e,1,100,500,2022-10-24T23:05:33.000Z,H54BruRabl10rwGbWHq_QCCLETxlgkxTECYxdgBSjr3nsF...,znjder,dgzin,2,836,56,27,0.614,255.0,0.323,[{'characterId': 'add6443a-41bd-e414-f6ad-e58d...,znjder%23dgzin
2,2,br,aca29595-40e4-01f5-3f35-b1b3d304c96e,1,100,500,2022-10-24T23:05:33.000Z,ZdLSdPcpCDRal6PnvnQvSr9fhL50p6yYOv1V9diQ_uW0xF...,ODK TCKENAN,BICEP,3,778,46,27,0.71,264.0,0.231,[{'characterId': '22697a3d-45bf-8dd7-4fec-84a9...,ODK TCKENAN%23BICEP
3,3,br,aca29595-40e4-01f5-3f35-b1b3d304c96e,1,100,500,2022-10-24T23:05:33.000Z,IQe3ZLxEjsiAqpeiwvisON7WS7T6m6IC5ePuk10eCYm5oJ...,gobera,zzz,4,738,44,27,0.564,260.0,0.255,[{'characterId': 'add6443a-41bd-e414-f6ad-e58d...,gobera%23zzz
4,4,br,aca29595-40e4-01f5-3f35-b1b3d304c96e,1,100,500,2022-10-24T23:05:33.000Z,Q21s61uqjWtv_WQ9Uu1NTDmcCpZCrZj-I2XQziPC7YTUBs...,guintt,2006,5,705,46,27,0.5,225.0,0.291,[{'characterId': 'a3bfb853-43b2-7238-a4f1-ad90...,guintt%232006


In [9]:
# Player handles ("name%23tag") that the match crawler takes as input.
players_list = df_top500_all_servers.loc[:, 'leaderboards.full_nickname'].tolist()

In [None]:
# Fetch the match report pages of every player in players_list
# (ten pages per player through the remote Selenium session).
top500_matches = Crawler.get_top500_players_matches_report(players_list)
# top500_matches

In [None]:
# Clean the raw match reports stored in S3; the result is a DataFrame that is
# also written to data/matches.csv and uploaded to S3 by the call itself.
top500_p_clean = DataCleaner.data_cleaner_matches()
top500_p_clean

In [None]:
# data_cleaner_matches() returns a DataFrame, which json.loads() cannot parse
# (it raises TypeError), so serialise the frame to JSON text first. Only the
# first rows are converted to keep the cell output small.
json.loads(top500_p_clean.head().to_json(orient='records'))

In [74]:
# NOTE(review): `data_s3` is not defined in this cell; the only visible assignment
# is in the S3 read loop further down, so this cell depends on execution order.
# NOTE(review): list() replaces `data_s3` with its own items; if it holds the text
# returned by AwsS3.get_file, that is a list of single characters - confirm intent.
data_s3 = list(data_s3)
print(type(data_s3))

<class 'list'>


In [None]:
# Show the current value of `data_s3` (whichever cell assigned it last).
data_s3

In [None]:
# Read every raw match report under the prefix; `data_s3` is overwritten on each
# pass, so it ends up holding the text of the last object only.
path_read = 'raw/trackergg/matches_report/top500/all_servers/'
files = AwsS3.get_files_list(path_read)

for file in (summary.key for summary in files):
    data_s3 = AwsS3.get_file(path_read, file)

data_s3