In [1]:
from pandas import read_csv
from aws_s3 import AwsS3
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time

class Crawler():
    """Fetches raw match reports from the tracker.gg API through a remote Selenium browser."""

    @staticmethod
    def get_matches_report_detail(
        matches_csv: str = "matches.csv",
        remote_url: str = 'http://127.0.0.1:4444/wd/hub',
        delay_seconds: float = 5,
    ) -> None:
        """
            Download the report of every match listed in a CSV file and upload each one to S3.

            For each value in the ``match_id`` column the match endpoint is opened in the
            remote browser, the text of the page's ``<pre>`` element is read, and that text
            is handed to ``AwsS3.upload_file`` under ``raw/trackergg/matches_report_details/``
            with a ``.txt`` extension.

            :param matches_csv: CSV file that contains a ``match_id`` column
            :param remote_url: Address of the Selenium remote WebDriver hub
            :param delay_seconds: Pause after each page read. ``AwsS3.upload_file`` names
                objects with a one-second-resolution timestamp, so a delay below one second
                can make consecutive uploads overwrite each other.
            :return: None
        """
        # Local import: the import block of this cell does not provide logging.
        import logging

        path_write = 'raw/trackergg/matches_report_details/'
        file_format = '.txt'

        # Read the ids first so a missing/bad CSV fails before a browser session is opened.
        match_ids = read_csv(matches_csv)['match_id'].to_list()

        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        driver = webdriver.Remote(remote_url, options = options)

        try:
            for match_id in match_ids:
                driver.get('https://api.tracker.gg/api/v2/valorant/standard/matches/{}'.format(match_id))
                data_pre = driver.find_element('xpath', '//pre').text

                # Presumably also throttles requests to the API -- confirm before lowering.
                time.sleep(delay_seconds)

                if not AwsS3.upload_file(data_pre, path_write, file_format):
                    logging.warning('Upload failed for match %s', match_id)
        finally:
            # Always release the remote session, even when a page load or upload raises.
            driver.quit()

In [2]:
import boto3
import os
from dotenv import load_dotenv
from datetime import datetime
from botocore.exceptions import ClientError
import logging

load_dotenv()

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET")


class AwsS3():
    """Static helpers that read from and write to the S3 bucket named by ``AWS_S3_BUCKET``."""

    @staticmethod
    def upload_file(data : object, path : str, file_format) -> bool:

        """
            Upload in-memory content to the S3 bucket under a timestamped key
            :param data: Content to store as the object body
            :param path: Key prefix; the generated file name is appended to it as-is
            :param file_format: File extension including the dot (e.g. '.txt')
            :return: True if the object was stored, False if S3 raised a ClientError
        """

        # Second-resolution timestamp: two uploads to the same path within one second share a key.
        date = datetime.now().strftime("_%Y%m%d_%H%M%S")
        file_name = 'valorant_reports{}{}'.format(date, file_format)
        object_key = path + file_name

        s3 = boto3.client("s3", aws_access_key_id = AWS_ACCESS_KEY_ID, aws_secret_access_key = AWS_SECRET_ACCESS_KEY)

        try:
            s3.put_object(Bucket = AWS_S3_BUCKET, Body = data, Key = object_key)

        except ClientError as e:
            logging.error(e)

            return False

        return True

    @staticmethod
    def get_file(path : str, file_name : str) -> str:

        """
            Download an object from the S3 bucket and decode it as UTF-8 text
            :param path: Not used; kept so existing callers keep working
            :param file_name: Full S3 key of the object to read
            :return: The object body decoded as a UTF-8 string
            :raises ClientError: If S3 rejects the request; the error is logged first
        """
        # No explicit credentials here: boto3 resolves them through its default chain.
        s3 = boto3.client('s3')

        try:
            response = s3.get_object(Bucket = AWS_S3_BUCKET, Key = file_name)

        except ClientError as e:
            logging.error(e)
            # Re-raise: there is no content to return, and falling through would only
            # fail later with an unrelated UnboundLocalError.
            raise

        return response['Body'].read().decode('utf-8')

    @staticmethod
    def get_files_list(path_read : str) -> list:

        """
            List the objects stored under a key prefix
            :param path_read: Key prefix to list
            :return: List of object summaries (each exposes ``.key``), without folder placeholders
        """
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(AWS_S3_BUCKET)

        # Skip "folder" placeholder objects (keys ending in '/') explicitly instead of
        # dropping whatever happens to be listed first, which could discard a real file.
        return [
            summary
            for summary in bucket.objects.filter(Prefix = path_read)
            if not summary.key.endswith('/')
        ]

In [16]:
from aws_s3 import AwsS3
import pandas as pd
import json


class DataCleaner():
    """Flattens the raw match reports stored in S3 into a tabular DataFrame."""

    @staticmethod
    def _segment_to_row(match_id, segment : dict) -> dict:
        """Flatten one report segment into a single flat dict (one output row)."""
        row = {
            "match_id": match_id,
            "expiryDate": segment["expiryDate"],
            "segment_type": segment["type"],
        }
        # Later updates win on key collisions: attributes, then metadata, then stats.
        row.update(segment["attributes"])
        row.update(segment["metadata"])

        # Each stat is itself a dict; prefix its fields with the stat name,
        # e.g. stats["kills"]["value"] -> "kills_value".
        for stat, stat_data in segment["stats"].items():
            row.update({f'{stat}_{col}': value for col, value in stat_data.items()})

        return row

    @staticmethod
    def data_cleaner_matches():
        """
            Build one DataFrame row per segment of every raw match report found in S3

            Reads every object under ``raw/trackergg/matches_report_details/``, parses it as
            JSON and flattens each entry of ``data.segments`` together with the match id.

            :return: DataFrame with one row per segment across all files (empty if none exist)
        """
        path_read = 'raw/trackergg/matches_report_details/'

        rows = []

        for file in AwsS3.get_files_list(path_read):
            data_json = json.loads(AwsS3.get_file(path_read, file.key))
            match = data_json["data"]

            for segment in match["segments"]:
                rows.append(DataCleaner._segment_to_row(match["attributes"]["id"], segment))

        # Build the frame once, after every file has been read, so no file is skipped.
        return pd.DataFrame(rows)
           
       

In [17]:
# Run the cleaning step; `data` is the DataFrame of flattened segment rows
# that the cells below normalize and export.
data = DataCleaner.data_cleaner_matches()


In [None]:
# Round-trip the frame through JSON records so that json_normalize can expand
# any dict-valued cells into separate dotted columns.
df = pd.json_normalize(json.loads(data.to_json(orient='records')))
# Show a preview only instead of dumping the whole frame into the cell output.
df.head()


In [19]:
# Persist the normalized table next to the notebook; the row index is written
# as the first (unnamed) column.
df.to_csv(path_or_buf='matches_details.csv', index=True)

In [None]:
# Column subset: four identifying columns followed by every
# `<stat>_<field>` combination, stat-major, in the order listed below.
df_team_summary = df[
    ['segment_type', 'teamId', 'name', 'hasWon']
    + [
        f'{stat}_{field}'
        for stat in (
            'roundsWon', 'roundsLost', 'score', 'kills',
            'deaths', 'assists', 'damage',
        )
        for field in (
            'rank', 'percentile', 'displayName', 'displayCategory',
            'category', 'value', 'displayValue', 'displayType',
        )
    ]
]