In [1]:
from pyspark.sql.types import StructType
from pyspark.sql.types import *
from pyspark.sql import SparkSession
# from pyspark.sql.functions import *
import pyspark.sql.functions as F
from pyspark.sql import DataFrame
from pyspark.sql.streaming import DataStreamWriter
from minio import Minio
from datetime import timedelta
from delta.tables import *
import os
from pyspark.sql.window import Window
import re

def minio_session_spark():
    spark = (
        SparkSession.builder
            .master("local[*]")
            .appName("appMinIO")
            ### Config Fields
            .config('spark.sql.debug.maxToStringFields', 5000)
            .config('spark.debug.maxToStringFields', 5000)
            ### Optimize
            .config("delta.autoOptimize.optimizeWrite", "true")
            .config("delta.autoOptimize.autoCompact", "true")
            ### Delta Table
            .config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0")
            .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
            .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
            ## MinIO
            #.config("spark.hadoop.fs.s3a.endpoint", "http://172.20.0.2:9000")
             .config("spark.hadoop.fs.s3a.endpoint", "minio:9000")

            .config("spark.hadoop.fs.s3a.access.key", "tcc_user")
            .config("spark.hadoop.fs.s3a.secret.key", "Acnmne@a9h!")
            .config("spark.hadoop.fs.s3a.path.style.access", "true")
            .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
            .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
            ## Jars
            .config("spark.jars", "/home/jovyan/work/jars/hadoop-common-3.3.2.jar,\
                                    /home/jovyan/work/jars/hadoop-aws-3.3.2.jar, \
                                    /home/jovyan/work/jars/aws-java-sdk-bundle-1.11.874.jar")
            .config('spark.hadoop.fs.s3a.aws.credentials.provider', 'org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider')
            .getOrCreate()
    )
    return spark

In [2]:
spark = minio_session_spark()

In [4]:
#MINIO CONFIGS
minio_endpoint = 'minio:9000'
minio_access_key = 'tcc_fia'
minio_secret_key = 'tcc_fia_2024'
secure = False  
minio_client = Minio(endpoint=minio_endpoint, access_key=minio_access_key, secret_key=minio_secret_key, secure=secure)

In [5]:
minio_bucket = 'raw'
prefix = 'matchs/'

In [None]:
objects = minio_client.list_objects(minio_bucket, prefix=folder)
json_files = [f"s3a://{minio_bucket}/{obj.object_name}" for obj in objects]
pattern = re.compile(f'{tier}')
filtered_json = [path for path in json_files if pattern.search(path)]
print('LISTA FILTRADA PARA O TIER: {filtered_json}')
df = spark.read.json(filtered_json[0:])

In [6]:
objects = minio_client.list_objects(minio_bucket, prefix=prefix)
json_files = [f"s3a://{minio_bucket}/{obj.object_name}" for obj in objects]

In [7]:
json_files

['s3a://raw/matchs/',
 's3a://raw/matchs/bronze_BR1_2946682948.json',
 's3a://raw/matchs/bronze_BR1_2946697589.json',
 's3a://raw/matchs/bronze_BR1_2954036635.json',
 's3a://raw/matchs/gold_BR1_2945378663.json',
 's3a://raw/matchs/gold_BR1_2945399069.json',
 's3a://raw/matchs/gold_BR1_2945710981.json',
 's3a://raw/matchs/gold_BR1_2946653050.json',
 's3a://raw/matchs/gold_BR1_2946661734.json',
 's3a://raw/matchs/gold_BR1_2947047636.json',
 's3a://raw/matchs/gold_BR1_2947061773.json',
 's3a://raw/matchs/gold_BR1_2947071535.json',
 's3a://raw/matchs/gold_BR1_2947075721.json',
 's3a://raw/matchs/gold_BR1_2947093158.json',
 's3a://raw/matchs/gold_BR1_2947106151.json',
 's3a://raw/matchs/gold_BR1_2947336995.json',
 's3a://raw/matchs/gold_BR1_2947350585.json',
 's3a://raw/matchs/gold_BR1_2947363819.json',
 's3a://raw/matchs/gold_BR1_2947660645.json',
 's3a://raw/matchs/gold_BR1_2947671000.json',
 's3a://raw/matchs/gold_BR1_2947687026.json',
 's3a://raw/matchs/gold_BR1_2948718613.json',
 's3a:

In [8]:
tier = 'bronze'
pattern = re.compile(f'{tier}')
bronze_files = [path for path in json_files if pattern.search(path)]
bronze_files

['s3a://raw/matchs/bronze_BR1_2946682948.json',
 's3a://raw/matchs/bronze_BR1_2946697589.json',
 's3a://raw/matchs/bronze_BR1_2954036635.json']

In [9]:
bronze_files[0:]

['s3a://raw/matchs/bronze_BR1_2946682948.json',
 's3a://raw/matchs/bronze_BR1_2946697589.json',
 's3a://raw/matchs/bronze_BR1_2954036635.json']

In [10]:
df = spark.read.json(json_files[0:])

In [11]:
df_final = (
    df
    .withColumn('players_exploded', F.explode(F.col("info.participants")))
    .selectExpr("metadata.matchId"
                ,"players_exploded.puuid"
                ,"players_exploded.summonerId"
                ,"players_exploded.championName"
                ,"players_exploded.role"
                ,"players_exploded.assists"
                ,"players_exploded.damageDealtToTurrets"
                ,"players_exploded.damageDealtToObjectives"
                ,"players_exploded.detectorWardsPlaced"
                ,"players_exploded.visionScore"
                ,"players_exploded.visionWardsBoughtInGame"
                ,"players_exploded.wardsKilled"
                ,"players_exploded.wardsPlaced"
                ,"players_exploded.enemyMissingPings"
                ,"players_exploded.enemyVisionPings"
                ,"players_exploded.getBackPings"
                ,"players_exploded.goldEarned"
                ,"players_exploded.goldSpent"
                ,"players_exploded.longestTimeSpentLiving"
                ,"players_exploded.magicDamageDealt"
                ,"players_exploded.magicDamageDealtToChampions"
                ,"players_exploded.magicDamageTaken"
                ,"players_exploded.physicalDamageDealt"
                ,"players_exploded.physicalDamageDealtToChampions"
                ,"players_exploded.physicalDamageTaken"
                ,"players_exploded.totalDamageDealt"
                ,"players_exploded.totalDamageDealtToChampions"
                ,"players_exploded.totalDamageTaken"
                ,"players_exploded.totalTimeSpentDead"  
                ,"players_exploded.spell1Casts"
                ,"players_exploded.spell2Casts"
                ,"players_exploded.spell3Casts"
                ,"players_exploded.spell4Casts"
                ,"players_exploded.teamId"
                ,"players_exploded.win"
               )
)
