In [1]:
# Importing necessary libraries to extract the data properly from Postgres using PySpark
import json
import os
import time

import numpy as np
import pandas as pd
import psycopg2 as pg2
import pyspark
import pyspark.sql.functions as psf
import requests
from pandas.io.json import json_normalize
from pyspark.sql import SparkSession

from API_Key import username, password

In [2]:
# Location of the PostgreSQL JDBC driver jar. Set the POSTGRES_JDBC_JAR environment
# variable to point at the jar on another machine; the fallback is the path this
# notebook was originally written with, so existing runs behave exactly as before.
POSTGRES_JDBC_JAR = os.environ.get(
    "POSTGRES_JDBC_JAR", "/Users/hiowatah/downloads/postgresql-42.2.18.jar"
)

# Build (or reuse) the Spark session with the driver jar listed in spark.jars.
spark = (
    SparkSession.builder
    .appName("TFT Analysis")
    .config("spark.jars", POSTGRES_JDBC_JAR)
    .getOrCreate()
)

In [3]:
# Read the Postgres `matches` table of the local TFT database into a Spark DataFrame
# over JDBC, authenticating with the credentials imported from API_Key.
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:postgresql://localhost:5432/TFT")
    .option("dbtable", "matches")
    .option("user", username)
    .option("password", password)
    .option("driver", "org.postgresql.Driver")
    .load()
)

In [4]:
# Inspect the column names and types of the DataFrame loaded from `matches`.
df.schema

StructType(List(StructField(match,StringType,true),StructField(game_datetime,LongType,true),StructField(game_length,DoubleType,true),StructField(game_version,StringType,true),StructField(gold_left,IntegerType,true),StructField(last_round,IntegerType,true),StructField(level,IntegerType,true),StructField(placement,IntegerType,true),StructField(puuid,StringType,true),StructField(time_eliminated,DoubleType,true),StructField(total_damage,IntegerType,true),StructField(traits,StringType,true),StructField(units,StringType,true)))

In [5]:
# Print a single untruncated row, one field per line, to see the raw `units` and
# `traits` values (both are plain string columns at this point).
df.select("units", "traits").show(n=1, truncate=False, vertical=True)

-RECORD 0-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 units  | [{"character_id": "TFT4_Maokai", "items": [66, 57], "name": "", "rarity": 0, "tier": 3}, {"character_id": "TFT4_Hecarim", "items": [77], "name": "", "rarity": 1, "tier": 2}, {"character_id": "TFT4_Zed", "items": [], "name": "", "rarity": 1, "tier": 1}, {"character_id": "

# In order to analyze the games played on the latest version, I need to filter the game_version column

In [6]:
# `where` returns a new DataFrame and leaves `df` untouched, so the result has to be
# bound to a name; as a bare expression the filter was discarded once the cell ran.
df_v11 = df.where("game_version LIKE '%Version 11%'")
df_v11

DataFrame[match: string, game_datetime: bigint, game_length: double, game_version: string, gold_left: int, last_round: int, level: int, placement: int, puuid: string, time_eliminated: double, total_damage: int, traits: string, units: string]

In [7]:
def jsonColParser(df, *cols, sanitize=True):
    """Parse JSON-encoded string columns of a Spark DataFrame into nested columns.

    For every requested column the JSON schema is inferred from the column's own
    values (``spark.read.json`` runs a Spark job for this) and the column is then
    replaced by ``from_json`` applied with that inferred schema.

    Args:
        df: Spark DataFrame containing the JSON string columns.
        *cols: Names of the columns to parse.
        sanitize: When True (default), each value is wrapped as
            ``{"data": <value>}`` before schema inference and parsing, and the
            ``data`` field is extracted again afterwards, so the parsed column
            holds the original value's structure (e.g. an array for a value that
            is a top-level JSON array).

    Returns:
        A new DataFrame in which each column in ``cols`` has been replaced by its
        parsed form; all other columns are passed through unchanged.

    Note:
        Uses the notebook-level ``spark`` session and the ``psf`` alias for
        ``pyspark.sql.functions``.
    """
    res = df
    for col_name in cols:
        if sanitize:
            res = res.withColumn(
                col_name,
                psf.concat(psf.lit('{"data": '), psf.col(col_name), psf.lit('}')),
            )

        # Infer the schema from this column only: selecting it first avoids shipping
        # every other column through the Python RDD. Nulls are skipped because they
        # carry no schema information and would otherwise reach the JSON reader as
        # non-JSON text, which can add a spurious `_corrupt_record` field.
        json_strings = (
            res.select(col_name)
            .where(psf.col(col_name).isNotNull())
            .rdd.map(lambda row: row[0])
        )
        schema = spark.read.json(json_strings).schema
        res = res.withColumn(col_name, psf.from_json(psf.col(col_name), schema))

        if sanitize:
            # Drop the temporary {"data": ...} wrapper added above.
            res = res.withColumn(col_name, psf.col(col_name).data)

    return res

In [23]:
# Parse the JSON text in `units` and `traits` into nested columns.
# NOTE(review): this is called on `df` as loaded, so the game_version filter from the
# earlier cell is not applied to `test` — confirm that is intended.
test = jsonColParser(df, "units", "traits")

In [9]:
# Inspect the schema after parsing to see the types inferred for `traits` and `units`.
test.schema

StructType(List(StructField(match,StringType,true),StructField(game_datetime,LongType,true),StructField(game_length,DoubleType,true),StructField(game_version,StringType,true),StructField(gold_left,IntegerType,true),StructField(last_round,IntegerType,true),StructField(level,IntegerType,true),StructField(placement,IntegerType,true),StructField(puuid,StringType,true),StructField(time_eliminated,DoubleType,true),StructField(total_damage,IntegerType,true),StructField(traits,ArrayType(StructType(List(StructField(name,StringType,true),StructField(num_units,LongType,true),StructField(style,LongType,true),StructField(tier_current,LongType,true),StructField(tier_total,LongType,true))),true),true),StructField(units,ArrayType(StructType(List(StructField(character_id,StringType,true),StructField(chosen,StringType,true),StructField(items,ArrayType(LongType,true),true),StructField(name,StringType,true),StructField(rarity,LongType,true),StructField(tier,LongType,true))),true),true)))

In [10]:
# Print the same single row after parsing, to compare against the raw JSON text.
test.select("units", "traits").show(n=1, truncate=False, vertical=True)

-RECORD 0----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 units  | [[TFT4_Maokai,, [66, 57], , 0, 3], [TFT4_Hecarim,, [77], , 1, 2], [TFT4_Zed,, [], , 1, 1], [TFT4_Sylas, Set4_Brawler, [12, 3], , 1, 2], [TFT4_Lulu,, [2], , 1, 2], [TFT4_Nunu,, [], , 2, 2], [TFT4_Kennen,, [34], , 2, 1]] 
 traits | [[Keeper, 1, 0, 0, 3], [Moonlight, 1, 0, 0, 2], [Set4_Brawler, 4, 2, 2, 4], [Set4_Elderwood, 4, 1, 1, 3], [Set4_Mage, 1, 0, 0, 3], [Set4_Ninja, 2, 0, 0, 2], [Set4_Shade, 1, 0, 0, 3], [Set4_Vanguard, 1, 0, 0, 4]]        
only showing top 1 row



In [76]:
# Accessing a field on the array-of-struct column gives one array per row holding
# that field for every unit; here: the character ids.
test.select(test.units.character_id).show(n=1, truncate=False, vertical=True)

-RECORD 0--------------------------------------------------------------------------------------------------
 units.character_id | [TFT4_Maokai, TFT4_Hecarim, TFT4_Zed, TFT4_Sylas, TFT4_Lulu, TFT4_Nunu, TFT4_Kennen] 
only showing top 1 row



In [77]:
# Item id lists of every unit in the first row (an array of arrays).
test.select(test.units.items).show(n=1, truncate=False, vertical=True)

-RECORD 0---------------------------------------------------
 units.items | [[66, 57], [77], [], [12, 3], [2], [], [34]] 
only showing top 1 row



In [81]:
# `chosen` value of every unit in the first row; it is empty for units without one.
test.select(test.units.chosen).show(n=1, truncate=False, vertical=True)

-RECORD 0-----------------------------
 units.chosen | [,,, Set4_Brawler,,,] 
only showing top 1 row



In [80]:
# `tier` value of every unit in the first row.
test.select(test.units.tier).show(n=1, truncate=False, vertical=True)

-RECORD 0---------------------------
 units.tier | [3, 2, 1, 2, 2, 2, 1] 
only showing top 1 row

