# Ingest JSON from Raw to Bronze

Ingest the bootstrap-static and fixtures data from manually uploaded JSON files.

Write the results to bronze-layer tables in the `fpl_bronze_<env>` schema.

In [0]:
import sys

# Make the project's helper modules importable from this notebook.
# These are presumably the folders that contain helper_data_utils and
# ingestion_utils, which are imported in the next cell.
_SRC_ROOT = "/Workspace/Users/ben@wolfinnovations.org/fpl-team-predictor/src"
for _subdir in ("ingestion", "utils"):
    _module_dir = f"{_SRC_ROOT}/{_subdir}"
    # Re-running this cell must not keep growing sys.path with duplicates.
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

In [0]:
import pyspark.sql.functions as F
from delta.tables import DeltaTable
from helper_data_utils import write_to_table, merge_to_table, detect_schema_drift
from ingestion_utils import ingest_entity, read_latest_raw_json

In [0]:
# Notebook parameter that selects the target environment.
# The widget is created with "test" as its second argument (the default value).
_ENV_WIDGET = "env"
dbutils.widgets.text(_ENV_WIDGET, "test")
ENV = dbutils.widgets.get(_ENV_WIDGET)


In [0]:
# Base path handed to read_latest_raw_json when loading the raw JSON files.
BASE_RAW_JSON_PATH = "/Volumes/workspace/fpl_raw/raw_json/"

# Protocol value passed to ingest_entity for every entity.
# NOTE(review): each ENTITY_CONFIG entry also carries its own "protocol" key;
# confirm in ingest_entity which of the two takes precedence.
PROTOCOL = "INCR"

# Environment-specific bronze schema name, e.g. "fpl_bronze_test" when ENV is "test".
BRONZE_SCHEMA = f"fpl_bronze_{ENV}"

# Ingest Raw JSON files

bootstrap-static is the core dataset. Its top-level keys are:

- events: Basic information about every Gameweek, such as average score, highest score, top-scoring player, most-captained player, etc. Loaded incrementally.
- game_settings: The game settings and rules. 
- phases: Phases of FPL season. 
- teams: Basic information of current Premier League clubs.
- total_players: Total FPL players.
- elements: Information about all Premier League players, including points, status, value, match stats (goals, assists, etc.), ICT index, etc. Loaded incrementally.
- element_types: Basic information about player positions (GK, DEF, MID, FWD).
- chips: All chips available in FPL.
- game_config: scoring and game setup rules.


fixtures contains all fixture data for the season. It needs to be loaded incrementally because fixtures are often rescheduled due to clashes or TV scheduling changes.

In [0]:
def _read_latest(filename):
    """Call read_latest_raw_json for `filename` under BASE_RAW_JSON_PATH.

    The notebook's `spark` and `dbutils` objects are forwarded unchanged.
    """
    return read_latest_raw_json(
        base_path=BASE_RAW_JSON_PATH,
        filename=filename,
        spark=spark,
        dbutils=dbutils,
    )


# Both raw inputs are loaded the same way; only the file name differs.
bootstrap_static_df = _read_latest("bootstrap_static.json")
fixtures_df = _read_latest("fixtures.json")

In [0]:
# Print the schema of the loaded bootstrap-static data for manual inspection.
bootstrap_static_df.printSchema()

In [0]:
# Print the schema of the loaded fixtures data for manual inspection.
fixtures_df.printSchema()

In [0]:
def _bootstrap_exploded(name, alias, protocol="HIST", merge_key=None):
    """Build the config dict for a bootstrap-static entity with "explode": True.

    For these entities the "path" is the same as the entity name. A
    "merge_key" entry is added only when one is given.
    """
    entry = {
        "name": name,
        "df": bootstrap_static_df,
        "path": name,
        "explode": True,
        "alias": alias,
        "protocol": protocol,
    }
    if merge_key is not None:
        entry["merge_key"] = merge_key
    return entry


def _bootstrap_nested(name, path):
    """Build the config dict for a bootstrap-static entity with "explode": False."""
    return {
        "name": name,
        "df": bootstrap_static_df,
        "path": path,
        "explode": False,
        "protocol": "HIST",
    }


# One entry per entity handed to ingest_entity. Each entry names the entity,
# the source DataFrame, the path within it, whether "explode" is set, and the
# entity's protocol ("HIST" or "INCR"); "INCR" entries also carry a "merge_key".
# NOTE(review): how ingest_entity interprets these keys is not visible here.
ENTITY_CONFIG = [
    _bootstrap_exploded("chips", "chip"),
    _bootstrap_exploded("element_stats", "stat"),
    _bootstrap_exploded("element_types", "type"),
    _bootstrap_nested("game_config_scoring", "game_config.scoring"),
    _bootstrap_nested("game_config_rules", "game_config.rules"),
    _bootstrap_exploded("phases", "phase"),
    _bootstrap_exploded("teams", "team"),
    _bootstrap_exploded("elements", "player", protocol="INCR", merge_key="id"),
    _bootstrap_exploded("events", "event", protocol="INCR", merge_key="id"),
    # fixtures comes from its own file, so it has no path and no alias.
    {
        "name": "fixtures",
        "df": fixtures_df,
        "path": None,
        "explode": False,
        "protocol": "INCR",
        "merge_key": "id",
    },
]

In [0]:
# Arguments that are identical for every entity are collected once.
_shared_ingest_args = {
    "bronze_schema": BRONZE_SCHEMA,
    "protocol": PROTOCOL,
    "spark": spark,
}

# Ingest the entities one at a time, in the order they appear in ENTITY_CONFIG.
for entity in ENTITY_CONFIG:
    ingest_entity(entity_config=entity, **_shared_ingest_args)