# SQL ETL Notebook: Rainbow Cup

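Perform an ETL (extract, transform, load) pass over the Rainbow Cup data: read the regular-season and postseason game records (including their team data) from the JSON data files, flatten them, and load them into a SQLite database using SQLAlchemy.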

In [205]:
import os, sys, subprocess, json, time

In [206]:
from sqlalchemy import Column, Integer, String, Boolean, Table
from sqlalchemy.orm import relationship, backref
from sqlalchemy.orm import DeclarativeBase

In [207]:
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this notebook inherit from."""

In [208]:
class RainbowGame(Base):
    """ORM model for one four-team Rainbow Cup game, stored as a single wide row.

    Every field of a game record is flattened into its own column of the
    ``rainbow`` table: the nested ``map`` dict becomes the ``map_*`` columns and
    each four-element ``W23L`` list becomes four ``*_W``/``*_2``/``*_3``/``*_L``
    columns (list indices 0 to 3, in that order).

    Regular-season and postseason records carry different optional fields.
    ``create_new_game`` fills the missing ones with sentinels before building a
    row: ``''`` for ``series``/``league`` and ``-1`` for the point totals and
    W23L columns.
    """
    __tablename__ = "rainbow"
    # Unique game identifier; primary key of the table
    gameid = Column("gameid", String, primary_key=True)
    
    # Identity of the four participating teams
    team1Abbr = Column("team1Abbr", String)
    team1Name = Column("team1Name", String)
    team1Color = Column("team1Color", String)
    
    team2Abbr = Column("team2Abbr", String)
    team2Name = Column("team2Name", String)
    team2Color = Column("team2Color", String)

    team3Abbr = Column("team3Abbr", String)
    team3Name = Column("team3Name", String)
    team3Color = Column("team3Color", String)

    team4Abbr = Column("team4Abbr", String)
    team4Name = Column("team4Name", String)
    team4Color = Column("team4Color", String)
    
    # Scheduling metadata. `series` is the postseason series key added by
    # flatten_postseason; it is '' for records that have no series.
    season = Column("season", Integer)
    day = Column("day", Integer)
    isPostseason = Column("isPostseason", Boolean)
    series = Column("series", String)
    league = Column("league", String)
    # Flattened contents of the record's nested `map` dict
    map_patternName = Column("map_patternName", String)
    map_mapName = Column("map_mapName", String)
    
    # One initial-conditions string per team
    map_initialConditions1 = Column("map_initialConditions1", String)
    map_initialConditions2 = Column("map_initialConditions2", String)
    map_initialConditions3 = Column("map_initialConditions3", String)
    map_initialConditions4 = Column("map_initialConditions4", String)
    
    map_rows = Column("map_rows", Integer)
    map_columns = Column("map_columns", Integer)
    map_cellSize = Column("map_cellSize", Integer)
    generations = Column("generations", Integer)
    
    # Per-team outcome of this game
    team1Score = Column("team1Score", Integer)
    team2Score = Column("team2Score", Integer)
    team3Score = Column("team3Score", Integer)
    team4Score = Column("team4Score", Integer)

    team1Rank = Column("team1Rank", Integer)
    team2Rank = Column("team2Rank", Integer)
    team3Rank = Column("team3Rank", Integer)
    team4Rank = Column("team4Rank", Integer)

    # Point totals; -1 when the source record has no such field
    team1TotalPoints = Column("team1TotalPoints", Integer)
    team2TotalPoints = Column("team2TotalPoints", Integer)
    team3TotalPoints = Column("team3TotalPoints", Integer)
    team4TotalPoints = Column("team4TotalPoints", Integer)

    # Series point totals; -1 when the source record has no such field
    team1SeriesTotalPoints = Column("team1SeriesTotalPoints", Integer)
    team2SeriesTotalPoints = Column("team2SeriesTotalPoints", Integer)
    team3SeriesTotalPoints = Column("team3SeriesTotalPoints", Integer)
    team4SeriesTotalPoints = Column("team4SeriesTotalPoints", Integer)
    
    # W23L record of each team, one column per list element (-1 when absent).
    # NOTE(review): presumably counts of 1st/2nd/3rd/last-place finishes - confirm.
    team1W23L_W = Column("team1W23L_W", Integer)
    team1W23L_2 = Column("team1W23L_2", Integer)
    team1W23L_3 = Column("team1W23L_3", Integer)
    team1W23L_L = Column("team1W23L_L", Integer)
    
    team2W23L_W = Column("team2W23L_W", Integer)
    team2W23L_2 = Column("team2W23L_2", Integer)
    team2W23L_3 = Column("team2W23L_3", Integer)
    team2W23L_L = Column("team2W23L_L", Integer)
    
    team3W23L_W = Column("team3W23L_W", Integer)
    team3W23L_2 = Column("team3W23L_2", Integer)
    team3W23L_3 = Column("team3W23L_3", Integer)
    team3W23L_L = Column("team3W23L_L", Integer)
    
    team4W23L_W = Column("team4W23L_W", Integer)
    team4W23L_2 = Column("team4W23L_2", Integer)
    team4W23L_3 = Column("team4W23L_3", Integer)
    team4W23L_L = Column("team4W23L_L", Integer)
    
    # Postseason W23L record of each team (-1 when absent)
    team1PostseasonW23L_W = Column("team1PostseasonW23L_W", Integer)
    team1PostseasonW23L_2 = Column("team1PostseasonW23L_2", Integer)
    team1PostseasonW23L_3 = Column("team1PostseasonW23L_3", Integer)
    team1PostseasonW23L_L = Column("team1PostseasonW23L_L", Integer)
    
    team2PostseasonW23L_W = Column("team2PostseasonW23L_W", Integer)
    team2PostseasonW23L_2 = Column("team2PostseasonW23L_2", Integer)
    team2PostseasonW23L_3 = Column("team2PostseasonW23L_3", Integer)
    team2PostseasonW23L_L = Column("team2PostseasonW23L_L", Integer)
    
    team3PostseasonW23L_W = Column("team3PostseasonW23L_W", Integer)
    team3PostseasonW23L_2 = Column("team3PostseasonW23L_2", Integer)
    team3PostseasonW23L_3 = Column("team3PostseasonW23L_3", Integer)
    team3PostseasonW23L_L = Column("team3PostseasonW23L_L", Integer)
    
    team4PostseasonW23L_W = Column("team4PostseasonW23L_W", Integer)
    team4PostseasonW23L_2 = Column("team4PostseasonW23L_2", Integer)
    team4PostseasonW23L_3 = Column("team4PostseasonW23L_3", Integer)
    team4PostseasonW23L_L = Column("team4PostseasonW23L_L", Integer)
    
    # Series W23L record of each team (-1 when absent)
    team1SeriesW23L_W = Column("team1SeriesW23L_W", Integer)
    team1SeriesW23L_2 = Column("team1SeriesW23L_2", Integer)
    team1SeriesW23L_3 = Column("team1SeriesW23L_3", Integer)
    team1SeriesW23L_L = Column("team1SeriesW23L_L", Integer)
    
    team2SeriesW23L_W = Column("team2SeriesW23L_W", Integer)
    team2SeriesW23L_2 = Column("team2SeriesW23L_2", Integer)
    team2SeriesW23L_3 = Column("team2SeriesW23L_3", Integer)
    team2SeriesW23L_L = Column("team2SeriesW23L_L", Integer)

    team3SeriesW23L_W = Column("team3SeriesW23L_W", Integer)
    team3SeriesW23L_2 = Column("team3SeriesW23L_2", Integer)
    team3SeriesW23L_3 = Column("team3SeriesW23L_3", Integer)
    team3SeriesW23L_L = Column("team3SeriesW23L_L", Integer)
    
    team4SeriesW23L_W = Column("team4SeriesW23L_W", Integer)
    team4SeriesW23L_2 = Column("team4SeriesW23L_2", Integer)
    team4SeriesW23L_3 = Column("team4SeriesW23L_3", Integer)
    team4SeriesW23L_L = Column("team4SeriesW23L_L", Integer)

This table class can also be used with the sqlalchemy CreateTable class, to generate the SQL needed to create this table.

Example from here: <https://docs.sqlalchemy.org/en/20/orm/declarative_tables.html>

In [209]:
from sqlalchemy.schema import CreateTable
from sqlalchemy.dialects import sqlite

In [210]:
# Compile the model's table definition against the SQLite dialect and print the resulting statement
print(CreateTable(RainbowGame.__table__).compile(dialect=sqlite.dialect()))


CREATE TABLE rainbow (
	gameid VARCHAR NOT NULL, 
	"team1Abbr" VARCHAR, 
	"team1Name" VARCHAR, 
	"team1Color" VARCHAR, 
	"team2Abbr" VARCHAR, 
	"team2Name" VARCHAR, 
	"team2Color" VARCHAR, 
	"team3Abbr" VARCHAR, 
	"team3Name" VARCHAR, 
	"team3Color" VARCHAR, 
	"team4Abbr" VARCHAR, 
	"team4Name" VARCHAR, 
	"team4Color" VARCHAR, 
	season INTEGER, 
	day INTEGER, 
	"isPostseason" BOOLEAN, 
	series VARCHAR, 
	league VARCHAR, 
	"map_patternName" VARCHAR, 
	"map_mapName" VARCHAR, 
	"map_initialConditions1" VARCHAR, 
	"map_initialConditions2" VARCHAR, 
	"map_initialConditions3" VARCHAR, 
	"map_initialConditions4" VARCHAR, 
	map_rows INTEGER, 
	map_columns INTEGER, 
	"map_cellSize" INTEGER, 
	generations INTEGER, 
	"team1Score" INTEGER, 
	"team2Score" INTEGER, 
	"team3Score" INTEGER, 
	"team4Score" INTEGER, 
	"team1Rank" INTEGER, 
	"team2Rank" INTEGER, 
	"team3Rank" INTEGER, 
	"team4Rank" INTEGER, 
	"team1TotalPoints" INTEGER, 
	"team2TotalPoints" INTEGER, 
	"team3TotalPoints" INTEGER, 
	"team

In [211]:
# Name of the cup; selects the ``gollyx-{CUP}-data`` directory under ../data
CUP = 'rainbow'

def fetch_data(which_season0, fname):
    """Load one JSON data file belonging to a single season of the cup.

    Parameters
    ----------
    which_season0 : int
        Season number used to select the ``season{N}`` directory
        (zero-indexed, as the ``0`` suffix suggests).
    fname : str
        Name of the file inside the season directory, e.g. ``'season.json'``.

    Returns
    -------
    The decoded JSON document, with whatever structure the file holds.

    Raises
    ------
    FileNotFoundError
        If the file does not exist. This is a subclass of ``Exception``, so
        callers that catch ``Exception`` keep working.
    """
    data_path = os.path.join('..', 'data', f'gollyx-{CUP}-data', f'season{which_season0}', fname)
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Error: season {which_season0} not valid: {data_path} does not exist")
    # JSON is UTF-8 by specification; do not depend on the platform's default encoding
    with open(data_path, 'r', encoding='utf-8') as data_file:
        return json.load(data_file)

In [212]:
def fetch_season_data(which_season0):
    """Return the parsed contents of ``season.json`` for the given season number."""
    season_file = 'season.json'
    return fetch_data(which_season0, fname=season_file)

def fetch_postseason_data(which_season0):
    """Return the parsed contents of ``postseason.json`` for the given season number."""
    postseason_file = 'postseason.json'
    return fetch_data(which_season0, fname=postseason_file)

In [213]:
def flatten_season(season):
    """Collapse a season into one flat list of games.

    ``season`` is iterated as a sequence of days, each of which is iterated as
    a sequence of games. The games are returned in that order, as the same
    objects (no copies are made).
    """
    return [match for day_games in season for match in day_games]

def flatten_postseason(postseason):
    """Collapse a postseason into one flat list of games, tagging each with its series.

    ``postseason`` maps a series key to that series' days (each day being a
    sequence of games). Every game dict is updated in place with a ``'series'``
    entry holding its series key, then appended to the result in iteration order.
    """
    flat = []
    for series_key, series_days in postseason.items():
        for day_games in series_days:
            for match in day_games:
                # Record which series the game belongs to; the input dict itself is modified
                match['series'] = series_key
                flat.append(match)
    return flat

In [214]:
# Sample data: every game of season number 3, flattened into a single list
s3 = flatten_season(fetch_season_data(3))
# First game of that list, used to inspect the layout of a regular-season record
g = s3[0]

In [215]:
# List the fields carried by the sample regular-season game record
print(g.keys())

dict_keys(['team1Name', 'team1Abbr', 'team2Name', 'team2Abbr', 'team3Name', 'team3Abbr', 'team4Name', 'team4Abbr', 'league', 'season', 'day', 'gameid', 'isPostseason', 'team1Color', 'team2Color', 'team3Color', 'team4Color', 'map', 'team1Score', 'team2Score', 'team3Score', 'team4Score', 'team1Rank', 'team2Rank', 'team3Rank', 'team4Rank', 'generations', 'team1W23L', 'team1TotalPoints', 'team2W23L', 'team2TotalPoints', 'team3W23L', 'team3TotalPoints', 'team4W23L', 'team4TotalPoints'])


In [216]:
# Sample data: every postseason game of season number 3, flattened into a single list
p3 = flatten_postseason(fetch_postseason_data(3))
# First game of that list, used to inspect the layout of a postseason record
g2 = p3[0]

In [217]:
# List the fields carried by the sample postseason game record
print(g2.keys())

dict_keys(['league', 'team1Name', 'team1Abbr', 'team2Name', 'team2Abbr', 'team3Name', 'team3Abbr', 'team4Name', 'team4Abbr', 'description', 'season', 'gameid', 'isPostseason', 'map', 'team1Score', 'team2Score', 'team3Score', 'team4Score', 'team1Rank', 'team2Rank', 'team3Rank', 'team4Rank', 'generations', 'team1Color', 'team1SeriesW23L', 'team1SeriesTotalPoints', 'team2Color', 'team2SeriesW23L', 'team2SeriesTotalPoints', 'team3Color', 'team3SeriesW23L', 'team3SeriesTotalPoints', 'team4Color', 'team4SeriesW23L', 'team4SeriesTotalPoints', 'day', 'series'])


In [218]:
# Column suffixes that the four elements of a W23L list are stored under, in list order
_W23L_SUFFIXES = ('W', '2', '3', 'L')

# Keys of the nested game['map'] dict; each is stored in a column named 'map_<key>'
_MAP_KEYS = (
    'patternName', 'mapName',
    'initialConditions1', 'initialConditions2', 'initialConditions3', 'initialConditions4',
    'rows', 'columns', 'cellSize',
)


def _game_to_row_fields(game, game_id):
    """Flatten one game dict into the keyword arguments for ``RainbowGame``.

    ``game`` is only read, never modified. Fields that exist on just one of the
    regular-season / postseason record layouts fall back to a sentinel when
    absent: ``''`` for ``series`` and ``league``, ``-1`` for the point totals
    and for every element of the W23L records.
    """
    fields = {
        'gameid': game_id,
        'season': game['season'],
        'day': game['day'],
        'isPostseason': game['isPostseason'],
        'series': game.get('series', ''),
        'league': game.get('league', ''),
        'generations': game['generations'],
    }

    game_map = game['map']
    for map_key in _MAP_KEYS:
        fields[f'map_{map_key}'] = game_map[map_key]

    for team_number in range(1, 4 + 1):
        team = f'team{team_number}'
        # Present on every record; a missing key is an error (KeyError)
        for attr in ('Abbr', 'Name', 'Color', 'Score', 'Rank'):
            fields[team + attr] = game[team + attr]
        # Present on only one of the two record layouts
        for attr in ('TotalPoints', 'SeriesTotalPoints'):
            fields[team + attr] = game.get(team + attr, -1)
        for record in ('W23L', 'PostseasonW23L', 'SeriesW23L'):
            tallies = game.get(team + record, (-1, -1, -1, -1))
            # Index explicitly so a record with fewer than four elements still raises
            for position, suffix in enumerate(_W23L_SUFFIXES):
                fields[f'{team}{record}_{suffix}'] = tallies[position]
    return fields


def create_new_game(session, game, id_key='gameid'):
    """Add ``game`` to ``session`` as a ``RainbowGame`` row unless it is already stored.

    Parameters
    ----------
    session
        SQLAlchemy session used both for the existence check and for
        ``session.add()``. Committing is left to the caller.
    game : dict
        One game record as loaded from the season/postseason JSON data. It is
        not modified.
    id_key : str
        Key of ``game`` that holds the game id. That value is used for the
        duplicate check and stored in the ``gameid`` column.

    Returns
    -------
    None. The call is a no-op when a row with the same id already exists.

    Raises
    ------
    KeyError
        If ``game`` lacks ``id_key`` or any field without a default.
    """
    this_id = game[id_key]

    # Check if game already exists
    existing_game = (
        session.query(RainbowGame)
        .filter(RainbowGame.gameid == this_id)
        .one_or_none()
    )
    if existing_game is not None:
        return

    # Keys are mostly a one-to-one map onto columns, except the map and W23L records.
    session.add(RainbowGame(**_game_to_row_fields(game, this_id)))

Now it's time to actually create a session, and call this function, and see if it works.

Documentation for creating and configuring an engine: <https://docs.sqlalchemy.org/en/20/core/engines.html#sqlite>

Documentation for creating a session: <https://docs.sqlalchemy.org/en/20/orm/session_basics.html>

In [219]:
import logging

logging.basicConfig()
# Only surface warnings and errors from SQLAlchemy's engine and connection-pool loggers.
# Raise "sqlalchemy.pool" to logging.DEBUG here when troubleshooting.
for sqlalchemy_logger in ("sqlalchemy.engine", "sqlalchemy.pool"):
    logging.getLogger(sqlalchemy_logger).setLevel(logging.WARNING)

In [220]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# The database lives in ~/tmp/sqlite. expanduser() resolves the home directory
# even when the HOME environment variable is not set.
db_dir = os.path.join(os.path.expanduser('~'), 'tmp', 'sqlite')
# The directory must exist before SQLite can open the database file in it
os.makedirs(db_dir, exist_ok=True)
fpath = os.path.join(db_dir, 'mydb.db')
engine = create_engine(f"sqlite:///{fpath}")
# Session factory bound to the engine; used as Session() or Session.begin()
Session = sessionmaker(engine)

In [221]:
# Start by creating all the tables registered on Base.metadata
# (in this notebook that is the "rainbow" table defined by RainbowGame)
Base.metadata.create_all(engine)

In [222]:
# Run a simple query on the tables: fetch every RainbowGame row
with Session() as session:
    results = session.query(RainbowGame).all()

# Number of games currently stored
print(len(results))

0


In [223]:
# Session.begin() yields a session whose transaction spans the with-block:
# create_new_game() only stages the row via session.add(), and the pending
# changes are applied when the context is closed.
with Session.begin() as session:
    # Smoke test: insert just the first game of the sample season
    create_new_game(session, s3[0])

## Load Season Data into SQLite Database

The code below loads the games from all 24 regular seasons (`season0` = 0 through 23) into the SQLite database:

In [224]:
# Season numbers run from 0 to 23
for season0 in range(24):
    sdat = flatten_season(fetch_season_data(season0))
    # Create a new context manager for each season, to limit number of inserts at one time
    with Session.begin() as session:
        print(f"Working on season0={season0}")
        for game in sdat:
            # Returns without inserting when a row with this gameid is already stored
            create_new_game(session, game)
print("\nDone!")

Working on season0=0
Working on season0=1
Working on season0=2
Working on season0=3
Working on season0=4
Working on season0=5
Working on season0=6
Working on season0=7
Working on season0=8
Working on season0=9
Working on season0=10
Working on season0=11
Working on season0=12
Working on season0=13
Working on season0=14
Working on season0=15
Working on season0=16
Working on season0=17
Working on season0=18
Working on season0=19
Working on season0=20
Working on season0=21
Working on season0=22
Working on season0=23

Done!


In [225]:
# Same procedure for the postseason games of season numbers 0 to 23
for season0 in range(24):
    # flatten_postseason also tags every game with the series it belongs to
    pdat = flatten_postseason(fetch_postseason_data(season0))
    # Create a new context manager for each season, to limit number of inserts at one time
    with Session.begin() as session:
        print(f"Working on season0={season0} postseason")
        for game in pdat:
            # Returns without inserting when a row with this gameid is already stored
            create_new_game(session, game)
print("\nDone!")

Working on season0=0 postseason
Working on season0=1 postseason
Working on season0=2 postseason
Working on season0=3 postseason
Working on season0=4 postseason
Working on season0=5 postseason
Working on season0=6 postseason
Working on season0=7 postseason
Working on season0=8 postseason
Working on season0=9 postseason
Working on season0=10 postseason
Working on season0=11 postseason
Working on season0=12 postseason
Working on season0=13 postseason
Working on season0=14 postseason
Working on season0=15 postseason
Working on season0=16 postseason
Working on season0=17 postseason
Working on season0=18 postseason
Working on season0=19 postseason
Working on season0=20 postseason
Working on season0=21 postseason
Working on season0=22 postseason
Working on season0=23 postseason

Done!
