In [1]:
from pandas import Series, DataFrame
from pandas import read_sql, read_csv
import sqlite3

# global 
database = "mlb.db"

In [50]:
# Helper functions for SQL
def run_query(q):
    """
    Returns DataFrame to the executed SQL query 
    """
    with sqlite3.connect(database) as conn:
        return read_sql(q, conn)
    
def run_command(c):
    """
    Executes SQL commands to the database
    """
    with sqlite3.connect(database) as conn:
        conn.execute("""PRAGMA foreign_keys = ON;""")
        conn.isolation_level = None
        conn.execute(c)
        
def show_tables():
    q = """
    SELECT
        name,
        type
    FROM
        sqlite_master
    WHERE
        type IN ("table", "view")
    """
    return run_query(q)

def show_table_schema(table):
    df = DataFrame()
    q = """"""
    if not table:
        q = """
        SELECT
            sql
        FROM
            sqlite_master
        WHERE
            type = "table" 
        """
    else:
        q = """
        SELECT
            sql
        FROM
            sqlite_master
        WHERE
            name = "{}"
            AND
            type = "table";
        """.format(table)
        
    df = run_query(q)
    
    for schema in df["sql"].values:
        print(schema)
    


In [3]:
# Reading the files
game_log = read_csv("game_log.csv", low_memory=False)
person_codes = read_csv("person_codes.csv", low_memory=False)
park_codes = read_csv("park_codes.csv", low_memory=False)
team_codes = read_csv("team_codes.csv", low_memory=False)

tables = {
    "game_log": game_log,
    "person_codes": person_codes,
    "park_codes": park_codes,
    "team_codes": team_codes
}

In [4]:
# Writing files to SQL Database mlb.db
with sqlite3.connect(database) as conn:
    for table_name, data in tables.items():
        # To prevent multiple times this cell is execute in Jupyter notebook
        conn.execute("""DROP TABLE IF EXISTS {}""".format(table_name))
        data.to_sql(table_name, conn, index=False)

In [5]:
show_tables()

Unnamed: 0,name,type
0,game_log,table
1,person_codes,table
2,park_codes,table
3,team_codes,table


In [6]:
# Add game_id column to game_log

c1 = """
ALTER TABLE game_log
ADD COLUMN game_id TEXT;
"""

# Since ALTER TABLE does not support IF NOT EXISTS, 
# use try/except to pass when the code is execute multiple times
try:
    run_command(c1)
except:
    pass

# Update the game log
# game_id is a compound primary key that uses data, h_name, number_of_game columns
c2 = """
UPDATE 
    game_log
SET 
    game_id = date || h_name || number_of_game
/*WHERE prevents this if it has already been done*/
WHERE 
    game_id is NULL;
"""
run_command(c2)

In [7]:
run_query("""SELECT date, h_name, number_of_game, game_id FROM game_log LIMIT 3;""")

Unnamed: 0,date,h_name,number_of_game,game_id
0,18710504,FW1,0,18710504FW10
1,18710505,WS3,0,18710505WS30
2,18710506,RC1,0,18710506RC10


<img src="schema-screenshot.png"/>

Insert all values from table_two into table_one
```mysql
INSERT INTO 
    table_one
SELECT 
    * 
FROM 
    table_two
```

Similar to 
```mysql 
IF NOT EXISTS
``` 
We can use 

```mysql
INSERT OR IGNORE
``` 

Create person table with columns and primary key as shown in the schema diagram

In [8]:
!ls *.csv

appearance_type.csv  park_codes.csv    team_codes.csv
game_log.csv	     person_codes.csv


## Create person table from Schema
- Data from ```person_codes```
- Each of the 'debut' columns have been omitted, as the data will be able to be found from other tables.
- Since the game log file has no data on coaches, we made the decision to not include this data.

In [9]:
run_query("""SELECT * FROM person_codes LIMIT 5;""")

Unnamed: 0,id,last,first,player_debut,mgr_debut,coach_debut,ump_debut
0,aardd001,Aardsma,David,04/06/2004,,,
1,aaroh101,Aaron,Hank,04/13/1954,,,
2,aarot101,Aaron,Tommie,04/10/1962,,04/06/1979,
3,aased001,Aase,Don,07/26/1977,,,
4,abada001,Abad,Andy,09/10/2001,,,


In [10]:
# Create person table in the database
# IF NOT EXISTS - prevents code from failing if ran multiple times. 
c1 = """
CREATE TABLE IF NOT EXISTS person (
    person_id TEXT PRIMARY KEY,
    first_name TEXT,
    last_name TEXT
);
"""
run_command(c1)

# Populate the person table with data
# INSERT OR IGNORE - prevents code from failing if ran multiple times
c2 = """
INSERT OR IGNORE INTO person
SELECT
    id,
    first,
    last
FROM
    person_codes;
"""
run_command(c2)

# View the data that were inserted into person table
q = """
SELECT
    *
FROM
    person
LIMIT
    5;
"""
run_query(q)

Unnamed: 0,person_id,first_name,last_name
0,aardd001,David,Aardsma
1,aaroh101,Hank,Aaron
2,aarot101,Tommie,Aaron
3,aased001,Don,Aase
4,abada001,Andy,Abad


## Create park table from Schema
- Data from ```park_codes```
- Location where games are played. 

In [11]:
run_query("""SELECT * FROM park_codes LIMIT 5;""")

Unnamed: 0,park_id,name,aka,city,state,start,end,league,notes
0,ALB01,Riverside Park,,Albany,NY,09/11/1880,05/30/1882,NL,TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1,ALT01,Columbia Park,,Altoona,PA,04/30/1884,05/31/1884,UA,
2,ANA01,Angel Stadium of Anaheim,Edison Field; Anaheim Stadium,Anaheim,CA,04/19/1966,,AL,
3,ARL01,Arlington Stadium,,Arlington,TX,04/21/1972,10/03/1993,AL,
4,ARL02,Rangers Ballpark in Arlington,The Ballpark in Arlington; Ameriquest Fl,Arlington,TX,04/11/1994,,AL,


In [12]:
# Create park table in the database
# IF NOT EXISTS - prevents code from failing if ran multiple times.
c1 = """
CREATE TABLE IF NOT EXISTS park (
    park_id TEXT PRIMARY KEY,
    name TEXT,
    nickname TEXT,
    city TEXT,
    state TEXT,
    notes TEXT
);
"""
run_command(c1)

# Populate the park table with data
# INSERT OR IGNORE - prevents code from failing if ran multiple times
c2 = """
INSERT OR IGNORE INTO 
    park
SELECT
    park_id,
    name,
    aka,
    city,
    state,
    notes
FROM
    park_codes;
"""
run_command(c2)

# View the data that were inserted into park table
q = """
SELECT
    *
FROM
    park;
"""
run_query(q)


Unnamed: 0,park_id,name,nickname,city,state,notes
0,ALB01,Riverside Park,,Albany,NY,TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1,ALT01,Columbia Park,,Altoona,PA,
2,ANA01,Angel Stadium of Anaheim,Edison Field; Anaheim Stadium,Anaheim,CA,
3,ARL01,Arlington Stadium,,Arlington,TX,
4,ARL02,Rangers Ballpark in Arlington,The Ballpark in Arlington; Ameriquest Fl,Arlington,TX,
5,ATL01,Atlanta-Fulton County Stadium,,Atlanta,GA,
6,ATL02,Turner Field,,Atlanta,GA,
7,ATL03,Suntrust Park,,Atlanta,GA,
8,BAL01,Madison Avenue Grounds,,Baltimore,MD,WS3
9,BAL02,Newington Park,,Baltimore,MD,BL1:1872-74; BL4:1873; BL2: 1882


## Create league table from Schema
- Data from ```game_log["h_league"]``` 
- League names and its abbreviated league code.

In [13]:
game_log["h_league"].value_counts()

NL    88867
AL    74712
AA     5039
FL     1243
PL      532
UA      428
Name: h_league, dtype: int64

In [14]:
def league_info(league):
    league_games = game_log[game_log["h_league"] == league]
    earliest = league_games["date"].min()
    latest = league_games["date"].max()
    print("{} went from {} to {}".format(league, earliest, latest))

for league in game_log["h_league"].unique():
        league_info(league)

nan went from nan to nan
NL went from 18760422 to 20161002
AA went from 18820502 to 18911006
UA went from 18840417 to 18841019
PL went from 18900419 to 18901004
AL went from 19010424 to 20161002
FL went from 19140413 to 19151003


Now we have some years which will help us do some research. After some googling, we have come up with:

- NL: National League
- AL: American League
- AA: American Association
- FL: Federal League
- PL: Players League
- UA: Union Association

It also looks like we have about 1000 games where the home team doesn't have a value for league.

In [15]:
# Create league table in the database
# IF NOT EXISTS - prevents code from failing if ran multiple times.
c1 = """
CREATE TABLE IF NOT EXISTS league (
    league_id TEXT PRIMARY KEY,
    league_name TEXT
);
"""
run_command(c1)

# Populate the league table with data
# INSERT OR IGNORE - prevents code from failing if ran multiple times
c2 = """
INSERT OR IGNORE INTO 
    league
VALUES
    ("NL", "National League"),
    ("AL", "American League"),
    ("AA", "American Association"),
    ("FL", "Federal League"),
    ("PL", "Players League"),
    ("UA", "Union Association")
;    
"""
run_command(c2)

# View the data that were inserted into league table
q = """
SELECT
    *
FROM
    league;
"""
run_query(q)

Unnamed: 0,league_id,league_name
0,NL,National League
1,AL,American League
2,AA,American Association
3,FL,Federal League
4,PL,Players League
5,UA,Union Association


## Create appearance_type table from Schema
- Data from ```appearance_type.csv```
- The table store information on different types of apperances available. 
- The appearances includes players with positions, umpires, managers, and awards (like winning pitcher). 

In [37]:
appearance_type = read_csv("appearance_type.csv", low_memory=False)

# To prevent code execute multiple times 
c1 = """
DROP TABLE IF EXISTS appearance_type; 
"""
run_command(c1)

c2 = """
CREATE TABLE appearance_type(
    appearance_type_id TEXT PRIMARY KEY,
    name TEXT,
    category TEXT
);
"""
run_command(c2)

with sqlite3.connect(database) as conn:
    appearance_type.to_sql("appearance_type", conn, index=False, if_exists="append")

    
q = """
SELECT
    *
FROM
    appearance_type
LIMIT 
    5;
"""
run_query(q)
show_table_schema("appearance_type")

CREATE TABLE appearance_type(
    appearance_type_id TEXT PRIMARY KEY,
    name TEXT,
    category TEXT
)


## Create team table from schema
- Data from ```team_codes```
- Table contains information about each team.
- Foreign key ```league_id``` to ```league``` table

In [17]:
run_query("""SELECT * FROM team_codes LIMIT 5""")

Unnamed: 0,team_id,league,start,end,city,nickname,franch_id,seq
0,ALT,UA,1884,1884,Altoona,Mountain Cities,ALT,1
1,ARI,NL,1998,0,Arizona,Diamondbacks,ARI,1
2,BFN,NL,1879,1885,Buffalo,Bisons,BFN,1
3,BFP,PL,1890,1890,Buffalo,Bisons,BFP,1
4,BL1,,1872,1874,Baltimore,Canaries,BL1,1


In [18]:
c1 = """
CREATE TABLE IF NOT EXISTS team (
    team_id TEXT PRIMARY KEY,
    league_id TEXT,
    city TEXT,
    nickname TEXT,
    franch_id TEXT,
    FOREIGN KEY (league_id) REFERENCES league(league_id)
);
"""
run_command(c1)

c2 = """
INSERT OR IGNORE INTO
    team
SELECT
    team_id,
    league,
    city,
    nickname,
    franch_id
FROM
    team_codes;
"""
run_command(c2)

q = """
SELECT
    *
FROM
    team
"""
run_query(q)



Unnamed: 0,team_id,league_id,city,nickname,franch_id
0,ALT,UA,Altoona,Mountain Cities,ALT
1,ARI,NL,Arizona,Diamondbacks,ARI
2,BFN,NL,Buffalo,Bisons,BFN
3,BFP,PL,Buffalo,Bisons,BFP
4,BL1,,Baltimore,Canaries,BL1
5,BL2,AA,Baltimore,Orioles,BL2
6,BLN,NL,Baltimore,Orioles,BL2
7,BL4,,Baltimore,Marylands,BL4
8,BLA,AL,Baltimore,Orioles,BLA
9,NYA,AL,New York,Yankees,BLA


## Create game table from Schema
- Data from ```game_log```
- Table contains information that does not refer to
    - one specific team or player (These information are in ```appearance_type``` table)
    - league specific (These information are in ```league``` table)
- Colums that were remove 
    - day of week that can be dervied from date
- Columns that were changed
    - ```day_night``` column to ```day```, as a boolean column
    - Since SQLite does not support boolean, NUMERIC values ```1``` as ```TRUE``` and ```0``` as ```FALSE```
- Foreign key ```park_id``` references to ```park``` table   

In [19]:
# Columns to be used for inserting data into game table
q = """
SELECT 
    game_id,
    date,
    number_of_game,
    park_id,
    length_outs,
    /*day,*/
    completion,
    forefeit,
    protest,
    attendance,
    length_minutes,
    additional_info,
    acquisition_info
FROM
    game_log
LIMIT 3;
"""
run_query(q)

Unnamed: 0,game_id,date,number_of_game,park_id,length_outs,completion,forefeit,protest,attendance,length_minutes,additional_info,acquisition_info
0,18710504FW10,18710504,0,FOR01,54.0,,,,200.0,120.0,,Y
1,18710505WS30,18710505,0,WAS01,54.0,,,,5000.0,145.0,HTBF,Y
2,18710506RC10,18710506,0,RCK01,54.0,,,,1000.0,140.0,,Y


We will use ```day_night``` column to categorize ```day```
```mysql
CASE
    WHEN day_night = "D" THEN 1
    WHEN day_night = "N" THEN 0
    ELSE NULL
END 
    AS day
```

In [20]:
c1 = """
CREATE TABLE IF NOT EXISTS game (
    game_id TEXT PRIMARY KEY,
    date TEXT,
    number_of_game INTEGER,
    park_id TEXT,
    length_outs INTEGER,
    day INTEGER,
    completion TEXT,
    forefeit TEXT,
    protest TEXT,
    attendance INTEGER,
    length_minutes INTEGER,
    additional_info TEXT,
    acquisition_info TEXT,
    FOREIGN KEY (park_id) REFERENCES park(park_id)
);
"""
run_command(c1)

c2 = """
INSERT OR IGNORE INTO
    game
SELECT
    game_id,
    date,
    number_of_game,
    park_id,
    length_outs,
    CASE
        WHEN day_night = "D" THEN 1
        WHEN day_night = "N" THEN 0
        ELSE NULL
    END
        AS day,
    completion,
    forefeit,
    protest,
    attendance,
    length_minutes,
    additional_info,
    acquisition_info    
FROM
    game_log;
"""
run_command(c2)

run_query("""SELECT * FROM game LIMIT 5;""")

Unnamed: 0,game_id,date,number_of_game,park_id,length_outs,day,completion,forefeit,protest,attendance,length_minutes,additional_info,acquisition_info
0,18710504FW10,18710504,0,FOR01,54,1,,,,200,120,,Y
1,18710505WS30,18710505,0,WAS01,54,1,,,,5000,145,HTBF,Y
2,18710506RC10,18710506,0,RCK01,54,1,,,,1000,140,,Y
3,18710508CH10,18710508,0,CHI01,54,1,,,,5000,150,,Y
4,18710509TRO0,18710509,0,TRO01,54,1,,,,3250,145,HTBF,Y


In [21]:
# Successfully created game table
show_table_schema("game")

CREATE TABLE game (
    game_id TEXT PRIMARY KEY,
    date TEXT,
    number_of_game INTEGER,
    park_id TEXT,
    length_outs INTEGER,
    day INTEGER,
    completion TEXT,
    forefeit TEXT,
    protest TEXT,
    attendance INTEGER,
    length_minutes INTEGER,
    additional_info TEXT,
    acquisition_info TEXT,
    FOREIGN KEY (park_id) REFERENCES park(park_id)
)


## Adding the Team Appearance Table
- ```team_appearances``` has a compound primary key that contains
    - ```team_id``` which also served as a foreign key that refers to ```team(team_id)```
    - ```game_id``` which also served as a foreign key that refers to ```game(game_id)```
- According to the schema, there are three foreign tables that team_appearances references. These foreign tables are:
    - ```team (team_id)```
    - ```league (league_id)```
    - ```game (game_id)```
- A "boolean" column ```home``` to differentiate between home and away team.
- The rest of the columns are scores or statistics that exists in original game log are repeated for each of the home and away teams. 

In order to cleanly the data cleanly, we will need to use a ```UNION``` clause to combine home and away team.
```mysql
INSERT INTO team_appearance
    SELECT
        h_name,
        game_id,
        1 AS home,
        h_league,
        h_score,
        h_line_score,
        h_at_bats,
        [...]
    FROM game_log

UNION

    SELECT  
    .....
```

In [22]:
# game schema
show_table_schema("game")

CREATE TABLE game (
    game_id TEXT PRIMARY KEY,
    date TEXT,
    number_of_game INTEGER,
    park_id TEXT,
    length_outs INTEGER,
    day INTEGER,
    completion TEXT,
    forefeit TEXT,
    protest TEXT,
    attendance INTEGER,
    length_minutes INTEGER,
    additional_info TEXT,
    acquisition_info TEXT,
    FOREIGN KEY (park_id) REFERENCES park(park_id)
)


In [23]:
# team schema
show_table_schema("team")

CREATE TABLE team (
    team_id TEXT PRIMARY KEY,
    league_id TEXT,
    city TEXT,
    nickname TEXT,
    franch_id TEXT,
    FOREIGN KEY (league_id) REFERENCES league(league_id)
)


In [24]:
# league schema
show_table_schema("league")

CREATE TABLE league (
    league_id TEXT PRIMARY KEY,
    league_name TEXT
)


From 

In [25]:
c1 = """
CREATE TABLE IF NOT EXISTS team_appearance(
    team_id TEXT,
    game_id TEXT,
    home BOOLEAN,
    league_id TEXT,
    score INTEGER,
    line_score TEXT,
    at_bats INTEGER,
    hits INTEGER,
    doubles INTEGER,
    triples INTEGER,
    homeruns INTEGER,
    rbi INTEGER,
    sacrifice_hits INTEGER,
    sacrifice_flies INTEGER,
    hit_by_pitch INTEGER,
    walks INTEGER,
    intentional_walks INTEGER,
    strikeouts INTEGER,
    stolen_bases INTEGER,
    caught_stealing INTEGER,
    grounded_into_double INTEGER,
    first_catcher_interference INTEGER,
    left_on_base INTEGER,
    pitchers_used INTEGER,
    individual_earned_runs INTEGER,
    team_earned_runs INTEGER,
    wild_pitches INTEGER,
    balks INTEGER,
    putouts INTEGER,
    assists INTEGER,
    errors INTEGER,
    passed_balls INTEGER,
    double_plays INTEGER,
    triple_plays INTEGER,
    PRIMARY KEY (team_id, game_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (league_id) REFERENCES league(league_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id)

);
"""

run_command(c1)


In [26]:
c2 = """
INSERT OR IGNORE INTO team_appearance
    SELECT
        h_name,
        game_id,
        1 AS home,
        h_league,
        h_score,
        h_line_score,
        h_at_bats,
        h_hits,
        h_doubles,
        h_triples,
        h_homeruns,
        h_rbi,
        h_sacrifice_hits,
        h_sacrifice_flies,
        h_hit_by_pitch,
        h_walks,
        h_intentional_walks,
        h_strikeouts,
        h_stolen_bases,
        h_caught_stealing,
        h_grounded_into_double,
        h_first_catcher_interference,
        h_left_on_base,
        h_pitchers_used,
        h_individual_earned_runs,
        h_team_earned_runs,
        h_wild_pitches,
        h_balks,
        h_putouts,
        h_assists,
        h_errors,
        h_passed_balls,
        h_double_plays,
        h_triple_plays
    FROM game_log

UNION

    SELECT    
        v_name,
        game_id,
        0 AS home,
        v_league,
        v_score,
        v_line_score,
        v_at_bats,
        v_hits,
        v_doubles,
        v_triples,
        v_homeruns,
        v_rbi,
        v_sacrifice_hits,
        v_sacrifice_flies,
        v_hit_by_pitch,
        v_walks,
        v_intentional_walks,
        v_strikeouts,
        v_stolen_bases,
        v_caught_stealing,
        v_grounded_into_double,
        v_first_catcher_interference,
        v_left_on_base,
        v_pitchers_used,
        v_individual_earned_runs,
        v_team_earned_runs,
        v_wild_pitches,
        v_balks,
        v_putouts,
        v_assists,
        v_errors,
        v_passed_balls,
        v_double_plays,
        v_triple_plays
    from game_log;
"""
run_command(c2)

In [27]:
q = """
SELECT * FROM team_appearance
WHERE game_id = (
                 SELECT MIN(game_id) from game
                )
   OR game_id = (
                 SELECT MAX(game_id) from game
                )
ORDER By game_id, home;
"""
run_query(q)

Unnamed: 0,team_id,game_id,home,league_id,score,line_score,at_bats,hits,doubles,triples,...,individual_earned_runs,team_earned_runs,wild_pitches,balks,putouts,assists,errors,passed_balls,double_plays,triple_plays
0,CL1,18710504FW10,0,,0,000000000,30,4,1,0,...,1,1,0,0,27,9,0,3,0,0
1,FW1,18710504FW10,1,,2,010010000,31,4,1,0,...,0,0,0,0,27,3,3,1,1,0
2,MIA,20161002WAS0,0,NL,7,000230020,38,14,1,1,...,10,10,1,0,24,11,0,0,1,0
3,WAS,20161002WAS0,1,NL,10,03023002x,30,10,2,0,...,7,7,1,0,27,11,0,0,1,0


In [28]:
show_table_schema("team_appearance")

CREATE TABLE team_appearance(
    team_id TEXT,
    game_id TEXT,
    home BOOLEAN,
    league_id TEXT,
    score INTEGER,
    line_score TEXT,
    at_bats INTEGER,
    hits INTEGER,
    doubles INTEGER,
    triples INTEGER,
    homeruns INTEGER,
    rbi INTEGER,
    sacrifice_hits INTEGER,
    sacrifice_flies INTEGER,
    hit_by_pitch INTEGER,
    walks INTEGER,
    intentional_walks INTEGER,
    strikeouts INTEGER,
    stolen_bases INTEGER,
    caught_stealing INTEGER,
    grounded_into_double INTEGER,
    first_catcher_interference INTEGER,
    left_on_base INTEGER,
    pitchers_used INTEGER,
    individual_earned_runs INTEGER,
    team_earned_runs INTEGER,
    wild_pitches INTEGER,
    balks INTEGER,
    putouts INTEGER,
    assists INTEGER,
    errors INTEGER,
    passed_balls INTEGER,
    double_plays INTEGER,
    triple_plays INTEGER,
    PRIMARY KEY (team_id, game_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (league_id) REFERENCES league(league_id),


In [30]:
show_table_schema("appearance_type")

CREATE TABLE "appearance_type" (
"appearance_type_id" TEXT,
  "name" TEXT,
  "category" TEXT
)


## Adding the Person Appearance Table


In [39]:
c1 = """
CREATE TABLE IF NOT EXISTS person_appearance(
    appearance_id INTEGER PRIMARY KEY,
    person_id TEXT,
    team_id TEXT,
    game_id TEXT,
    appearance_type_id TEXT,
    FOREIGN KEY (person_id) REFERENCES person(person_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (appearance_type_id) REFERENCES appearance_type(appearance_type_id)
);
"""
run_command(c1)

c2 = """
INSERT OR IGNORE INTO person_appearance (
    game_id,
    team_id,
    person_id,
    appearance_type_id
) 
    SELECT
        game_id,
        NULL,
        hp_umpire_id,
        "UHP"
    FROM game_log
    WHERE hp_umpire_id IS NOT NULL    

UNION

    SELECT
        game_id,
        NULL,
        [1b_umpire_id],
        "U1B"
    FROM game_log
    WHERE "1b_umpire_id" IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        [2b_umpire_id],
        "U2B"
    FROM game_log
    WHERE [2b_umpire_id] IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        [3b_umpire_id],
        "U3B"
    FROM game_log
    WHERE [3b_umpire_id] IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        lf_umpire_id,
        "ULF"
    FROM game_log
    WHERE lf_umpire_id IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        rf_umpire_id,
        "URF"
    FROM game_log
    WHERE rf_umpire_id IS NOT NULL

UNION

    SELECT
        game_id,
        v_name,
        v_manager_id,
        "MM"
    FROM game_log
    WHERE v_manager_id IS NOT NULL

UNION

    SELECT
        game_id,
        h_name,
        h_manager_id,
        "MM"
    FROM game_log
    WHERE h_manager_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        winning_pitcher_id,
        "AWP"
    FROM game_log
    WHERE winning_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score < v_score THEN h_name
            ELSE v_name
            END,
        losing_pitcher_id,
        "ALP"
    FROM game_log
    WHERE losing_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        saving_pitcher_id,
        "ASP"
    FROM game_log
    WHERE saving_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        winning_rbi_batter_id,
        "AWB"
    FROM game_log
    WHERE winning_rbi_batter_id IS NOT NULL

UNION

    SELECT
        game_id,
        v_name,
        v_starting_pitcher_id,
        "PSP"
    FROM game_log
    WHERE v_starting_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        h_name,
        h_starting_pitcher_id,
        "PSP"
    FROM game_log
    WHERE h_starting_pitcher_id IS NOT NULL;
"""
run_command(c1)
run_command(c2)



In [41]:
q = """
SELECT
    *
FROM
    person_appearance
LIMIT
    5;
"""
run_query(q)

Unnamed: 0,appearance_id,person_id,team_id,game_id,appearance_type_id
0,1,boakj901,,18710504FW10,UHP
1,2,paboc101,CL1,18710504FW10,MM
2,3,prata101,CL1,18710504FW10,ALP
3,4,prata101,CL1,18710504FW10,PSP
4,5,lennb101,FW1,18710504FW10,MM


In [43]:
show_table_schema("person_appearance")

CREATE TABLE person_appearance(
    appearance_id INTEGER PRIMARY KEY,
    person_id TEXT,
    team_id TEXT,
    game_id TEXT,
    appearance_type_id TEXT,
    FOREIGN KEY (person_id) REFERENCES person(person_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (appearance_type_id) REFERENCES appearance_type(appearance_type_id)
)


## Removing the original tables
Tables created from imported original CSVs are removed. 
 - ```game_log```
 - ```park_codes```
 - ```team_codes```
 - ```person_codes```

In [45]:
# Before removal
show_tables()

Unnamed: 0,name,type
0,game_log,table
1,person_codes,table
2,park_codes,table
3,team_codes,table
4,person,table
5,park,table
6,league,table
7,team,table
8,game,table
9,team_appearance,table


In [47]:
for table, data in tables.items():
    c1 = """
    DROP TABLE IF EXISTS {};
    """.format(table)
    
    run_command(c1)

In [48]:
show_tables()

Unnamed: 0,name,type
0,person,table
1,park,table
2,league,table
3,team,table
4,game,table
5,team_appearance,table
6,person_appearance,table
7,appearance_type,table


In [51]:
show_table_schema("")

CREATE TABLE person (
    person_id TEXT PRIMARY KEY,
    first_name TEXT,
    last_name TEXT
)
CREATE TABLE park (
    park_id TEXT PRIMARY KEY,
    name TEXT,
    nickname TEXT,
    city TEXT,
    state TEXT,
    notes TEXT
)
CREATE TABLE league (
    league_id TEXT PRIMARY KEY,
    league_name TEXT
)
CREATE TABLE team (
    team_id TEXT PRIMARY KEY,
    league_id TEXT,
    city TEXT,
    nickname TEXT,
    franch_id TEXT,
    FOREIGN KEY (league_id) REFERENCES league(league_id)
)
CREATE TABLE game (
    game_id TEXT PRIMARY KEY,
    date TEXT,
    number_of_game INTEGER,
    park_id TEXT,
    length_outs INTEGER,
    day INTEGER,
    completion TEXT,
    forefeit TEXT,
    protest TEXT,
    attendance INTEGER,
    length_minutes INTEGER,
    additional_info TEXT,
    acquisition_info TEXT,
    FOREIGN KEY (park_id) REFERENCES park(park_id)
)
CREATE TABLE team_appearance(
    team_id TEXT,
    game_id TEXT,
    home BOOLEAN,
    league_id TEXT,
    score INTEGER,
    line_score TEXT,