<h1>SQLite Database Creator</h1>
<p>This notebook aggregates a range of data collected for the take-a-seat project and converts it into an SQLite database based on the following schema: <img src="schema.png" alt="Schema for Database"></p>
<p>The data is converted into csvs, then added into the SQLite .db</p>

In [84]:
import pandas as pd
import numpy as np
import os
from datetime import datetime as dt, timedelta
from dateutil.relativedelta import *
import re
from matplotlib import pyplot as plt
import sqlite3
import os

<h3>SQLite Handling functions</h3>
<p>Use pandas to import DataFrames into SQLite</p>

In [85]:
DB = "hockey_data_goalies.db"  # path of the SQLite file every helper below connects to

#SQL Handle functions
def run_query(q):
    """Run a read query against the database and return the result.

    Args:
        q: SQL text to execute (typically a SELECT statement).

    Returns:
        pandas.DataFrame holding the rows produced by ``q``.
    """
    conn = sqlite3.connect(DB)
    try:
        # The connection's context manager only commits or rolls back the
        # transaction; it does not close the connection.
        with conn:
            x = pd.read_sql(q, conn)
    finally:
        # Close explicitly so repeated calls do not leak open handles on the file.
        conn.close()
    return x
   
#send command
def run_command(c):
    """Execute a single SQL statement that does not need a result set.

    Args:
        c: SQL text to execute (e.g. CREATE TABLE / DROP TABLE).

    Returns:
        The sqlite3.Cursor produced by executing ``c``.
    """
    with sqlite3.connect(DB) as connection:
        # isolation_level None puts the connection in autocommit mode.
        connection.isolation_level = None
        cursor = connection.execute(c)
    # NOTE: the connection is deliberately left open here, because the
    # returned cursor still belongs to it.
    return cursor
    
#show tables
def show_tables():
    """List every table and view stored in the database.

    Returns:
        pandas.DataFrame with one row per table or view and the columns
        ``name`` and ``type``, as read from ``sqlite_master``.
    """
    # String values use single quotes: SQLite parses double-quoted tokens as
    # identifiers first and only falls back to string literals as a legacy quirk.
    r = """
    Select name,type
    FROM sqlite_master
    WHERE type in ('table','view');
        """
    return run_query(r)

<h2>Import and Clean up csvs</h2>
    <ul>
        <li><b>team_list</b>: This CSV gets piped straight into its own table</li>
        <li><b>player_list</b>: Only count those players who have played more than 113 games (median no.)</li>
</ul>

In [86]:
#import team list data
teams = pd.read_csv('team_list.csv',encoding = "ISO-8859-1")
teams['longitude'] = teams['longitude'].astype(float)
teams.drop(columns=['index'],inplace=True)
teams = teams.set_index('team_abbr')
teams.head()

Unnamed: 0_level_0,team_name,years_active,city,latitude,longitude,timezone
team_abbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ANA,Anaheim Ducks,2006-2020,Anaheim,33.808,-117.876,-8
MDA,Mighty Ducks of Anaheim,1993-2006,Anaheim,33.808,-117.876,-8
ARI,Arizona Coyotes,2014-2020,Glendale,33.532,-112.261,-7
PHX,Phoenix Coyotes,1996-2014,Glendale,33.532,-112.261,-7
WIN,Winnipeg Jets,1990-1996,Winnipeg,49.893,-97.144,-6


<h2>Wipe Database first</h2>
<p>Since we're building the db from scratch here</p>

In [87]:
#clear the DB of existing tables and views so it can be rebuilt from scratch
tables = show_tables() #one row per table/view, with 'name' and 'type' columns
for _, entry in tables.iterrows():
    #SQLite's internal bookkeeping tables (sqlite_*) cannot be dropped
    if entry['name'].startswith('sqlite_'):
        continue
    #DROP TABLE fails on a view, so choose the statement from the object type
    drop_keyword = 'VIEW' if entry['type'] == 'view' else 'TABLE'
    #quote the identifier so names containing spaces or keywords still work
    quoted_name = '"{}"'.format(entry['name'].replace('"', '""'))
    run_command("DROP {} IF EXISTS {}".format(drop_keyword, quoted_name))

<h3>Enter Team Data</h3>

In [88]:
#make a new table with the appropriate keys
c1 = """
    CREATE TABLE IF NOT EXISTS team_list (
    team_abbr TEXT PRIMARY KEY,
    team_name TEXT,
    years_active TEXT,
    city TEXT,
    latitude REAL,
    longitude REAL,
    timezone INTEGER
);
"""
run_command(c1)

#inject data into the table
teams.to_sql('team_list',con=sqlite3.connect(DB), if_exists='append')

<h2>Enter Player Data</h2>

In [89]:
#import player list data
players = pd.read_csv('nhlplayerlistr3.txt','\t')
players.drop(columns=['Unnamed: 0','link','height','weight'],inplace=True)
players = players.set_index('unique_id')
#sort based ons whether or not there is injury data
players = players[(players['position']=='G')&(players['total_games']>10)].copy()

In [90]:
#make a new table with the appropriate keys #drop position as we're only dealing with Goalies
c1 = """
    CREATE TABLE IF NOT EXISTS player_list (
    unique_id TEXT PRIMARY KEY,
    player TEXT,
    year_start INTEGER,
    year_finish INTEGER,
    height_cm INTEGER,
    weight_kg INTEGER,
    total_games INTEGER
);
"""

run_command(c1)

#inject data into the table
players[['player','year_start','year_finish','height_cm','weight_kg','total_games']].to_sql('player_list',con=sqlite3.connect(DB), if_exists='append')

<h2>Enter Game Logs by Team</h2>

In [93]:
#make a new table with the appropriate keys #drop position as we're only dealing with Goalies
c1 = """
    CREATE TABLE IF NOT EXISTS team_log (
    season_id TEXT PRIMARY KEY,
    team_id TEXT,
    date_game TEXT,
    game_location BLOB,
    opp_name TEXT,
    goals INTEGER,
    opp_goals INTEGER,
    game_outcome TEXT,
    overtimes TEXT,
    shots INTEGER,
    shots_against INTEGER,
    FOREIGN KEY (team_id) REFERENCES team_list(team_id)
);
"""

run_command(c1)

<sqlite3.Cursor at 0x2e5691f2c00>

In [94]:
#import player list into a dataframe
for files in os.listdir('C:\\Users\\jesse\\Documents\\Projects\\takeaseat\\Data\\team_gamelogs\\'):
    season_log = pd.read_csv('team_gamelogs\\'+files,'\t')
    season_log['team_id'] = files[:3]
    season_log['season_id'] = season_log['team_id']+season_log['date_game'].astype('datetime64').map(lambda x:x.strftime('%Y%m%d'))
    season_log.drop(columns=['Unnamed: 0'],inplace=True)
    season_log = season_log.set_index('season_id')
    season_log[['team_id','date_game','game_location', 'opp_name', 'goals','opp_goals','game_outcome','overtimes','shots','shots_against']].to_sql('team_log',con=sqlite3.connect(DB), if_exists='append')

<h2>Player Logs</h2>
Now add goalie player logs

In [None]:
#make a new table with the appropriate keys
c1 = """
    CREATE TABLE IF NOT EXISTS player_log(
    gamelog_id TEXT PRIMARY KEY,
    player_id TEXT,
    team_id TEXT,
    date_game TEXT,
    age REAL,
    min_season REAL,
    min_3w REAL,
    days_to_next_g INTEGER,
    timec REAL,
    venuec REAL,
    injured INT,
    FOREIGN KEY (player_id) REFERENCES player_list(unique_id)
    FOREIGN KEY (team_id) REFERENCES team_list(team_id)
);"""
            
run_command(c1)
show_tables()