<h1>Add Machine Learning Model CSV results to SQL Database</h1>

<h4>Import Dependencies</h4>

In [1]:
import csv
import os
import random
import sqlite3
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

from config import pgPassword

<h4>Create paths to CSV files</h4>

In [2]:
# All model prediction CSVs live in the same directory, two levels up from here.
_modelsDir = os.path.join("..", "..", "data", "csv/models")

pathTeams = os.path.join(_modelsDir, "teamPredictions.csv")      # team-level predictions
pathRunsScore = os.path.join(_modelsDir, "rpgPredictions.csv")   # player runs scored
pathHomeRuns = os.path.join(_modelsDir, "HRpgPredictions.csv")   # player home runs
pathBaseHits = os.path.join(_modelsDir, "hpgPredictions.csv")    # player base hits
pathWalks = os.path.join(_modelsDir, "BBpgPredictions.csv")      # player walks

<h4>Load CSV files into DataFrame</h4>

In [3]:
# Load the team predictions CSV, normalise the column names in one pass,
# and set ID to the DataFrame's row index.
teamPredictDF = (
    pd.read_csv(pathTeams)
    .rename(columns={'Unnamed: 0': 'ID', 'model_type': 'modelType'})
    .assign(ID=lambda frame: frame.index)
)
print(teamPredictDF.shape)
teamPredictDF.head()

(4192, 6)


Unnamed: 0,ID,yearID,teamID,actual,model,modelType
0,0,1982,ANA-1982,0.574074,0.520139,ML-EN-T1
1,1,1982,ANA-1982,0.574074,0.53295,ML-LN-T1
2,2,1982,ANA-1982,0.574074,0.49953,ML-LS-T1
3,3,1982,ANA-1982,0.574074,0.532837,ML-RD-T1
4,4,1983,ANA-1983,0.432099,0.493408,ML-EN-T1


In [4]:
# Load the player runs-scored predictions CSV, normalise the column names
# in one pass, and set ID to the DataFrame's row index.
runsScoreDF = (
    pd.read_csv(pathRunsScore)
    .rename(columns={'Unnamed: 0': 'ID', 'model_type': 'modelType'})
    .assign(ID=lambda frame: frame.index)
)
print(runsScoreDF.shape)
runsScoreDF.head()

(835324, 6)


Unnamed: 0,ID,yearID,fpID,actual,model,modelType
0,0,1982,1000,0.0,0.020441,ML-LN-T1
1,1,1983,1000,0.033333,0.030971,ML-LN-T1
2,2,1984,1000,0.095238,0.087524,ML-LN-T1
3,3,1985,1000,0.0,0.027154,ML-LN-T1
4,4,1983,1003,0.0,0.028552,ML-LN-T1


In [5]:
# Load the player home-run predictions CSV, normalise the column names
# in one pass, and set ID to the DataFrame's row index.
homeRunsDF = (
    pd.read_csv(pathHomeRuns)
    .rename(columns={'Unnamed: 0': 'ID', 'model_type': 'modelType'})
    .assign(ID=lambda frame: frame.index)
)
print(homeRunsDF.shape)
homeRunsDF.head()

(835324, 6)


Unnamed: 0,ID,yearID,fpID,actual,model,modelType
0,0,1982,1000,0.0,0.005417,ML-LN-T1
1,1,1983,1000,0.0,0.00424,ML-LN-T1
2,2,1984,1000,0.0,0.011114,ML-LN-T1
3,3,1985,1000,0.0,0.000985,ML-LN-T1
4,4,1983,1003,0.0,0.007112,ML-LN-T1


In [6]:
# Load the player base-hit predictions CSV, normalise the column names
# in one pass, and set ID to the DataFrame's row index.
baseHitsDF = (
    pd.read_csv(pathBaseHits)
    .rename(columns={'Unnamed: 0': 'ID', 'model_type': 'modelType'})
    .assign(ID=lambda frame: frame.index)
)
print(baseHitsDF.shape)
baseHitsDF.head()

(835324, 6)


Unnamed: 0,ID,yearID,fpID,actual,model,modelType
0,0,1982,1000,0.0,0.041308,ML-LN-T1
1,1,1983,1000,0.033333,0.057228,ML-LN-T1
2,2,1984,1000,0.095238,0.117268,ML-LN-T1
3,3,1985,1000,0.0,0.040886,ML-LN-T1
4,4,1983,1003,0.028571,0.066635,ML-LN-T1


In [7]:
# Load the player walks predictions CSV, normalise the column names
# in one pass, and set ID to the DataFrame's row index.
walksDF = (
    pd.read_csv(pathWalks)
    .rename(columns={'Unnamed: 0': 'ID', 'model_type': 'modelType'})
    .assign(ID=lambda frame: frame.index)
)
print(walksDF.shape)
walksDF.head()

(835324, 6)


Unnamed: 0,ID,yearID,fpID,actual,model,modelType
0,0,1982,1000,0.0,0.012063,ML-LN-T1
1,1,1983,1000,0.0,0.01242,ML-LN-T1
2,2,1984,1000,0.285714,0.131738,ML-LN-T1
3,3,1985,1000,0.0,0.06961,ML-LN-T1
4,4,1983,1003,0.0,0.01922,ML-LN-T1


<h1>===========================================================</h1>

<h1>Set up SQL DataBase</h1>

<b>Before running any of the cells below:</b>
    in pgAdmin, drop and re-create the prediction tables for the DataFrames loaded above, leaving them empty, in the database named 'baseball_db'.

<h4>Create a connection to SQL database</h4>

In [8]:
# Connection settings for the PostgreSQL server on localhost:5432.
pg_user = 'postgres'
pg_password = pgPassword  # imported from the local config module; never hardcode it here
db_name = 'baseball_db'

# Percent-encode the credentials before embedding them in the URL: a password
# containing '@', ':', '/' or '%' would otherwise be read as URL syntax and the
# engine would be created with the wrong host or credentials (or fail to parse).
connection_string = (
    f"{quote_plus(pg_user)}:{quote_plus(pg_password)}@localhost:5432/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [9]:
# List the tables in the connected database. Engine.table_names() was deprecated
# in SQLAlchemy 1.4 and removed in 2.0; the Inspector API is the supported route.
inspect(engine).get_table_names()

['Team-Stats',
 'Teams',
 'FranchisePlayers',
 'Batting',
 'TeamPredictions',
 'Pitching',
 'Salaries',
 'Players',
 'PlayerPredictions-Walks',
 'PlayerPredictions-BaseHits',
 'PlayerPredictions-RunsScore',
 'PlayerPredictions-HomeRuns',
 'Franchises']

In [10]:
# Pull the whole "Teams" table from the database into a DataFrame.
teamsDF = pd.read_sql_table(table_name="Teams", con=engine)
teamsDF.head()

Unnamed: 0,teamID,TeamName
0,BS1-1871,Boston Red Stockings
1,CH1-1871,Chicago White Stockings
2,CL1-1871,Cleveland Forest Citys
3,FW1-1871,Fort Wayne Kekiongas
4,NY2-1871,New York Mutuals


In [11]:
# Inner-join the predictions to the Teams table on teamID. Prediction rows whose
# teamID has no match in Teams are dropped, so the row count can shrink here.
mergeTeamsDF = teamPredictDF.merge(teamsDF, how="inner", on="teamID")
print(mergeTeamsDF.shape)
mergeTeamsDF.head()

(2360, 7)


Unnamed: 0,ID,yearID,teamID,actual,model,modelType,TeamName
0,60,1997,ANA-1997,0.518519,0.515491,ML-EN-T1,Anaheim Angels
1,61,1997,ANA-1997,0.518519,0.49918,ML-LN-T1,Anaheim Angels
2,62,1997,ANA-1997,0.518519,0.499687,ML-LS-T1,Anaheim Angels
3,63,1997,ANA-1997,0.518519,0.499246,ML-RD-T1,Anaheim Angels
4,64,1998,ANA-1998,0.524691,0.504953,ML-EN-T1,Anaheim Angels


In [12]:
# Build the frame to load: remove the TeamName column that came from the merge
# and renumber ID from the merged frame's row index.
teamPredictDF2 = (
    mergeTeamsDF
    .drop(columns='TeamName')
    .assign(ID=lambda frame: frame.index)
)
print(teamPredictDF2.shape)
teamPredictDF2.head()

(2360, 6)


Unnamed: 0,ID,yearID,teamID,actual,model,modelType
0,0,1997,ANA-1997,0.518519,0.515491,ML-EN-T1
1,1,1997,ANA-1997,0.518519,0.49918,ML-LN-T1
2,2,1997,ANA-1997,0.518519,0.499687,ML-LS-T1
3,3,1997,ANA-1997,0.518519,0.499246,ML-RD-T1
4,4,1998,ANA-1998,0.524691,0.504953,ML-EN-T1


<h4>Add dataframes to SQL database</h4>

In [13]:
# Append the matched team predictions to the existing "TeamPredictions" table;
# the DataFrame index is not written as a column.
teamPredictDF2.to_sql(
    "TeamPredictions",
    engine,
    if_exists="append",
    index=False,
)
print("team predictions loaded success...")

team predictions loaded success...


In [14]:
# Append the runs-scored predictions to the existing table. The frame is large
# (835,324 rows in the run shown above), so write it in batches rather than
# as a single all-rows batch; the rows written are unchanged.
runsScoreDF.to_sql(
    name="PlayerPredictions-RunsScore",
    con=engine,
    if_exists='append',
    index=False,
    chunksize=10_000,  # rows per insert batch
)
print("runs score predictions loaded success...")

runs score predictions loaded success...


In [15]:
# Append the home-run predictions to the existing table. The frame is large
# (835,324 rows in the run shown above), so write it in batches rather than
# as a single all-rows batch; the rows written are unchanged.
homeRunsDF.to_sql(
    name="PlayerPredictions-HomeRuns",
    con=engine,
    if_exists='append',
    index=False,
    chunksize=10_000,  # rows per insert batch
)
print("home runs predictions loaded success...")

home runs predictions loaded success...


In [16]:
# Append the base-hit predictions to the existing table. The frame is large
# (835,324 rows in the run shown above), so write it in batches rather than
# as a single all-rows batch; the rows written are unchanged.
baseHitsDF.to_sql(
    name="PlayerPredictions-BaseHits",
    con=engine,
    if_exists='append',
    index=False,
    chunksize=10_000,  # rows per insert batch
)
print("base hits predictions loaded success...")

base hits predictions loaded success...


In [17]:
# Append the walks predictions to the existing table. The frame is large
# (835,324 rows in the run shown above), so write it in batches rather than
# as a single all-rows batch; the rows written are unchanged.
walksDF.to_sql(
    name="PlayerPredictions-Walks",
    con=engine,
    if_exists='append',
    index=False,
    chunksize=10_000,  # rows per insert batch
)
print("walks predictions loaded success...")

walks predictions loaded success...
