# Create PostgreSQL Database of Historic Data

### Imports & Versions

In [4]:
import ujson
print("ujson v.{}".format(ujson.__version__))

import pandas as pd
print("pandas v.{}".format(pd.__version__))

import sqlalchemy
print("SqlAlchemy v.{}".format(sqlalchemy.__version__))

ujson v.1.35
pandas v.0.19.2
SqlAlchemy v.1.1.5


### Open PostgreSQL Credentials from local file

In [7]:
# Load the PostgreSQL credentials from a local JSON file. The context manager
# closes the file handle as soon as parsing finishes instead of leaving it open.
with open("D:\\Python\\posgresql_credentials.json", "r") as credentials_file:
    credentials = ujson.load(credentials_file)

In [8]:
# Available databases, as listed under the 'databases' key of the credentials
credentials['databases']

['temp_db_rental', 'ncaa_mbb']

### Define Function to Connect to Database

In [9]:
def pg_connect(user, password, db, host='localhost', port=5432):
    '''Create a SQLAlchemy engine and a new MetaData object for a PostgreSQL database.

    The user name and password are percent-encoded before being placed in the
    connection URL, so credentials containing characters such as '@', ':' or
    '/' do not corrupt the URL.

    Args:
        user: database user name.
        password: password for ``user``.
        db: name of the database to connect to.
        host: database server host name (default 'localhost').
        port: database server port (default 5432).

    Returns:
        An ``(engine, meta)`` tuple: the engine created for the URL and a new
        ``sqlalchemy.MetaData`` instance that is not bound to it.
    '''
    # Imported locally so this function only relies on the file-level
    # ``sqlalchemy`` import.
    from urllib.parse import quote

    # safe='' makes quote() escape every reserved character (including '/'),
    # and encodes spaces as %20 rather than '+'.
    url = 'postgresql://{}:{}@{}:{}/{}'.format(
        quote(str(user), safe=''),
        quote(str(password), safe=''),
        host,
        port,
        db)
    engine = sqlalchemy.create_engine(url, client_encoding='utf8')
    meta = sqlalchemy.MetaData()

    return (engine, meta)

### Create the engine, metadata, and connection objects

In [10]:
# Build the engine and metadata for the database at index 1 of the
# credentials' database list, then open a connection for ad-hoc queries.
engine, meta = pg_connect(
    user=credentials['user'],
    password=credentials['password'],
    db=credentials['databases'][1],
)
connection = engine.connect()

In [12]:
# Tables that already exist in the database
existing_tables = engine.table_names()
print(existing_tables)

[]


### Create Blank Table for Teams

In [13]:
# Define the (blank) teams table on the shared metadata: team_id is the
# primary key, team_name holds the display name. Then create it in the database.
team_columns = [
    sqlalchemy.Column('team_id', sqlalchemy.String, primary_key=True),
    sqlalchemy.Column('team_name', sqlalchemy.String),
]
teams = sqlalchemy.Table('teams', meta, *team_columns)
meta.create_all(engine)

In [14]:
# List the tables again to confirm the new table was created
tables_after_teams = engine.table_names()
print(tables_after_teams)

['teams']


### Create Blank Table for Seeds

In [15]:
# Define the (blank) tournament seed table on the shared metadata; its team
# column is a foreign key to teams.team_id. Then create it in the database.
seed_columns = [
    sqlalchemy.Column('season', sqlalchemy.String),
    sqlalchemy.Column('seed', sqlalchemy.String),
    sqlalchemy.Column('team', sqlalchemy.String,
                      sqlalchemy.ForeignKey('teams.team_id')),
]
tourney_seeds = sqlalchemy.Table('tourney_seeds', meta, *seed_columns)
meta.create_all(engine)

In [16]:
# List the tables once more to confirm tourney_seeds was created
tables_after_seeds = engine.table_names()
print(tables_after_seeds)

['teams', 'tourney_seeds']


### Pull Team Data into Dataframe from local .csv File

In [17]:
# Directory holding the .csv files whose data is added to the tables.
# It ends with a path separator so file names can be appended directly.
file_path = "D:\\_data_science_data\\2017_march_madness\\tourney17\\"
# Files read from this directory: TourneySeeds.csv; Teams.csv

In [24]:
# Load the teams .csv and rename its columns to match the 'teams' table.
# NOTE: index=str also converts the DataFrame's row labels to strings.
teams_csv = file_path + 'Teams.csv'
team_column_map = {'Team_Id': 'team_id', 'Team_Name': 'team_name'}
teams_df = pd.read_csv(teams_csv).rename(index=str, columns=team_column_map)
teams_df.head()

Unnamed: 0,team_id,team_name
0,1101,Abilene Chr
1,1102,Air Force
2,1103,Akron
3,1104,Alabama
4,1105,Alabama A&M


### Append Data from Teams DataFrame to existing PostgreSQL Table

In [25]:
# Append the DataFrame rows to the existing 'teams' table; the DataFrame
# index is not written as a column.
teams_df.to_sql(name='teams', con=engine, if_exists='append', index=False)

In [26]:
# Validate the insertion by selecting a handful of rows back from the table
stmt = 'SELECT * FROM teams LIMIT 6'
results = connection.execute(stmt).fetchall()
for row in results:
    print(row)

('1101', 'Abilene Chr')
('1102', 'Air Force')
('1103', 'Akron')
('1104', 'Alabama')
('1105', 'Alabama A&M')
('1106', 'Alabama St')


### Pull Seed Data into DataFrame from local .csv File

In [29]:
# Load the tournament seeds .csv and rename its columns to match the
# 'tourney_seeds' table.
# NOTE: index=str also converts the DataFrame's row labels to strings.
seeds_csv = file_path + 'TourneySeeds.csv'
seed_column_map = {'Season': 'season', 'Seed': 'seed', 'Team': 'team'}
seeds_df = pd.read_csv(seeds_csv).rename(index=str, columns=seed_column_map)
seeds_df.head()

Unnamed: 0,season,seed,team
0,1985,W01,1207
1,1985,W02,1210
2,1985,W03,1228
3,1985,W04,1260
4,1985,W05,1374


### Append Data from Seed DataFrame to existing PostgreSQL Table

In [30]:
# Append the DataFrame rows to the existing 'tourney_seeds' table; the
# DataFrame index is not written as a column.
seeds_df.to_sql(name='tourney_seeds', con=engine, if_exists='append', index=False)

In [31]:
# Validate the insertion by selecting a handful of rows back from the table
stmt = 'SELECT * FROM tourney_seeds LIMIT 6'
results = connection.execute(stmt).fetchall()
for row in results:
    print(row)

('1985', 'W01', '1207')
('1985', 'W02', '1210')
('1985', 'W03', '1228')
('1985', 'W04', '1260')
('1985', 'W05', '1374')
('1985', 'W06', '1208')


### Validate the Keys Linking the Tables by Running a SELECT with a JOIN

In [32]:
# Validate the key relationship: join each seed row to its team through
# tourney_seeds.team = teams.team_id and show the team name instead of the id.
stmt = 'SELECT tourney_seeds.season, tourney_seeds.seed, teams.team_name \
        FROM tourney_seeds \
        INNER JOIN teams \
        ON tourney_seeds.team=teams.team_id \
        LIMIT 6'
results = connection.execute(stmt).fetchall()
for row in results:
    print(row)

('1985', 'W01', 'Georgetown')
('1985', 'W02', 'Georgia Tech')
('1985', 'W03', 'Illinois')
('1985', 'W04', 'Loyola-Chicago')
('1985', 'W05', 'SMU')
('1985', 'W06', 'Georgia')


### Disconnect From Database when done

In [33]:
# Closing the connection only hands it back to the engine's connection pool.
# Disposing of the engine afterwards closes the pooled connections, so the
# notebook really disconnects from the database.
connection.close()
engine.dispose()