In [None]:
# Carter Naegle & Cabot Steward
from dotenv import load_dotenv
import os
import psycopg2
import pymssql
import pandas as pd
import datetime
import csv

    
load_dotenv()


def get_connections():

    try:
        post_conn = psycopg2.connect(
            database=os.getenv('POSTGRES_DB'),
            user=os.getenv('POSTGRES_USER'),
            password=os.getenv('POSTGRES_PASSWORD'),
            host=os.getenv('POSTGRES_HOST'),
            port=os.getenv('POSTGRES_PORT')
        )
        post_cursor = post_conn.cursor()

        ssms_conn = pymssql.connect(
            server=os.getenv('MSSQL_SERVER'),
            user=os.getenv('MSSQL_USER'),
            password=os.getenv('MSSQL_PASSWORD'),
            database=os.getenv('MSSQL_DB')
        )
        ssms_cursor = ssms_conn.cursor()

        return {
            'post_conn': post_conn,
            'post_cursor': post_cursor,
            'ssms_conn': ssms_conn,
            'ssms_cursor': ssms_cursor
        }
    except Exception as e:
        print(f"Error connecting to the database: {e}")
        return None


In [None]:
tables = [
  """CREATE TABLE IF NOT EXISTS weather_station (
    stadium_weather_station_id SERIAL PRIMARY KEY,
    stadium_weather_station_code VARCHAR(20),
    station_name VARCHAR(70),
    latitude NUMERIC(9,5),
    longitude NUMERIC(9,5),
    elevation NUMERIC(7,2)
  );""",
  """CREATE TABLE IF NOT EXISTS nfl_stadium (
    stadium_id SERIAL PRIMARY KEY,
    stadium_name VARCHAR(50) NOT NULL,
    stadium_city VARCHAR(50),
    stadium_state VARCHAR(20),
    stadium_open_year DATE,
    stadium_close_year DATE,
    stadium_type VARCHAR(15),
    stadium_surface VARCHAR(50),
    stadium_capacity INT,
    stadium_weather_type VARCHAR(10),
    stadium_weather_station_id INT,
    CONSTRAINT FK_nfl_stadium_weather_station_id
      FOREIGN KEY (stadium_weather_station_id)
      REFERENCES weather_station(stadium_weather_station_id)
  );""",
  """CREATE TABLE IF NOT EXISTS customer (
    customer_id SERIAL PRIMARY KEY,
    customer_first_name VARCHAR(50) NOT NULL,
    customer_last_name VARCHAR(50) NOT NULL,
    customer_age SMALLINT NOT NULL,
    customer_type VARCHAR(25) NOT NULL,
    customer_since DATE NOT NULL,
    customer_income INT,
    household_size SMALLINT,
    mode_color VARCHAR(10)
  );""",
  """CREATE TABLE IF NOT EXISTS teams (
    team_id SERIAL PRIMARY KEY,
    team_name VARCHAR(50) NOT NULL,
    team_name_short VARCHAR(25) NOT NULL,
    team_abv VARCHAR(4) NOT NULL,
    team_abv_pfr VARCHAR(4) NOT NULL,
    team_conference VARCHAR(4) NOT NULL,
    team_division VARCHAR(12),
    team_conference_pre2002 VARCHAR(4) NOT NULL,
    team_division_pre2002 VARCHAR(15)
  );""",
    """CREATE TABLE IF NOT EXISTS game (
      game_outcome_id SERIAL PRIMARY KEY,
      schedule_date DATE NOT NULL,
      schedule_season INT NOT NULL,
      schedule_week VARCHAR(12) NOT NULL,
      schedule_playoff BOOLEAN NOT NULL,
      team_id_home INT NOT NULL,
      score_home SMALLINT NOT NULL,
      team_id_away INT NOT NULL,
      score_away SMALLINT NOT NULL,
      winner_ou VARCHAR(4) NOT NULL,
      winner_line VARCHAR(4) NOT NULL,
      team_favored INT,
      favored_spread NUMERIC(3,1) NOT NULL,
      over_under_line NUMERIC(3,1) NOT NULL,
      stadium_id INT NOT NULL,
      stadium_neutral BOOLEAN NOT NULL,
      weather_temperature SMALLINT,
      weather_wind_mph SMALLINT,
      weather_humidity SMALLINT,
      weather_detail VARCHAR(15),
      CONSTRAINT FK_game_stadium_id
        FOREIGN KEY (stadium_id)
        REFERENCES nfl_stadium(stadium_id),
      CONSTRAINT FK_game_team_id_home
        FOREIGN KEY (team_id_home)
        REFERENCES teams(team_id),
      CONSTRAINT FK_game_team_id_away
        FOREIGN KEY (team_id_away)
        REFERENCES teams(team_id),
        CONSTRAINT FK_game_team_favored
        FOREIGN KEY (team_favored)
        REFERENCES teams(team_id)
    );""",
  """CREATE TABLE IF NOT EXISTS placed_bet (
    bet_id SERIAL PRIMARY KEY,
    customer_id INT NOT NULL,
    game_outcome_id INT NOT NULL,
    bet_amount SMALLINT NOT NULL,
    bet_result VARCHAR(4),
    commision_paid NUMERIC(8,2) NOT NULL,
    bet_on VARCHAR(40),
    bet_type VARCHAR(10),
    CONSTRAINT FK_placed_bet_customer_id
      FOREIGN KEY (customer_id)
      REFERENCES customer(customer_id),
    CONSTRAINT FK_placed_bet_game_outcome_id
      FOREIGN KEY (game_outcome_id)
      REFERENCES game(game_outcome_id)
  );"""
]

for i in tables:
  post_cursor.execute(i)

post_conn.commit()

In [2]:
# Populate Customer Table
import datetime
query = """SELECT * FROM customer_table """

df = pd.read_sql(query, ssms_conn)
df[['customer_first_name', 'customer_last_name']] = df['customer_name'].str.split(n=1, expand=True)

for index, row in df.iterrows():
    customer_since_date = datetime.date(int(row['customer_since']), 1, 1)

    sql_insert_query = """INSERT INTO customer (customer_first_name, customer_last_name, 
    customer_age, customer_type, customer_since, customer_income, household_size, mode_color)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""
    
    post_cursor.execute(sql_insert_query, (row['customer_first_name'], 
                                           row['customer_last_name'], 
                                           int(row['customer_age']),
                                           row['customer_type'], 
                                           customer_since_date, 
                                           int(row['customer_income']), 
                                           int(row['household_size']),
                                          row['mode_color']))
    print('Record created for ', row['customer_first_name'], row['customer_last_name'])
post_conn.commit()

  df = pd.read_sql(query, ssms_conn)


Record created for  Coraline Flores
Record created for  Presley Ortiz
Record created for  Reid Knight
Record created for  Clarissa Chandler
Record created for  Isaac Vaughn
Record created for  Maleah James
Record created for  Ada Weber
Record created for  Winston Short
Record created for  Jackson Marshall
Record created for  Eleanor Daniel
Record created for  Amanda Reyes
Record created for  Ingrid Maxwell
Record created for  Martin Mueller
Record created for  Victoria Peterson
Record created for  Evan Ramirez
Record created for  Lydia Petersen
Record created for  Jesse McKenzie
Record created for  Teresa Everett
Record created for  Clayton Benson
Record created for  Logan Bailey
Record created for  Colette Bridges
Record created for  Rich Jones
Record created for  Ruby Anderson
Record created for  Benjamin Blankenship
Record created for  Adelina Harris
Record created for  Max Gray
Record created for  Nathaniel Miller
Record created for  Brady Peters
Record created for  Matthew Booth
R

In [None]:
csv = 'nfl_teams.csv'
teams_df = pd.read_csv(csv)

for index, row in teams_df.iterrows():
    teams_insert_query = """INSERT INTO teams (team_name,
                                               team_name_short,
                                               team_abv,
                                               team_abv_pfr,
                                               team_conference,
                                               team_division,
                                               team_conference_pre2002,
                                               team_division_pre2002)
                            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""

    post_cursor.execute(teams_insert_query, (
        row['team_name'],
        row['team_name_short'],
        row['team_id'],
        row['team_id_pfr'],
        row['team_conference'],
        row['team_division'],
        row['team_conference_pre2002'], 
        row['team_division_pre2002']
    ))
    
    print(f"{row['team_name']} has been inserted")

post_conn.commit()


Index(['team_name', 'team_name_short', 'team_id', 'team_id_pfr',
       'team_conference', 'team_division', 'team_conference_pre2002',
       'team_division_pre2002'],
      dtype='object')
Arizona Cardinals has been inserted
Atlanta Falcons has been inserted
Baltimore Colts has been inserted
Baltimore Ravens has been inserted
Boston Patriots has been inserted
Buffalo Bills has been inserted
Carolina Panthers has been inserted
Chicago Bears has been inserted
Cincinnati Bengals has been inserted
Cleveland Browns has been inserted
Dallas Cowboys has been inserted
Denver Broncos has been inserted
Detroit Lions has been inserted
Green Bay Packers has been inserted
Houston Oilers has been inserted
Houston Texans has been inserted
Indianapolis Colts has been inserted
Jacksonville Jaguars has been inserted
Kansas City Chiefs has been inserted
Las Vegas Raiders has been inserted
Los Angeles Chargers has been inserted
Los Angeles Raiders has been inserted
Los Angeles Rams has been inserted
Miam

In [None]:
# weather stations
connections = get_connections()

if connections:
    post_conn = connections['post_conn']
    post_cursor = connections['post_cursor']
    ssms_conn = connections['ssms_conn']
    ssms_cursor = connections['ssms_cursor']

csv = 'nfl_stadiums.csv'

weather_df = pd.read_csv(csv, encoding='latin1')

weather_df = weather_df.dropna(subset=['STATION'])
weather_df = weather_df.drop_duplicates(subset=['STATION'])


for index, row in weather_df.iterrows():
    post_cursor.execute(
            'SELECT count(*) FROM weather_station WHERE stadium_weather_station_code = %s;', 
            (row['STATION'],)
        )
    result = post_cursor.fetchone()

    if result[0] != 0:
        print(f'Weather station {row['STATION']} already exists')
        continue


    weather_insert_query = """INSERT INTO weather_station (stadium_weather_station_code,
                                               station_name,
                                               latitude,
                                               longitude,
                                               elevation)
                            VALUES (%s, %s, %s, %s, %s)"""

    post_cursor.execute(weather_insert_query, (
        row['STATION'],
        row['NAME'],
        float(row['LATITUDE']),
        float(row['LONGITUDE']),
        float(row['ELEVATION'])
    ))

    print(f"weather {row['STATION']} has been inserted")

post_conn.commit()


weather USW00094823 has been inserted
weather US1MOJC0028 has been inserted
weather USC00410337 has been inserted
weather USW00013881 has been inserted
weather US1NYER0093 has been inserted
weather USC00238791 has been inserted
weather USW00023234 has been inserted
weather US1WAKG0038 has been inserted
weather USW00003871 has been inserted
weather USW00014820 has been inserted
weather USW00023062 has been inserted
weather USW00093837 has been inserted
weather USC00186350 has been inserted
weather USC00190860 has been inserted
weather USW00014734 has been inserted
weather USW00012839 has been inserted
weather USW00012842 has been inserted
weather USW00014898 has been inserted
weather USW00013739 has been inserted
weather USW00023174 has been inserted
weather USW00013897 has been inserted
weather US1INMR0076 has been inserted
weather USW00093721 has been inserted
weather USW00014922 has been inserted
weather USW00012918 has been inserted
weather USW00023230 has been inserted
weather USW0

In [59]:
connections = get_connections()

if connections:
    post_conn = connections['post_conn']
    post_cursor = connections['post_cursor']
    ssms_conn = connections['ssms_conn']
    ssms_cursor = connections['ssms_cursor']

csv = 'nfl_stadiums.csv'

stadium_df = pd.read_csv(csv, encoding='latin1')
stadium_df[['city', 'state']] = stadium_df['stadium_location'].str.split(', ', expand=True)

for index, row in stadium_df.iterrows():    
    post_cursor.execute(
            'select count(*) from nfl_stadium where stadium_name = %s;', 
            (row['stadium_name'],)
        )
    result = post_cursor.fetchone()

    if result[0] != 0:
        print(f'Stadium {row['stadium_name']} already exists')
        continue
    
    stadium_open = None
    if pd.notna(row['stadium_open']):
        stadium_open = datetime.date(int(row['stadium_open']), 1, 1)
    
    stadium_close = None
    if pd.notna(row['stadium_close']):
        stadium_close = datetime.date(int(row['stadium_close']), 1, 1)

    stadium_capacity = None
    if pd.notna(row['stadium_capacity']):
        stadium_capacity = int(row['stadium_capacity'].replace(',', ''))


    stadium_weather_station_id = None
    if pd.notna(row['STATION']):
        post_cursor.execute(
            'SELECT stadium_weather_station_id FROM weather_station WHERE stadium_weather_station_code = %s;', 
            (row['STATION'],)
        )
        weather_result = post_cursor.fetchone()
        if weather_result:
            stadium_weather_station_id = weather_result[0]

    stadium_surface = None
    if pd.notna(row['stadium_surface']):
        stadium_surface = row['stadium_surface'].split(',')[0].strip()

    stadium_weather_type = None
    if pd.notna(row['stadium_weather_type']):
        stadium_weather_type = row['stadium_capacity']
    
    stadium_type = None
    if pd.notna(row['stadium_type']):
        stadium_type = row['stadium_type']

    

    weather_insert_query = """INSERT INTO nfl_stadium (stadium_weather_station_id,
                                                       stadium_name,
                                                       stadium_city,
                                                       stadium_state,
                                                       stadium_open_year,
                                                       stadium_close_year,
                                                       stadium_type,
                                                       stadium_surface,
                                                       stadium_capacity,
                                                       stadium_weather_type)
                              VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

    # Execute the query
    post_cursor.execute(weather_insert_query, (
        stadium_weather_station_id,  # Use the fetched weather_station_id or None
        row['stadium_name'],         # stadium_name
        row['city'],                 # stadium_city
        row['state'],                # stadium_state
        stadium_open,                # stadium_open_year
        stadium_close,               # stadium_close_year
        stadium_type,                # stadium_type
        stadium_surface,             # stadium_surface
        stadium_capacity,            # stadium_capacity
        stadium_weather_type         # stadium_weather_type
    ))

    print(f"Stadium {row['stadium_name']} has been inserted")

post_conn.commit()

Weather Acrisure Stadium has been inserted
Weather Alamo Dome has been inserted
Weather Allegiant Stadium has been inserted
Weather Allianz Arena has been inserted
Weather Alltel Stadium has been inserted
Weather Alumni Stadium has been inserted
Weather Anaheim Stadium has been inserted
Weather Arrowhead Stadium has been inserted
Weather AT&T Stadium has been inserted
Weather Atlanta-Fulton County Stadium has been inserted
Weather Balboa Stadium has been inserted
Weather Bank of America Stadium has been inserted
Weather Bills Stadium has been inserted
Weather Busch Memorial Stadium has been inserted
Weather Caesars Superdome has been inserted
Weather Candlestick Park has been inserted
Weather CenturyLink Field has been inserted
Weather Cinergy Field has been inserted
Weather Cleveland Municipal Stadium has been inserted
Weather Cotton Bowl has been inserted
Weather Cowboys Stadium has been inserted
Weather Dolphin Stadium has been inserted
Weather Edward Jones Dome has been inserted
We

In [16]:
# game

# Connection setup (assuming `get_connections` is defined elsewhere)
connections = get_connections()

if connections:
    post_conn = connections['post_conn']
    post_cursor = connections['post_cursor']
    ssms_conn = connections['ssms_conn']
    ssms_cursor = connections['ssms_cursor']

csv_file = 'spread_scores-2.csv'
game = pd.read_csv(csv_file)

# Error log setup
error_csv = 'failed_inserts.csv'
error_rows = []

# Helper functions
def get_team_id(values):
    post_cursor.execute(
        'SELECT team_id FROM teams WHERE team_name = %s;',
        (values,)
    )
    team_id = post_cursor.fetchone()
    return team_id[0] if team_id else None

def get_team_favorite_id(values):
    if values == 'PICK':
        return None
    post_cursor.execute(
        'SELECT team_id FROM teams WHERE team_abv = %s;',
        (values,)
    )
    team_favorite_id = post_cursor.fetchone()
    return team_favorite_id[0] if team_favorite_id else None

def get_stadium_id(values):
    post_cursor.execute(
        'SELECT stadium_id FROM nfl_stadium WHERE stadium_name = %s;',
        (values,)
    )
    stadium_id = post_cursor.fetchone()
    return stadium_id[0] if stadium_id else None

def calculate_betting_results(row):
    total_points = row['score_home'] + row['score_away']
    # Over/Under
    if total_points > row['over_under_line']:
        winner_ou = 'over'
    elif total_points < row['over_under_line']:
        winner_ou = 'under'
    else:
        winner_ou = 'push'

    # Spread
    margin = (
        row['score_home'] - row['score_away']
        if row['team_favorite_id'] == row['team_home']
        else row['score_away'] - row['score_home']
    )
    if margin > row['spread_favorite']:
        winner_line = 'favored'
    elif margin < row['spread_favorite']:
        winner_line = 'underdog'
    else:
        winner_line = 'push'

    return winner_ou, winner_line

# Process data
counter = 0
for index, row in game.iterrows():
    try:
        team_id_home = get_team_id(row['team_home'])
        team_id_away = get_team_id(row['team_away'])
        stadium_id = get_stadium_id(row['stadium'])
        team_id_favorite = get_team_favorite_id(row['team_favorite_id'])

        winner_ou, winner_line = calculate_betting_results(row)

        game_insert_query = """INSERT INTO game (schedule_date,
                                                schedule_season,
                                                schedule_week,
                                                schedule_playoff,
                                                team_id_home,
                                                score_home,
                                                team_id_away,
                                                score_away,
                                                winner_ou,
                                                winner_line,
                                                team_favored,
                                                favored_spread,
                                                over_under_line,
                                                stadium_id,
                                                stadium_neutral)
                                  VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

        post_cursor.execute(game_insert_query, (
            row['schedule_date'],
            row['schedule_season'],
            row['schedule_week'],
            row['schedule_playoff'],
            team_id_home,
            row['score_home'],
            team_id_away,
            row['score_away'],
            winner_ou,
            winner_line,
            team_id_favorite,
            row['spread_favorite'],
            row['over_under_line'],
            stadium_id,
            row['stadium_neutral']
        ))

        counter += 1
        if counter % 400 == 0:
            print(f"Uploaded {counter} games so far")
            post_conn.commit()

    except Exception as e:
        print(f"Error processing row {index}: {e}")
        # Add problematic row to the error log
        error_rows.append(row.to_dict())

# Commit the rest
post_conn.commit()

# Write failed rows to CSV
if error_rows:
    with open(error_csv, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = game.columns
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(error_rows)

print("Games are uploaded")
if error_rows:
    print(f"Some rows failed. Check {error_csv} for details.")


Uploaded 400 games so far
Uploaded 800 games so far
Uploaded 1200 games so far
Uploaded 1600 games so far
Uploaded 2000 games so far
Uploaded 2400 games so far
Uploaded 2800 games so far
Uploaded 3200 games so far
Uploaded 3600 games so far
Uploaded 4000 games so far
Uploaded 4400 games so far
Uploaded 4800 games so far
Error processing row 4804: null value in column "stadium_id" of relation "game" violates not-null constraint
DETAIL:  Failing row contains (10723, 2019-09-08, 2019, 1, f, 21, 30, 17, 24, over, push, 21, -6.0, 44.5, null, f, null, null, null, null).

Error processing row 4805: current transaction is aborted, commands ignored until end of transaction block

Error processing row 4806: current transaction is aborted, commands ignored until end of transaction block

Error processing row 4807: current transaction is aborted, commands ignored until end of transaction block

Error processing row 4808: current transaction is aborted, commands ignored until end of transaction blo

KeyboardInterrupt: 