In [1]:
import pandas as pd
from sqlalchemy import create_engine
from pathlib import Path

## Import Data and Create Tables

In [2]:
positionPath = Path.cwd() / 'positions.csv'
positions = pd.read_csv(positionPath, header=None)
positions.columns = ['position', 'offenseDefense']
positions.head()

Unnamed: 0,position,offenseDefense
0,QB,offense
1,LB,defense
2,DB,defense
3,WR,offense
4,CB,defense


In [3]:
teamsPath = Path.cwd() / 'teams.csv'
teams = pd.read_csv(teamsPath, header=None)
teams.columns = ['teamName','teamAbbrv','teamState','teamCity']
teams.head()

Unnamed: 0,teamName,teamAbbrv,teamState,teamCity
0,Arizona Cardinals,ARI,Arizona,Glendale
1,Atlanta Falcons,ATL,Georgia,Atlanta
2,Baltimore Ravens,BAL,Maryland,Baltimore
3,Buffalo Bills,BUF,New York,Orchard Park
4,Carolina Panthers,CAR,North Carolina,Charlotte


In [4]:
rostersPath = Path.cwd() / 'rosters.csv'
rosters = pd.read_csv(rostersPath, header=0)
rosters.head()

Unnamed: 0,teamName,playerName,year,salary
0,Drew Brees,NOR,"$26,400,000",2015
1,Philip Rivers,SDG,"$21,166,668",2015
2,Calvin Johnson,DET,"$20,558,000",2015
3,Charles Johnson,CAR,"$20,020,000",2015
4,Matt Ryan,ATL,"$19,500,000",2015


In [5]:
resultsSBPath = Path.cwd() / '[Cleaned]TV-Ratings.csv'
resultsSB = pd.read_csv(resultsSBPath, header= 0)
resultsSB.head()

Unnamed: 0,Year,Winner,Winner Score,Loser,Loser Score,Neilson Rating,Attendance,TV Viewers,Streaming,Spanish,Out Of Home,Total
0,1967,Green Bay Packers,35,Kansas City Chiefs,10,41.1,137492,51180000,0,0,0,51317492
1,1968,Green Bay Packers,33,Oakland Raiders,14,36.8,75546,39120000,0,0,0,39195546
2,1969,New York Jets,16,Baltimore Colts,7,36.0,75389,41660000,0,0,0,41735389
3,1970,Kansas City Chiefs,23,Minnesota Vikings,7,39.4,80562,44270000,0,0,0,44350562
4,1971,Baltimore Colts,16,Dallas Cowboys,13,39.9,79204,46040000,0,0,0,46119204


In [6]:
playersPath = Path.cwd() / 'players.csv'
players = pd.read_csv(playersPath, header= 0)
players.head()

Unnamed: 0,playerID,playerName,position
0,1809,Robert Blackmon,DB
1,23586,Dean Wells,LB
2,355,Kiko Alonso,ILB
3,18182,Steve Ramsey,QB
4,16250,Cory Nelms,CB


## Create Database

In [7]:
engine = create_engine('sqlite:///nflDB.db', echo=False)

#### This next block is primarily for reference of data structure and to allow for dropping tables to ensure no duplicate data when creating database

In [8]:
teams_script = '''
CREATE TABLE "teams" (
  "teamName" varchar,
  "teamAbbrv" varchar PRIMARY KEY,
  "teamState" varchar,
  "teamCity" varchar
);
'''

players_script = '''
CREATE TABLE "players" (
  "playerID" varchar PRIMARY KEY,
  "playerName" varchar,
  "position" varchar
);
'''

rosters_script = '''
CREATE TABLE "rosters" (
  "teamName" varchar PRIMARY KEY,
  "playerName" varchar,
  "year" int,
  "salary" int
);
'''

position_script = '''
CREATE TABLE "positions" (
  "position" varchar PRIMARY KEY,
  "offenseDefense" varchar
)
'''

resultsSB_script = '''
CREATE TABLE "resultsSB" (
    "yearSB" varchar PRIMARY KEY)
'''

tables = {'teams' : teams_script.strip(),
         'players': players_script.strip(),
         'rosters' : rosters_script.strip(),
         'positions' : position_script.strip(),
         'resultsSB' : resultsSB_script.strip()}

In [9]:
### Drops all existing tables
for table in tables.keys():
    print(f'dropping the table {table} if it already exists...')
    engine.execute(f'drop table IF EXISTS {table}')

dropping the table teams if it already exists...
dropping the table players if it already exists...
dropping the table rosters if it already exists...
dropping the table positions if it already exists...
dropping the table resultsSB if it already exists...


Keep this cell commented. This cell creates the schema in SQLite, but it is not used since pd.to_sql automatically creates the tables. 

In [10]:
# ## Creates tables based on above schema
# for table , script in tables.items():
#     print(f'creating the table {table}...')
# engine.execute(f'{script}')

In [11]:
try:
    positions.to_sql(name='positions', con=engine, if_exists='append', index=False)
except:
    print("Data is already in there...")

In [12]:
try:
    rosters.to_sql(name='rosters', con=engine, if_exists='append', index=False)
except:
    print("Data is already in there...")

In [13]:
try:
    resultsSB.to_sql(name='resultsSB', con=engine, if_exists='append', index=False)
except:
    print("Data is already in there...")

In [14]:
try:
    teams.to_sql(name='teams', con=engine, if_exists='append', index=False)
except:
    print("Data is already in there...")

In [15]:
try:
    players.to_sql(name='players', con=engine, if_exists='append', index=False)
except:
    print("Data is already in there...")