In [1]:
import datetime as dt

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

from config import password

In [2]:
# Load the raw match results from the CSV file into a DataFrame
raw_csv_path = "Resources/soccer_data_raw.csv"
df = pd.read_csv(raw_csv_path)

# Preview the first rows
df.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,False
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,False
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,False
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,False
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,False


## Transform Data

In [3]:
# Separate month and year.
# Dates are "YYYY-MM-DD" strings, so splitting on "-" yields [year, month, day].
yr = df["date"].str.split("-")

# Pick the pieces out with the vectorized .str accessor rather than looping
# over every row in Python; the values remain strings (e.g. "1872", "11").
year = yr.str[0]
month = yr.str[1]

# Add new columns
df["year"] = year
df["month"] = month

df.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,year,month
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,False,1872,11
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,False,1873,3
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,False,1874,3
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,False,1875,3
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,False,1876,3


In [4]:
# Classify every match into a season based on its calendar month:
#   summer -> April through September (months 4-9)
#   winter -> October through March (every other month)

# Month values converted to integers
month_num = list(map(int, df['month']))

summer_months = range(4, 10)
season_list = ['summer' if number in summer_months else 'winter' for number in month_num]

In [5]:
# Translate each month number into its full month name by formatting the first
# day of that month with '%B'; the year 1111 is only a placeholder.
month_list = [dt.date(1111, number, 1).strftime('%B') for number in month_num]

In [6]:
# Drop the numerical month column, then append the month-name column
# followed by the season column
df = df.drop(columns='month').assign(month=month_list, season=season_list)

In [16]:
# Put the columns into their final order
column_order = [
    'date', 'year', 'month',
    'home_team', 'away_team',
    'home_score', 'away_score',
    'tournament', 'city', 'country', 'neutral',
    'season',
]
df = df.loc[:, column_order]
df.head()

Unnamed: 0,date,year,month,home_team,away_team,home_score,away_score,tournament,city,country,neutral,season
0,1872-11-30,1872,November,Scotland,England,0,0,Friendly,Glasgow,Scotland,False,winter
1,1873-03-08,1873,March,England,Scotland,4,2,Friendly,London,England,False,winter
2,1874-03-07,1874,March,Scotland,England,2,1,Friendly,Glasgow,Scotland,False,winter
3,1875-03-06,1875,March,England,Scotland,2,2,Friendly,London,England,False,winter
4,1876-03-04,1876,March,Scotland,England,3,0,Friendly,Glasgow,Scotland,False,winter


In [15]:
# Name the existing row index `id`.
# rename_axis labels the index in a single step, replacing the
# reset_index -> rename('index' -> 'id') -> set_index('id') round trip, and
# it does not fail if the index already carries a name.
df2 = df.rename_axis('id')
df2.head()

Unnamed: 0_level_0,date,year,month,home_team,away_team,home_score,away_score,tournament,city,country,neutral,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,1872-11-30,1872,November,Scotland,England,0,0,Friendly,Glasgow,Scotland,False,winter
1,1873-03-08,1873,March,England,Scotland,4,2,Friendly,London,England,False,winter
2,1874-03-07,1874,March,Scotland,England,2,1,Friendly,Glasgow,Scotland,False,winter
3,1875-03-06,1875,March,England,Scotland,2,2,Friendly,London,England,False,winter
4,1876-03-04,1876,March,Scotland,England,3,0,Friendly,Glasgow,Scotland,False,winter


## Connect & Load Into Database

In [17]:
# Create connection to the local PostgreSQL database `olympics_db`
engine = create_engine(f'postgresql+psycopg2://postgres:{password}@localhost:5432/olympics_db')

# Confirm table.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names.
inspect(engine).get_table_names()

['athletes', 'regions', 'summer', 'winter', 'country', 'soccer']

In [None]:
# Append the prepared frame to the `soccer` table, writing the index as a column
df2.to_sql(
    'soccer',
    engine,
    if_exists='append',
    index=True,
)

In [None]:
# Confirm data has been added.
# LIMIT 5 makes the database return only the preview rows instead of
# transferring the entire table to pandas just to show the first five.
pd.read_sql_query('SELECT * FROM soccer LIMIT 5', con=engine)