In [111]:
# Import Dependencies
import pandas as pd
import datetime as dt
from sqlalchemy import create_engine
#from config import pw

In [112]:
# Extract all 4 csv files into pandas df.
df_stores = pd.read_csv('Resources/stores data-set.csv')
df_sales = pd.read_csv('Resources/sales data-set.csv')
df_features = pd.read_csv('Resources/Features data set.csv')
df_holidays = pd.read_csv('Resources/holidays.csv')

In [113]:
# Check Features Data Types
df_features.dtypes

Store             int64
Date             object
Temperature     float64
Fuel_Price      float64
MarkDown1       float64
MarkDown2       float64
MarkDown3       float64
MarkDown4       float64
MarkDown5       float64
CPI             float64
Unemployment    float64
IsHoliday          bool
dtype: object

In [114]:
# Display Features DF
df_features.head()

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
0,1,05/02/2010,42.31,2.572,,,,,,211.096358,8.106,False
1,1,12/02/2010,38.51,2.548,,,,,,211.24217,8.106,True
2,1,19/02/2010,39.93,2.514,,,,,,211.289143,8.106,False
3,1,26/02/2010,46.63,2.561,,,,,,211.319643,8.106,False
4,1,05/03/2010,46.5,2.625,,,,,,211.350143,8.106,False


In [115]:
# Create a filtered dataframe from specific columns
features_cols = ["Store", "Date", "Temperature", "Fuel_Price", "MarkDown1", 
                 "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI",
                 "Unemployment", "IsHoliday"
                ]
df_features_transformed = df_features[features_cols].copy()

In [116]:
#Convert Features Column 'Date' to Datetime. 
df_features_transformed['Date'] = pd.to_datetime(df_features_transformed['Date'])
# df_features['Date'] = df_features['Date'].dt.date

# Convert Features Column 'Store' to String
df_features_transformed['Store'] = df_features_transformed['Store'].astype(str)

# Round each Float value to 2 decimal places.
df_features_transformed = df_features_transformed.round({'Temperature': 2, 'Fuel_Price': 2, 'CPI': 2, 'Unemployment': 2})

# Convert all NaN values to 0.0
df_features_transformed = df_features_transformed.fillna(0.0)

# Rename Markdown and IsHoliday columns.
df_features_transformed = df_features_transformed.rename(columns={'MarkDown1': 'Mark_Down_1', 'MarkDown2': 'Mark_Down_2',
                                                      'MarkDown3': 'Mark_Down_3', 'MarkDown4': 'Mark_Down_4',
                                                      'MarkDown5': 'Mark_Down_5', 'IsHoliday': 'Is_Holiday'
                                                     })

# Lowercase column values
df_features_transformed.columns = map(str.lower, df_features_transformed.columns)

In [117]:
df_features_transformed.head()


Unnamed: 0,store,date,temperature,fuel_price,mark_down_1,mark_down_2,mark_down_3,mark_down_4,mark_down_5,cpi,unemployment,is_holiday
0,1,2010-05-02,42.31,2.57,0.0,0.0,0.0,0.0,0.0,211.1,8.11,False
1,1,2010-12-02,38.51,2.55,0.0,0.0,0.0,0.0,0.0,211.24,8.11,True
2,1,2010-02-19,39.93,2.51,0.0,0.0,0.0,0.0,0.0,211.29,8.11,False
3,1,2010-02-26,46.63,2.56,0.0,0.0,0.0,0.0,0.0,211.32,8.11,False
4,1,2010-05-03,46.5,2.62,0.0,0.0,0.0,0.0,0.0,211.35,8.11,False


In [118]:
df_features_transformed.dtypes


store                   object
date            datetime64[ns]
temperature            float64
fuel_price             float64
mark_down_1            float64
mark_down_2            float64
mark_down_3            float64
mark_down_4            float64
mark_down_5            float64
cpi                    float64
unemployment           float64
is_holiday                bool
dtype: object

In [119]:
#Sales DF

In [120]:
# Check Sales Data Types
df_sales.dtypes

Store             int64
Dept              int64
Date             object
Weekly_Sales    float64
IsHoliday          bool
dtype: object

In [121]:
# Display Sales DF
df_sales.head()

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday
0,1,1,05/02/2010,24924.5,False
1,1,1,12/02/2010,46039.49,True
2,1,1,19/02/2010,41595.55,False
3,1,1,26/02/2010,19403.54,False
4,1,1,05/03/2010,21827.9,False


In [122]:
# Create a filtered dataframe from specific columns
sales_cols = ["Store", "Dept", "Date",
              "Weekly_Sales", "IsHoliday"
             ]
df_sales_transformed = df_sales[sales_cols].copy()

In [123]:
# Convert Sales Columns 'Store' and 'Dept' to String
df_sales_transformed['Store'] = df_sales_transformed['Store'].astype(str)
df_sales_transformed['Dept'] = df_sales_transformed['Dept'].astype(str)

# Rename Sales Column 'IsHoliday'
df_sales_transformed = df_sales_transformed.rename(columns={'IsHoliday': 'Is_Holiday'})

# Convert Sales Column 'Date' to Datetime. 
df_sales_transformed['Date'] = pd.to_datetime(df_sales_transformed['Date'])

# Lowercase column values
df_sales_transformed.columns = map(str.lower, df_sales_transformed.columns)

In [124]:
df_sales_transformed.head()


Unnamed: 0,store,dept,date,weekly_sales,is_holiday
0,1,1,2010-05-02,24924.5,False
1,1,1,2010-12-02,46039.49,True
2,1,1,2010-02-19,41595.55,False
3,1,1,2010-02-26,19403.54,False
4,1,1,2010-05-03,21827.9,False


In [125]:
df_sales_transformed.dtypes


store                   object
dept                    object
date            datetime64[ns]
weekly_sales           float64
is_holiday                bool
dtype: object

In [126]:
#Holidays DF

In [127]:
# Check Holidays Data Types
df_holidays.dtypes

Holiday    object
Date       object
dtype: object

In [128]:
# Display Holidays DF
df_holidays.head()

Unnamed: 0,Holiday,Date
0,super_bowl,12/2/2010
1,super_bowl,11/2/2011
2,super_bowl,10/2/2012
3,super_bowl,8/2/2013
4,labor_day,10/9/2010


In [129]:
# Create a filtered dataframe from specific columns
holidays_cols = ["Holiday", "Date"]
df_holidays_transformed = df_holidays[holidays_cols].copy()

In [130]:
# Convert Holidays column 'Date' to Datetime.
df_holidays_transformed['Date'] = pd.to_datetime(df_holidays_transformed['Date'])

# Lowercase column values
df_holidays_transformed.columns = map(str.lower, df_holidays_transformed.columns)

In [131]:
df_holidays_transformed


Unnamed: 0,holiday,date
0,super_bowl,2010-12-02
1,super_bowl,2011-11-02
2,super_bowl,2012-10-02
3,super_bowl,2013-08-02
4,labor_day,2010-10-09
5,labor_day,2011-09-09
6,labor_day,2012-07-09
7,labor_day,2013-06-09
8,thanksgiving,2010-11-26
9,thanksgiving,2011-11-25


In [132]:
df_holidays_transformed.dtypes


holiday            object
date       datetime64[ns]
dtype: object

In [133]:
#Stores DF

In [134]:
# Check Stores Data Types
df_stores.dtypes

Store     int64
Type     object
Size      int64
dtype: object

In [135]:
# Display Stores DF
df_stores.head()

Unnamed: 0,Store,Type,Size
0,1,A,151315
1,2,A,202307
2,3,B,37392
3,4,A,205863
4,5,B,34875


In [136]:
# Create a filtered dataframe from specific columns
stores_cols = ["Store", "Type", "Size"]
df_stores_transformed = df_stores[stores_cols].copy()

In [137]:
# Convert Stores Column 'Store' to String
df_stores_transformed['Store'] = df_stores_transformed['Store'].astype(str)

# Lowercase column values
df_stores_transformed.columns = map(str.lower, df_stores_transformed.columns)

In [138]:
df_stores_transformed.dtypes


store    object
type     object
size      int64
dtype: object

In [139]:
df_stores_transformed.head()


Unnamed: 0,store,type,size
0,1,A,151315
1,2,A,202307
2,3,B,37392
3,4,A,205863
4,5,B,34875


In [140]:
# Load

In [141]:
# Create PostgreSQL Database Connection
connection = "postgres:postgres@localhost:5432/retail_db"
engine = create_engine(f'postgresql://{connection}')

In [142]:
# Confirm tables
engine.table_names()

['stores', 'holidays', 'features', 'sales']

In [143]:
# Load DataFrames into database
df_stores_transformed.to_sql(name='stores', con=engine, if_exists='append', index=False)

In [144]:
df_holidays_transformed.to_sql(name='holidays', con=engine, if_exists='append', index=False)

In [145]:
df_features_transformed.to_sql(name='features', con=engine, if_exists='append', index=False)

In [None]:
df_sales_transformed.to_sql(name='sales', con=engine, if_exists='append', index=False)