# Converting a group of csv files with our data to a SQLite database

The purpose of the following notebook is to highlight the process of turning the csv files into a usable, and easily accessible database

In [1]:
#import dependencies
import pandas as pd
import sqlite3
import sqlalchemy as sql
import datetime as dt

##### Set Up database ######
engine = sql.create_engine("sqlite:///data/sales.db", echo=True)


In [2]:
#load the file(s) we need into a pandas df
path = "Resources"
weather_df = pd.read_csv(f"{path}/week_weather_summary.csv")
sales_df = pd.read_csv(f"{path}/weekly_sales_complete.csv")

In [3]:
#preview both dfs

## Weather

In [4]:
weather_df.head()

Unnamed: 0,dt,temp,feels_like,temp_min,temp_max,pressure,humidity,wind_speed,clouds_all,weather_main_Clear,weather_main_Clouds,weather_main_Drizzle,weather_main_Fog,weather_main_Haze,weather_main_Mist,weather_main_Rain,weather_main_Smoke,weather_main_Snow,weather_main_Thunderstorm
0,2019-02-15,23.368876,14.429101,-5.1,41.14,1018.101124,76.247191,11.68809,50.320225,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
1,2019-02-22,22.586823,18.859063,0.37,42.76,1019.541667,77.239583,3.882969,61.526042,0.229167,0.213542,0.041667,0.010417,0.0625,0.088542,0.130208,0.0,0.223958,0.0
2,2019-03-01,15.662619,11.220119,-8.72,31.91,1022.636905,64.565476,3.630298,43.059524,0.464286,0.386905,0.0,0.0,0.053571,0.005952,0.0,0.0,0.089286,0.0
3,2019-03-08,34.007813,31.361771,5.77,62.64,1012.838542,79.098958,3.752135,52.005208,0.302083,0.203125,0.057292,0.010417,0.057292,0.109375,0.239583,0.0,0.020833,0.0
4,2019-03-15,36.160057,33.896761,21.74,52.23,1019.306818,71.698864,3.503636,48.096591,0.392045,0.340909,0.028409,0.0,0.051136,0.079545,0.102273,0.0,0.005682,0.0


In [5]:
#checking data types
weather_df.dtypes

dt                            object
temp                         float64
feels_like                   float64
temp_min                     float64
temp_max                     float64
pressure                     float64
humidity                     float64
wind_speed                   float64
clouds_all                   float64
weather_main_Clear           float64
weather_main_Clouds          float64
weather_main_Drizzle         float64
weather_main_Fog             float64
weather_main_Haze            float64
weather_main_Mist            float64
weather_main_Rain            float64
weather_main_Smoke           float64
weather_main_Snow            float64
weather_main_Thunderstorm    float64
dtype: object

In [6]:
#converting respective date column into datetime format for easier comparison
weather_df['dt'] = pd.to_datetime(weather_df['dt'])

In [7]:
#save df to sqlite table
weather_df.to_sql('weather', con=engine, if_exists="replace")

2021-09-29 20:07:25,153 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("weather")
2021-09-29 20:07:25,155 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,157 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("weather")
2021-09-29 20:07:25,158 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,160 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2021-09-29 20:07:25,161 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,163 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("weather")
2021-09-29 20:07:25,163 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,169 INFO sqlalchemy.engine.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = ? AND type = 'table'
2021-09-29 20:07:25,170 INFO sqlalchemy.engine.Engine [raw sql] ('weather',)
2021-09-29 20:07:25,172 INFO sqlalchemy.engine.Engine PRAGMA main.foreign_key_list("weather"

## Sales

In [8]:
sales_df.head()

Unnamed: 0,Item,Item Code,Quantity,Unit Price,Total Sales Amount,date
0,PINT Spotted Cow,3140,64.0,3.9375,252.0,2021-01-08
1,DBL RAIL Vodka,3455,37.0,4.945946,183.0,2021-01-08
2,BTL Miller High Life,3122,31.0,3.25,100.75,2021-01-08
3,PINT Stein,3141,29.0,3.517241,102.0,2021-01-08
4,SHOT Seagrams VO,3325,26.0,2.423077,63.0,2021-01-08


In [9]:
#checking data types
sales_df.dtypes

Item                   object
Item Code               int64
Quantity              float64
Unit Price            float64
Total Sales Amount    float64
date                   object
dtype: object

In [10]:
#converting respective date column into datetime format for easier comparison
sales_df['date'] = pd.to_datetime(sales_df['date'])

In [11]:
#save df to sqlite table
sales_df.to_sql('sales', con=engine, if_exists='replace')

2021-09-29 20:07:25,339 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sales")
2021-09-29 20:07:25,340 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,343 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sales")
2021-09-29 20:07:25,344 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,346 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2021-09-29 20:07:25,347 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,349 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("sales")
2021-09-29 20:07:25,350 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:07:25,353 INFO sqlalchemy.engine.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = ? AND type = 'table'
2021-09-29 20:07:25,354 INFO sqlalchemy.engine.Engine [raw sql] ('sales',)
2021-09-29 20:07:25,356 INFO sqlalchemy.engine.Engine PRAGMA main.foreign_key_list("sales")
2021-09-

In [12]:
#create a new dataframe to hold the merged sales and weather data
new_sales_df = pd.merge(sales_df, weather_df, left_on=sales_df['date'], right_on=weather_df['dt'])
#copying redundant columns
new_sales_df = new_sales_df.drop(columns=['date', 'dt'])
#rename leftmost column
new_sales_df = new_sales_df.rename(columns={'key_0': 'date'})

In [13]:
new_sales_df

Unnamed: 0,date,Item,Item Code,Quantity,Unit Price,Total Sales Amount,temp,feels_like,temp_min,temp_max,...,weather_main_Clear,weather_main_Clouds,weather_main_Drizzle,weather_main_Fog,weather_main_Haze,weather_main_Mist,weather_main_Rain,weather_main_Smoke,weather_main_Snow,weather_main_Thunderstorm
0,2021-01-08,PINT Spotted Cow,3140,64.0,3.937500,252.00,29.488294,24.688941,17.26,41.32,...,0.123529,0.582353,0.005882,0.005882,0.0,0.152941,0.105882,0.0,0.023529,0.0
1,2021-01-08,DBL RAIL Vodka,3455,37.0,4.945946,183.00,29.488294,24.688941,17.26,41.32,...,0.123529,0.582353,0.005882,0.005882,0.0,0.152941,0.105882,0.0,0.023529,0.0
2,2021-01-08,BTL Miller High Life,3122,31.0,3.250000,100.75,29.488294,24.688941,17.26,41.32,...,0.123529,0.582353,0.005882,0.005882,0.0,0.152941,0.105882,0.0,0.023529,0.0
3,2021-01-08,PINT Stein,3141,29.0,3.517241,102.00,29.488294,24.688941,17.26,41.32,...,0.123529,0.582353,0.005882,0.005882,0.0,0.152941,0.105882,0.0,0.023529,0.0
4,2021-01-08,SHOT Seagrams VO,3325,26.0,2.423077,63.00,29.488294,24.688941,17.26,41.32,...,0.123529,0.582353,0.005882,0.005882,0.0,0.152941,0.105882,0.0,0.023529,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23586,2020-01-03,Skyy Watermelon BOMB,3374,1.0,4.000000,4.00,30.726059,24.031235,7.29,48.90,...,0.352941,0.588235,0.000000,0.000000,0.0,0.011765,0.029412,0.0,0.017647,0.0
23587,2020-01-03,PIT LKFT Hazy Rabbit,3359,1.0,15.000000,15.00,30.726059,24.031235,7.29,48.90,...,0.352941,0.588235,0.000000,0.000000,0.0,0.011765,0.029412,0.0,0.017647,0.0
23588,2020-01-03,PIT Abita TurboDog,3363,1.0,15.000000,15.00,30.726059,24.031235,7.29,48.90,...,0.352941,0.588235,0.000000,0.000000,0.0,0.011765,0.029412,0.0,0.017647,0.0
23589,2020-01-03,SHOT Titos,3655,1.0,0.000000,0.00,30.726059,24.031235,7.29,48.90,...,0.352941,0.588235,0.000000,0.000000,0.0,0.011765,0.029412,0.0,0.017647,0.0
