# Converting a group of csv files with our data to a SQLite database

The purpose of the following notebook is to highlight the process of turning the csv files into a usable, and easily accessible database

In [1]:
#import dependencies
import pandas as pd
import sqlite3
import sqlalchemy as sql
import datetime as dt

##### Set Up database ######
engine = sql.create_engine("sqlite:///data/sales.db", echo=True)


In [2]:
#load the file(s) we need into a pandas df
path = "Resources"
weather_df = pd.read_csv(f"{path}/week_weather_summary.csv")
sales_df = pd.read_csv(f"{path}/weekly_sales.csv")

In [3]:
#preview both dfs

## Weather

In [4]:
weather_df.head()

Unnamed: 0,dt,temp,feels_like,temp_min,temp_max,pressure,humidity,wind_speed,clouds_all,weather_main_Clear,weather_main_Clouds,weather_main_Drizzle,weather_main_Fog,weather_main_Haze,weather_main_Mist,weather_main_Rain,weather_main_Smoke,weather_main_Snow,weather_main_Thunderstorm
0,2019-02-15,23.368876,14.429101,-5.1,41.14,1018.101124,76.247191,11.68809,50.320225,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
1,2019-02-22,22.586823,18.859063,0.37,42.76,1019.541667,77.239583,3.882969,61.526042,0.229167,0.213542,0.041667,0.010417,0.0625,0.088542,0.130208,0.0,0.223958,0.0
2,2019-03-01,15.662619,11.220119,-8.72,31.91,1022.636905,64.565476,3.630298,43.059524,0.464286,0.386905,0.0,0.0,0.053571,0.005952,0.0,0.0,0.089286,0.0
3,2019-03-08,34.007813,31.361771,5.77,62.64,1012.838542,79.098958,3.752135,52.005208,0.302083,0.203125,0.057292,0.010417,0.057292,0.109375,0.239583,0.0,0.020833,0.0
4,2019-03-15,36.160057,33.896761,21.74,52.23,1019.306818,71.698864,3.503636,48.096591,0.392045,0.340909,0.028409,0.0,0.051136,0.079545,0.102273,0.0,0.005682,0.0


In [5]:
#checking data types
weather_df.dtypes

dt                            object
temp                         float64
feels_like                   float64
temp_min                     float64
temp_max                     float64
pressure                     float64
humidity                     float64
wind_speed                   float64
clouds_all                   float64
weather_main_Clear           float64
weather_main_Clouds          float64
weather_main_Drizzle         float64
weather_main_Fog             float64
weather_main_Haze            float64
weather_main_Mist            float64
weather_main_Rain            float64
weather_main_Smoke           float64
weather_main_Snow            float64
weather_main_Thunderstorm    float64
dtype: object

In [6]:
#converting respective date column into datetime format for easier comparison
weather_df['dt'] = pd.to_datetime(weather_df['dt'])

In [7]:
#save df to sqlite table
weather_df.to_sql('weather', con=engine, if_exists="replace")

2021-09-29 18:38:41,827 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("weather")
2021-09-29 18:38:41,829 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:41,832 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("weather")
2021-09-29 18:38:41,833 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:41,836 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2021-09-29 18:38:41,837 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:41,838 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("weather")
2021-09-29 18:38:41,839 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:41,845 INFO sqlalchemy.engine.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = ? AND type = 'table'
2021-09-29 18:38:41,845 INFO sqlalchemy.engine.Engine [raw sql] ('weather',)
2021-09-29 18:38:41,847 INFO sqlalchemy.engine.Engine PRAGMA main.foreign_key_list("weather"

## Sales

In [8]:
sales_df.head()

Unnamed: 0,Item,Item Code,Quantity,Unit Price,Total Sales Amount,date
0,RAIL Vodka,3339,46.0,4.826087,222.0,2019-02-15
1,PINT LKFT IPA,3136,44.0,3.579545,157.5,2019-02-15
2,SHOT Tullamore Dew,3327,38.0,4.0,152.0,2019-02-15
3,PINT Spotted Cow,3140,36.0,3.75,135.0,2019-02-15
4,PINT Miller High Life,3137,30.0,3.0,90.0,2019-02-15


In [9]:
#checking data types
sales_df.dtypes

Item                   object
Item Code               int64
Quantity              float64
Unit Price            float64
Total Sales Amount    float64
date                   object
dtype: object

In [10]:
#converting respective date column into datetime format for easier comparison
sales_df['date'] = pd.to_datetime(sales_df['date'])

In [11]:
#save df to sqlite table
sales_df.to_sql('sales', con=engine, if_exists='replace')

2021-09-29 18:38:42,005 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sales")
2021-09-29 18:38:42,006 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:42,008 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sales")
2021-09-29 18:38:42,009 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:42,011 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2021-09-29 18:38:42,012 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:42,013 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("sales")
2021-09-29 18:38:42,015 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 18:38:42,017 INFO sqlalchemy.engine.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = ? AND type = 'table'
2021-09-29 18:38:42,018 INFO sqlalchemy.engine.Engine [raw sql] ('sales',)
2021-09-29 18:38:42,020 INFO sqlalchemy.engine.Engine PRAGMA main.foreign_key_list("sales")
2021-09-

In [12]:
#create a new dataframe to hold the merged sales and weather data
new_sales_df = pd.merge(sales_df, weather_df, left_on=sales_df['date'], right_on=weather_df['dt'])
#copying redundant columns
new_sales_df = new_sales_df.drop(columns=['date', 'dt'])
#rename leftmost column
new_sales_df = new_sales_df.rename(columns={'key_0': 'date'})

In [13]:
new_sales_df

Unnamed: 0,date,Item,Item Code,Quantity,Unit Price,Total Sales Amount,temp,feels_like,temp_min,temp_max,...,weather_main_Clear,weather_main_Clouds,weather_main_Drizzle,weather_main_Fog,weather_main_Haze,weather_main_Mist,weather_main_Rain,weather_main_Smoke,weather_main_Snow,weather_main_Thunderstorm
0,2019-02-15,RAIL Vodka,3339,46.0,4.826087,222.0,23.368876,14.429101,-5.10,41.14,...,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
1,2019-02-15,PINT LKFT IPA,3136,44.0,3.579545,157.5,23.368876,14.429101,-5.10,41.14,...,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
2,2019-02-15,SHOT Tullamore Dew,3327,38.0,4.000000,152.0,23.368876,14.429101,-5.10,41.14,...,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
3,2019-02-15,PINT Spotted Cow,3140,36.0,3.750000,135.0,23.368876,14.429101,-5.10,41.14,...,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
4,2019-02-15,PINT Miller High Life,3137,30.0,3.000000,90.0,23.368876,14.429101,-5.10,41.14,...,0.410112,0.224719,0.005618,0.005618,0.0,0.039326,0.067416,0.0,0.247191,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15580,2021-09-17,SHOT Jack Daniels,3527,1.0,4.000000,4.0,69.262653,69.396735,50.74,87.76,...,0.806122,0.030612,0.000000,0.000000,0.0,0.020408,0.142857,0.0,0.000000,0.0
15581,2021-09-17,DBL Hendricks Gin,3485,1.0,9.000000,9.0,69.262653,69.396735,50.74,87.76,...,0.806122,0.030612,0.000000,0.000000,0.0,0.020408,0.142857,0.0,0.000000,0.0
15582,2021-09-17,SHOT Jose Cuervo,3475,1.0,3.000000,3.0,69.262653,69.396735,50.74,87.76,...,0.806122,0.030612,0.000000,0.000000,0.0,0.020408,0.142857,0.0,0.000000,0.0
15583,2021-09-17,SHOT Skyy Raspb,3669,1.0,3.000000,3.0,69.262653,69.396735,50.74,87.76,...,0.806122,0.030612,0.000000,0.000000,0.0,0.020408,0.142857,0.0,0.000000,0.0
