# Database Creator/Loader

> Author: Hannan Khan     
> Last Updated: 02/20/2022
  
This notebook creates a SQLite database with one table for each CSV file in the fitness tracker data.
## Load Libraries

In [1]:
import sqlite3 as sql
import os
import csv
from pprint import pprint

## Create Database Objects

In [2]:
# Folder that holds the csv files and the SQLite database file.
# The FITABASE_DATA_DIR environment variable (optional) overrides the default
# location so the notebook can run on another machine without editing this cell.
data_dir = os.environ.get("FITABASE_DATA_DIR", r"D:\Datasets\Fitabase_Data")

# create a connection to the database:
# os.path.join inserts exactly one path separator (the raw string r"\\" that was
# concatenated here before is two backslashes, giving a doubled separator).
db_con = sql.connect(os.path.join(data_dir, "database.db"))

In [3]:
# create a cursor object on the open connection; the cells below use it
# to execute SQL statements and fetch their results:
cur = db_con.cursor()

In [4]:
# sanity check for the connection: ask SQLite for the names of all tables
# and print whatever comes back.
existing_tables = cur.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
).fetchall()
print(existing_tables)

[]


## Python Script To Print Out 'Create Table' Statements
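The printed statements are only skeletons (each column name next to a sample value from the first data row); the column types still need to be filled in manually later.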

In [5]:
# get all csv data filenames from data folder:
data_filenames = [
    filename for filename in os.listdir(data_dir) if filename.endswith(".csv")
]

# For every csv file, print a CREATE TABLE skeleton that lists each column name
# next to the value found in the first data row, so a type can be picked by hand.
for datafilename in data_filenames:
    # csv.reader (instead of readline().split(",")) drops the trailing newline
    # that used to be glued onto the last column/value, and honours quoted fields.
    # The encoding is the same one the loading cell uses for these files.
    with open(os.path.join(data_dir, datafilename),
              mode="r", encoding="ISO-8859-1", newline="") as csv_file:
        reader = csv.reader(csv_file)
        cols = next(reader, [])  # header row (empty list for an empty file)
        vals = next(reader, [])  # first data row (empty list if there is none)

    print("CREATE TABLE IF NOT EXISTS ", end="")
    print(datafilename.replace(".csv", ""), "(")
    for i, col in enumerate(cols):
        # a file with a header but no data has no sample value to show
        print(col, vals[i] if i < len(vals) else "")
    print(");")
    print("\n")

CREATE TABLE IF NOT EXISTS dailyActivity_merged (
Id 1503960366
ActivityDate 4/12/2016
TotalSteps 13162
TotalDistance 8.5
TrackerDistance 8.5
LoggedActivitiesDistance 0
VeryActiveDistance 1.87999999523163
ModeratelyActiveDistance 0.550000011920929
LightActiveDistance 6.05999994277954
SedentaryActiveDistance 0
VeryActiveMinutes 25
FairlyActiveMinutes 13
LightlyActiveMinutes 328
SedentaryMinutes 728
Calories
 1985

);


CREATE TABLE IF NOT EXISTS dailyCalories_merged (
Id 1503960366
ActivityDay 4/12/2016
Calories
 1985

);


CREATE TABLE IF NOT EXISTS dailyIntensities_merged (
Id 1503960366
ActivityDay 4/12/2016
SedentaryMinutes 728
LightlyActiveMinutes 328
FairlyActiveMinutes 13
VeryActiveMinutes 25
SedentaryActiveDistance 0
LightActiveDistance 6.05999994277954
ModeratelyActiveDistance 0.550000011920929
VeryActiveDistance
 1.87999999523163

);


CREATE TABLE IF NOT EXISTS dailySteps_merged (
Id 1503960366
ActivityDay 4/12/2016
StepTotal
 13162

);


CREATE TABLE IF NOT EXISTS heartrate_

## Creating All Tables
Now we create one table for each of the files (each file has a different schema):  
The query to create all the tables is stored in [SQL_create_tables.txt](https://github.com/hannankhan888/Data_Science_Portfolio/blob/main/Wellness_Tech_Company_Analysis_Case_Study/SQL_queries/SQL_create_tables.txt).

In [6]:
# Read the whole CREATE TABLE script and run it in a single call.
# os.path.join builds the relative path with the separator of the current OS
# (a literal backslash in the path only resolves on Windows).
with open(os.path.join("SQL_queries", "SQL_create_tables.txt"), mode="r") as file:
    query = file.read()
cur.executescript(query)
db_con.commit()

# list the tables that exist now, to confirm the script created them:
cur.execute("SELECT name FROM sqlite_master WHERE type='table';")

print("Tables:")
for table in cur.fetchall():
    print(table)

Tables:
('dailyActivity_merged',)
('dailyCalories_merged',)
('dailyIntensities_merged',)
('dailySteps_merged',)
('heartrate_seconds_merged',)
('hourlyCalories_merged',)
('hourlyIntensities_merged',)
('hourlySteps_merged',)
('minuteCaloriesNarrow_merged',)
('minuteCaloriesWide_merged',)
('minuteIntensitiesNarrow_merged',)
('minuteIntensitiesWide_merged',)
('minuteMETsNarrow_merged',)
('minuteSleep_merged',)
('minuteStepsNarrow_merged',)
('minuteStepsWide_merged',)
('sleepDay_merged',)
('weightLogInfo_merged',)


## Loading CSV Files Into Their Respective Tables

In [7]:
# Load every csv file into the table named after it (file name minus ".csv").
# NOTE: this cell only INSERTs, so running it a second time adds the rows again
# unless the table definitions reject duplicates.
for datafilename in data_filenames:
    path_to_csv = os.path.join(data_dir, datafilename)
    # separate name, so the loop variable is not overwritten mid-iteration
    table_name = datafilename.replace(".csv", "")
    # newline="" is what the csv module expects from the file object
    with open(path_to_csv, mode='r', encoding="ISO-8859-1", newline="") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)  # first row holds column names, not data
        if header is None:
            continue  # completely empty file: nothing to insert
        # one "?" placeholder per column; a table name cannot be bound as a
        # parameter, so it is double-quoted as an identifier instead
        placeholders = ",".join("?" * len(header))
        stmt = f'INSERT INTO "{table_name}" VALUES({placeholders});'
        # executemany consumes the rows lazily in one call; blank lines come out
        # of csv.reader as [] and are skipped (they used to produce the invalid
        # statement "VALUES()")
        cur.executemany(stmt, (row for row in reader if row))
    db_con.commit()

In [8]:
# verify the load: fetch every row of one table and print how many there are
loaded_rows = cur.execute("SELECT * FROM dailyIntensities_merged").fetchall()
print(len(loaded_rows))

940


## Awesome! Length Matches Up. It has worked!

Let's look at each table and see what type of data each column has to offer:

In [9]:
# get tables and get column name and example data:
# One (table name, column name) pair per column of every table. This is a plain
# string (no f-prefix) because nothing is interpolated into it.
get_cols = """
SELECT
  m.name,
  p.name
FROM
  sqlite_master AS m
JOIN
  pragma_table_info(m.name) AS p
WHERE m.type='table'
"""
cur.execute(get_cols)
tables_cols = cur.fetchall()

print("|{:30}|{:25}|{:25}|".format("TABLE", "COLUMN", "EXAMPLE"))
print("|","="*80,"|")
for table,col in tables_cols:
    # identifiers are double-quoted so names containing spaces or keywords
    # still form a valid query
    get_example = f"""
    SELECT "{col}"
    FROM "{table}"
    LIMIT 1
    """
    first_row = cur.execute(get_example).fetchone()
    # fetchone() returns None for a table without rows; show a marker instead
    # of failing with an IndexError
    example = "(no rows)" if first_row is None else str(first_row[0])
    print("|{:30}|{:25}|{:25}|".format(table, col, example))

|TABLE                         |COLUMN                   |EXAMPLE                  |
|dailyActivity_merged          |Id                       |1503960366               |
|dailyActivity_merged          |ActivityDate             |4/12/2016                |
|dailyActivity_merged          |TotalSteps               |13162                    |
|dailyActivity_merged          |TotalDistance            |8.5                      |
|dailyActivity_merged          |TrackerDistance          |8.5                      |
|dailyActivity_merged          |LoggedActivitiesDistance |0                        |
|dailyActivity_merged          |VeryActiveDistance       |1.87999999523163         |
|dailyActivity_merged          |ModeratelyActiveDistance |0.550000011920929        |
|dailyActivity_merged          |LightActiveDistance      |6.05999994277954         |
|dailyActivity_merged          |SedentaryActiveDistance  |0                        |
|dailyActivity_merged          |VeryActiveMinutes        |25     