In [1]:
# path setup
import os
import sys

# Resolve the directory two levels above the current working directory and
# register its utils/ folder at position 1 of the import search path, so that
# modules kept there can be imported further down
# (presumably where data_processing lives -- verify against the repo layout).
module_path = os.path.abspath('../../')
sys.path.insert(1, f"{module_path}/utils")

## db setup
from getpass import getpass
from urllib.parse import quote

# pip install sqlalchemy
from sqlalchemy import create_engine, text

import pandas as pd
from data_processing import preprocess_climate_data, preprocess_shelter_data

In [2]:
# Prompt interactively for the database password (input is not echoed), so the
# secret never has to be written into the notebook source.
password = getpass()

##### create new db

In [3]:
# Percent-encode the password before embedding it in the connection URL:
# characters such as '@', ':' or '/' would otherwise be read as URL delimiters
# and the connection string would be parsed incorrectly.
escaped_password = quote(password, safe='')

# prepare a server-level engine (no database selected yet)
db_connection_string = f'mysql+pymysql://root:{escaped_password}@localhost/'
server_engine = create_engine(db_connection_string)

# define db_name
database_name = 'shelter'

# Statements that rebuild the database from scratch.
# NOTE: database_name is a constant defined above; never interpolate
# untrusted input into these statements.
drop_db_query = text(f"DROP DATABASE IF EXISTS {database_name}")
create_db_query = text(f"CREATE DATABASE IF NOT EXISTS {database_name}")

# engine.begin() opens a connection, commits when the block succeeds and always
# hands the connection back to the pool, so nothing is left open afterwards.
with server_engine.begin() as conn:
    # drop the database if it exists
    conn.execute(drop_db_query)
    # create a new, empty database
    conn.execute(create_db_query)

# the server-level engine is no longer needed: release its pooled connections
server_engine.dispose()

# update the database connection string so it targets the new database
db_connection_string = f'mysql+pymysql://root:{escaped_password}@localhost/{database_name}'

# engine bound to the new database; the cells below write through it
engine = create_engine(db_connection_string)

### write data to db

#### get dataframes

In [4]:
# Run the project's climate preprocessing helper on the raw climate CSV.
climate_url = '../../data/raw/climate/climate-toronto2021-Q1-2024.csv'
climate_df = preprocess_climate_data(climate_url)

# Collect every CSV in the raw shelter folder. os.listdir() returns entries in
# arbitrary order, so the paths are sorted to make the file order handed to
# preprocess_shelter_data reproducible across runs and machines.
shelter_folder = '../../data/raw/shelter/'
shelter_files = sorted(
    os.path.join(shelter_folder, file)
    for file in os.listdir(shelter_folder)
    if file.endswith('.csv')
)
shelter_df = preprocess_shelter_data(shelter_files)

# Attach the climate columns to each shelter row by date. The left join keeps
# every shelter row even when no climate record matches its date.
shelter_climate = pd.merge(shelter_df, climate_df, on='date', how='left')

# A left join can only add rows when the right side has duplicate keys, so a
# changed row count means climate_df holds more than one record for some date.
assert len(shelter_climate) == len(shelter_df), (
    "merge changed the shelter row count; check climate_df for duplicate dates"
)

#### write climate to db

In [5]:
# Persist the climate frame as table 'climate', replacing any existing table
# and leaving the DataFrame index out of the written columns.
climate_df.to_sql(name='climate', con=engine, index=False, if_exists='replace')

1186

#### write shelter to db

In [6]:
# Persist the shelter frame as table 'shelter', replacing any existing table
# and leaving the DataFrame index out of the written columns.
shelter_df.to_sql(name='shelter', con=engine, index=False, if_exists='replace')

128349

#### write shelter_climate to db

In [7]:
# Persist the merged frame as table 'shelter_climate', replacing any existing
# table and leaving the DataFrame index out of the written columns.
shelter_climate.to_sql(name='shelter_climate', con=engine, index=False, if_exists='replace')

128349

In [8]:
# Release the engine's connection pool now that all writes are done; this
# closes the pooled database connections held by `engine`.
engine.dispose()