In [1]:
# library imports
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Read the raw influenza case CSV into a DataFrame
raw_csv_path = (
    "../data/raw/influenza_data/"
    "Influenza_Laboratory-Confirmed_Cases_By_County__Beginning_2009-10_Season.csv"
)
data = pd.read_csv(raw_csv_path)

In [3]:
# cleaning data and selecting columns to keep

# Parse the week-ending dates so they sort and compare as datetimes.
data["Week Ending Date"] = pd.to_datetime(data["Week Ending Date"])

# Drop the unused columns, total the remaining numeric columns for each
# (County, Week Ending Date, Season, FIPS) group, and order rows by week.
# numeric_only=True keeps the aggregate limited to numeric totals on every
# pandas version: since pandas 2.0, a plain sum() no longer discards
# non-numeric columns and would concatenate their string values instead.
data = (
    data.drop(columns=["Region", "CDC Week", "County Centroid"])
    .groupby(["County", "Week Ending Date", "Season", "FIPS"])
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by=["Week Ending Date"])
)

# Keep only weeks ending on or after 2020-02-15.
data = data[data["Week Ending Date"] >= "2020-02-15"].copy()

# Move "Week Ending Date" to the first column and renumber the rows from 0.
data = data.set_index("Week Ending Date").reset_index()

In [5]:
# Set path to local MySQL password file
sql_pw_filepath = "../sql_password.txt"

# Raise exception if key file not found
if not os.path.exists(sql_pw_filepath):
    raise FileNotFoundError('Local MySQL password file not found! Please check directory.')

# Read in MySQL username (first line) and password (second line) as environment variables
with open(sql_pw_filepath, 'r') as f:
    os.environ['sql_username'] = f.readline().strip()
    os.environ['sql_password'] = f.readline().strip()

# readline() returns '' once the file is exhausted, so a blank or one-line file
# would otherwise store empty credentials without any error.
if not (os.environ['sql_username'] and os.environ['sql_password']):
    raise ValueError(
        'Local MySQL password file must contain the username on line 1 and the password on line 2.'
    )

In [6]:
# Connection to mysql database

# Percent-encode the credentials before embedding them in the URL: characters
# such as '@', ':', '/' or '%' in a username or password would otherwise be
# interpreted as URL syntax when the connection string is parsed.
# Indexing os.environ (instead of .get) raises KeyError when the credentials
# were never loaded, rather than building a URL containing the text "None".
db_username = quote(os.environ["sql_username"], safe="")
db_password = quote(os.environ["sql_password"], safe="")

db_connection_str = f'mysql+pymysql://{db_username}:{db_password}@aipi510.mysql.database.azure.com:3306/project'
# connect_args are handed to the database driver when it opens a connection.
db_connection_args = {'ssl': {'enable_tls': True}}
sql_engine = create_engine(db_connection_str, connect_args=db_connection_args)
db_connection = sql_engine.connect()

In [7]:
# writing processed data to database
table_name = 'influenza_data'
try:
    # if_exists='replace' drops an existing table of the same name before writing.
    data.to_sql(table_name, db_connection, if_exists='replace')
except Exception as ex:
    # Report the failure, then re-raise so that later cells do not run against
    # a missing or stale table as if the write had succeeded.
    print(f'Writing table {table_name} failed: {ex}')
    raise
else:
    print(f'Table {table_name} created successfully!')

Table influenza_data created successfully!


In [8]:
# Query up to 10 rows of the table and preview the first few
test_sql_query = 'SELECT * FROM {} LIMIT 10'.format(table_name)
df_test = pd.read_sql(sql=test_sql_query, con=db_connection)
df_test.head()

Unnamed: 0,index,Week Ending Date,County,Season,FIPS,Count
0,0,2020-02-15,DELAWARE,2019-2020,36025,48
1,1,2020-02-15,STEUBEN,2019-2020,36101,151
2,2,2020-02-15,SCHOHARIE,2019-2020,36095,24
3,3,2020-02-15,ST LAWRENCE,2019-2020,36089,307
4,4,2020-02-15,CORTLAND,2019-2020,36023,129


In [9]:
# closing connection to db
db_connection.close()
# close() only hands the connection back to the engine's pool; dispose() also
# closes the pooled connections so none stay open against the server.
sql_engine.dispose()