## Imports

In [7]:
# Imports
import pandas as pd
import os
from sqlalchemy import create_engine
import warnings
warnings.filterwarnings('ignore')

# Global Path Variables
PARENT_DIRECTORY = os.pardir

# Global Paths to Data Folders
RAW_DATA_FOLDER = os.path.join(PARENT_DIRECTORY, 'raw')
PROCESSED_DATA_FOLDER = os.path.join(PARENT_DIRECTORY, 'processed')

## Get the Data

In [8]:
# reading data from CSV
data = pd.read_csv(os.path.join(RAW_DATA_FOLDER, os.path.join('influenza_data', 'Influenza_Laboratory-Confirmed_Cases_By_County__Beginning_2009-10_Season.csv')))

## Clean the Data

In [9]:
# Convert the dates to datetime objects
data["Week Ending Date"] = pd.to_datetime(data["Week Ending Date"])

# Drop unnecessary columns
data = data.drop(columns=["Region", "CDC Week", "County Centroid"])

# Aggregate the data
data = data.groupby(["County", "Week Ending Date", "Season", "FIPS"]).sum().reset_index()
data.sort_values(by=["Week Ending Date"], inplace=True)

# Remove data before February 2020 and clean up the index
data = data[data["Week Ending Date"] >= "2020-02-15"].copy()
data.index = data["Week Ending Date"]
data.drop(columns=["Week Ending Date"], inplace=True)
data.reset_index(inplace=True)

# Display the cleaned data
display(data)

Unnamed: 0,Week Ending Date,County,Season,FIPS,Count
0,2020-02-15,DELAWARE,2019-2020,36025,48
1,2020-02-15,STEUBEN,2019-2020,36101,151
2,2020-02-15,SCHOHARIE,2019-2020,36095,24
3,2020-02-15,ST LAWRENCE,2019-2020,36089,307
4,2020-02-15,CORTLAND,2019-2020,36023,129
...,...,...,...,...,...
5265,2022-10-29,HAMILTON,2022-2023,36041,0
5266,2022-10-29,GREENE,2022-2023,36039,3
5267,2022-10-29,GENESEE,2022-2023,36037,2
5268,2022-10-29,ONTARIO,2022-2023,36069,9


In [10]:
# Save the processed data
data.to_csv(os.path.join(PROCESSED_DATA_FOLDER, 'influenza_case_data.csv'))

## Upload to the Database

In [None]:
# Set path to local MySQL password file
sql_pw_filepath = os.path.join(PARENT_DIRECTORY, os.path.join('..', 'sql_password.txt'))

# Raise exception if key file not found
if not os.path.exists(sql_pw_filepath):
    raise FileNotFoundError('Local MySQL password file not found! Please check directory.')

# Read in MySQL username and password as environment variable
with open(sql_pw_filepath, 'r') as f:
    os.environ['sql_username'] = f.readline().strip()
    os.environ['sql_password'] = f.readline().strip()

In [None]:
# Connection to MySQL Databse
db_connection_str = f'mysql+pymysql://{os.environ.get("sql_username")}:{os.environ.get("sql_password")}@aipi510.mysql.database.azure.com:3306/project'
db_connection_args = {'ssl': {'enable_tls': True}}
sql_engine = create_engine(db_connection_str, connect_args=db_connection_args)
db_connection= sql_engine.connect()

In [None]:
# writing processed data to database
table_name = 'influenza_data'
try:
    data.to_sql(table_name, db_connection, if_exists='replace')
except ValueError as vx:
    print(vx)
except Exception as ex:
    print(ex)
else:
    print(f'Table {table_name} created successfully!')

Table influenza_data created successfully!


In [None]:
# Ensure the data was loaded properly
test_sql_query = f'SELECT * FROM {table_name} LIMIT 10'
df_test = pd.read_sql(test_sql_query, db_connection)
df_test.head()

Unnamed: 0,index,Week Ending Date,County,Season,FIPS,Count
0,0,2020-02-15,DELAWARE,2019-2020,36025,48
1,1,2020-02-15,STEUBEN,2019-2020,36101,151
2,2,2020-02-15,SCHOHARIE,2019-2020,36095,24
3,3,2020-02-15,ST LAWRENCE,2019-2020,36089,307
4,4,2020-02-15,CORTLAND,2019-2020,36023,129


In [None]:
# closing connection to db
db_connection.close()