# COVID Data Visualization for 5 regions

In [None]:
import os
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from custuntions import phase_mask, line_plot, scatter_plot

# Apply seaborn's "whitegrid" style with 1.5x font scaling to the plots below
sns.set(style="whitegrid", font_scale=1.5)

In [None]:
# Container for the per-state weather tables; populated by the loading cell
dfs = dict()

In [None]:
# Load every CSV in ./DATA into `dfs`, keyed by the file's base name
# (e.g. "Texas.csv" -> dfs["Texas"]).
for file in os.listdir("./DATA"):
    # splitext keeps dots inside the base name, unlike split(".")[0]
    state_name, extension = os.path.splitext(file)
    # Skip anything that is not a CSV (such as .ipynb_checkpoints or
    # .DS_Store); pd.read_csv would fail on those entries.
    if extension.lower() != ".csv":
        continue
    dfs[state_name] = pd.read_csv(f"./DATA/{file}")

In [None]:
# Give every table a datetime "Date" index assembled from its YEAR/MO/DY
# columns, then discard the location and raw date-part columns.
for city, frame in dfs.items():
    date_text = frame["YEAR"].astype(str).str.cat(
        [frame["MO"].astype(str), frame["DY"].astype(str)],
        sep="/",
    )
    frame["Date"] = pd.to_datetime(date_text)
    frame.set_index("Date", inplace=True)
    frame.drop(columns=["LAT", "LON", "YEAR", "MO", "DY"], inplace=True)

In [None]:
# Read the state-level case table from the working directory
df_cases = pd.read_csv(filepath_or_buffer="US_state_cases.csv")

In [None]:
# Display the case table as loaded, before it is cleaned up
df_cases

In [None]:
# Prepare the case table: drop the FIPS code, index the rows by a datetime
# "Date", and keep only the 2020-03-01 .. 2021-03-16 window.
cases_by_date = df_cases.drop(columns=["fips"]).rename(columns={"date": "Date"})
cases_by_date["Date"] = pd.to_datetime(cases_by_date["Date"])
df_cases = cases_by_date.set_index("Date").loc["2020-03-01":"2021-03-16"]

In [None]:
# Attach each state's "cases" and "deaths" to its weather table. Rows are
# matched on the Date index; the inner join keeps only dates present in both.
for city, weather in dfs.items():
    # The keys of `dfs` are compared against the "state" column of df_cases
    state_counts = df_cases.loc[df_cases["state"] == city, ["cases", "deaths"]]
    dfs[city] = pd.merge(
        weather,
        state_counts,
        how="inner",
        left_index=True,
        right_index=True,
    )

In [None]:
# State tables that make up the southern group
dfs_southern = [dfs[state] for state in ("Arizona", "Louisiana", "Texas", "Florida")]

In [None]:
# State tables that make up the northern group
dfs_northern = [dfs[state] for state in ("Minnesota", "Massachusetts")]

In [None]:
# State tables that make up the western group
dfs_western = [dfs[state] for state in ("Nevada", "California", "Oregon")]

In [None]:
# State tables that make up the eastern group
dfs_eastern = [dfs[state] for state in ("New York", "New Jersey")]

In [None]:
# State tables that make up the central group (a single state)
dfs_central = [dfs[state] for state in ("Colorado",)]

In [None]:
# Display one merged state table (Nevada) to inspect the result of the merge
dfs['Nevada']

In [None]:
def _daily_mean(frames):
    """Stack the given state tables and average every column per Date."""
    stacked = pd.concat(frames)
    return stacked.groupby("Date").mean()


# One table of per-date means for each regional group
df_southern_mean = _daily_mean(dfs_southern)
df_northern_mean = _daily_mean(dfs_northern)
df_eastern_mean = _daily_mean(dfs_eastern)
df_western_mean = _daily_mean(dfs_western)
df_central_mean = _daily_mean(dfs_central)

In [None]:
# Maps the short weather column codes to descriptive labels (with units);
# used to rename the regional tables and to look up columns for plotting.
columns = dict(
    RH2M="Relative Humidity at 2 Meters (%)",
    T2MDEW="Dew/Frost Point at 2 Meters (C)",
    T2M_MAX="Maximum Temperature at 2 Meters (C)",
    T2M_MIN="Minimum Temperature at 2 Meters (C)",
    T2M_RANGE="Temperature Range at 2 Meters (C)",
    WS50M_RANGE="Wind Speed Range at 50 Meters (m/s)",
    WS10M_RANGE="Wind Speed Range at 10 Meters (m/s)",
)

## Southern Region

In [None]:
# Swap the short weather codes for their descriptive labels
df_southern_mean = df_southern_mean.rename(columns=columns)

In [None]:
# Display the southern regional means after renaming
df_southern_mean

In [None]:
# Split the southern means into a phase 1 and a phase 2 table using the
# project's phase_mask helper.
# Presumably the dates are (phase 1 start, phase 1 end, phase 2 start,
# phase 2 end) -- confirm against phase_mask.
# NOTE(review): 2020-10-01 is passed as both the phase 1 end and the phase 2
# start; if the helper's bounds are inclusive that day lands in both phases.
phase1_southern_mean, phase2_southern_mean = phase_mask(df_southern_mean, "2020-03-01", "2020-10-01", "2020-10-01", "2021-03-16")

In [None]:
# dropping columns here (Drop cases and deaths to better visualize the atmospheric data)
# phase1_southern_mean = phase1_southern_mean.drop(["cases", "deaths"], axis=1)
# phase2_southern_mean = phase2_southern_mean.drop(["cases", "deaths"], axis=1)

In [None]:
# Line plot of the southern phase 1 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase1_southern_mean, "Southern region covid trend phase 1")

In [None]:
# Line plot of the southern phase 2 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase2_southern_mean, "Southern region covid trend phase 2")

In [None]:
# Southern means: "cases" with the dew/frost point column (looked up by its renamed label)
scatter_plot(df_southern_mean, "cases", columns["T2MDEW"])

In [None]:
# Southern means: "cases" with the maximum temperature column (looked up by its renamed label)
scatter_plot(df_southern_mean, "cases", columns["T2M_MAX"])

In [None]:
# Southern means: "cases" with the minimum temperature column (looked up by its renamed label)
scatter_plot(df_southern_mean, "cases", columns["T2M_MIN"])

In [None]:
# Southern means: "cases" with the 50 m wind speed range column (looked up by its renamed label)
scatter_plot(df_southern_mean, "cases", columns["WS50M_RANGE"])

In [None]:
# Southern means: "cases" with the 10 m wind speed range column (looked up by its renamed label)
scatter_plot(df_southern_mean, "cases", columns["WS10M_RANGE"])

## Northern Region

In [None]:
# Swap the short weather codes for their descriptive labels
df_northern_mean = df_northern_mean.rename(columns=columns)

In [None]:
# Display the northern regional means after renaming
df_northern_mean

In [None]:
# Split the northern means into a phase 1 and a phase 2 table using the
# project's phase_mask helper.
# Presumably the dates are (phase 1 start, phase 1 end, phase 2 start,
# phase 2 end) -- confirm against phase_mask.
# NOTE(review): 2020-10-01 is passed as both the phase 1 end and the phase 2
# start; if the helper's bounds are inclusive that day lands in both phases.
phase1_northern_mean, phase2_northern_mean = phase_mask(df_northern_mean, "2020-03-01", "2020-10-01", "2020-10-01", "2021-03-16")

In [None]:
# dropping columns here
# phase1_northern_mean = phase1_northern_mean.drop(["cases", "deaths"], axis=1)
# phase2_northern_mean = phase2_northern_mean.drop(["cases", "deaths"], axis=1)

In [None]:
# Line plot of the northern phase 1 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase1_northern_mean, "Northern region covid trend phase 1")

In [None]:
# Line plot of the northern phase 2 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase2_northern_mean, "Northern region covid trend phase 2")

In [None]:
# Northern means: "cases" with the dew/frost point column (looked up by its renamed label)
scatter_plot(df_northern_mean, "cases", columns["T2MDEW"])

In [None]:
# Northern means: "cases" with the maximum temperature column (looked up by its renamed label)
scatter_plot(df_northern_mean, "cases", columns["T2M_MAX"])

In [None]:
# Northern means: "cases" with the minimum temperature column (looked up by its renamed label)
scatter_plot(df_northern_mean, "cases", columns["T2M_MIN"])

In [None]:
# Northern means: "cases" with the 50 m wind speed range column (looked up by its renamed label)
scatter_plot(df_northern_mean, "cases", columns["WS50M_RANGE"])

In [None]:
# Northern means: "cases" with the 10 m wind speed range column (looked up by its renamed label)
scatter_plot(df_northern_mean, "cases", columns["WS10M_RANGE"])

## Eastern Region

In [None]:
# Swap the short weather codes for their descriptive labels
df_eastern_mean = df_eastern_mean.rename(columns=columns)

In [None]:
# Display the eastern regional means after renaming
df_eastern_mean

In [None]:
# Split the eastern means into a phase 1 and a phase 2 table using the
# project's phase_mask helper.
# Presumably the dates are (phase 1 start, phase 1 end, phase 2 start,
# phase 2 end) -- confirm against phase_mask.
# NOTE(review): 2020-10-01 is passed as both the phase 1 end and the phase 2
# start; if the helper's bounds are inclusive that day lands in both phases.
phase1_eastern_mean, phase2_eastern_mean = phase_mask(df_eastern_mean, "2020-03-01", "2020-10-01", "2020-10-01", "2021-03-16")

In [None]:
# dropping columns here (Drop cases and deaths to better visualize the atmospheric data)
# phase1_eastern_mean = phase1_eastern_mean.drop(["cases", "deaths"], axis=1)
# phase2_eastern_mean = phase2_eastern_mean.drop(["cases", "deaths"], axis=1)

In [None]:
# Line plot of the eastern phase 1 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase1_eastern_mean, "Eastern region covid trend phase 1")

In [None]:
# Line plot of the eastern phase 2 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase2_eastern_mean, "Eastern region covid trend phase 2")

In [None]:
# Eastern means: "cases" with the dew/frost point column (looked up by its renamed label)
scatter_plot(df_eastern_mean, "cases", columns["T2MDEW"])

In [None]:
# Eastern means: "cases" with the maximum temperature column (looked up by its renamed label)
scatter_plot(df_eastern_mean, "cases", columns["T2M_MAX"])

In [None]:
# Eastern means: "cases" with the minimum temperature column (looked up by its renamed label)
scatter_plot(df_eastern_mean, "cases", columns["T2M_MIN"])

In [None]:
# Eastern means: "cases" with the 50 m wind speed range column (looked up by its renamed label)
scatter_plot(df_eastern_mean, "cases", columns["WS50M_RANGE"])

In [None]:
# Eastern means: "cases" with the 10 m wind speed range column (looked up by its renamed label)
scatter_plot(df_eastern_mean, "cases", columns["WS10M_RANGE"])

## Western Region

In [None]:
# Swap the short weather codes for their descriptive labels
df_western_mean = df_western_mean.rename(columns=columns)

In [None]:
# Display the western regional means after renaming
df_western_mean

In [None]:
# Split the western means into a phase 1 and a phase 2 table using the
# project's phase_mask helper.
# Presumably the dates are (phase 1 start, phase 1 end, phase 2 start,
# phase 2 end) -- confirm against phase_mask.
# NOTE(review): 2020-10-01 is passed as both the phase 1 end and the phase 2
# start; if the helper's bounds are inclusive that day lands in both phases.
phase1_western_mean, phase2_western_mean = phase_mask(df_western_mean, "2020-03-01", "2020-10-01", "2020-10-01", "2021-03-16")

In [None]:
# dropping columns here
# phase1_western_mean = phase1_western_mean.drop(["cases", "deaths"], axis=1)
# phase2_western_mean = phase2_western_mean.drop(["cases", "deaths"], axis=1)

In [None]:
# Line plot of the western phase 1 table; the helper returns a (fig, ax) pair.
# The string is presumably the plot title -- confirm against line_plot.
fig, ax = line_plot(phase1_western_mean, "Western region covid trend phase 1")

In [None]:
# Line plot of the western phase 2 table; the helper returns a (fig, ax) pair.
# The data passed must be the phase 2 table so it matches the "phase 2" label.
fig, ax = line_plot(phase2_western_mean, "Western region covid trend phase 2")

In [None]:
# Western means: "cases" with the relative humidity column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["RH2M"])

In [None]:
# Western means: "cases" with the dew/frost point column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["T2MDEW"])

In [None]:
# Western means: "cases" with the maximum temperature column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["T2M_MAX"])

In [None]:
# Western means: "cases" with the minimum temperature column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["T2M_MIN"])

In [None]:
# Western means: "cases" with the 50 m wind speed range column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["WS50M_RANGE"])

In [None]:
# Western means: "cases" with the 10 m wind speed range column (looked up by its renamed label)
scatter_plot(df_western_mean, "cases", columns["WS10M_RANGE"])

## Central Region

In [None]:
# Swap the short weather codes for their descriptive labels
df_central_mean = df_central_mean.rename(columns=columns)

In [None]:
# Display the central regional means after renaming
df_central_mean

In [None]:
# Split the central means into a phase 1 and a phase 2 table using the
# project's phase_mask helper.
# Presumably the dates are (phase 1 start, phase 1 end, phase 2 start,
# phase 2 end) -- confirm against phase_mask.
# NOTE(review): 2020-10-01 is passed as both the phase 1 end and the phase 2
# start; if the helper's bounds are inclusive that day lands in both phases.
phase1_central_mean, phase2_central_mean = phase_mask(df_central_mean, "2020-03-01", "2020-10-01", "2020-10-01", "2021-03-16")

In [None]:
# Line plot of the central phase 1 table; the helper returns a (fig, ax) pair.
# The label names the central region to match the data being plotted.
fig, ax = line_plot(phase1_central_mean, "Central region covid trend phase 1")

In [None]:
# Line plot of the central phase 2 table; the helper returns a (fig, ax) pair.
# The label names the central region and phase 2 to match the data plotted.
fig, ax = line_plot(phase2_central_mean, "Central region covid trend phase 2")

In [None]:
# Central means: "cases" with the relative humidity column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["RH2M"])

In [None]:
# Central means: "cases" with the dew/frost point column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["T2MDEW"])

In [None]:
# Central means: "cases" with the maximum temperature column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["T2M_MAX"])

In [None]:
# Central means: "cases" with the minimum temperature column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["T2M_MIN"])

In [None]:
# Central means: "cases" with the 50 m wind speed range column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["WS50M_RANGE"])

In [None]:
# Central means: "cases" with the 10 m wind speed range column (looked up by its renamed label)
scatter_plot(df_central_mean, "cases", columns["WS10M_RANGE"])

## Exporting the region dataframes to Excel files

In [None]:
# Write each regional daily-mean table to its own Excel workbook in the
# current working directory. All five regions are exported, including the
# central one.
df_southern_mean.to_excel("Southern_weather_cases.xlsx")
df_northern_mean.to_excel("northern_weather_cases.xlsx")
df_eastern_mean.to_excel("eastern_weather_cases.xlsx")
df_western_mean.to_excel("western_weather_cases.xlsx")
df_central_mean.to_excel("central_weather_cases.xlsx")