## Step 1: Import Packages

In [None]:
##Import the necessary packages
#Data manipulation
import pandas as pd
import numpy as np
#Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
#set theme in seaborn
sns.set_theme(style="whitegrid")

## Step 2: Load Data

In [None]:
dataframe = pd.read_csv('data/Georgia_COVID-19_Case_Data.csv')

In [None]:
dataframe

### Describing the variables

In [None]:
dataframe.info()

In [None]:
list(dataframe)

In [None]:
dataframe['COUNTY'].value_counts()

In [None]:
dataframe_counties = dataframe['COUNTY'].value_counts().sort_index()
dataframe_counties.head(20)

In [None]:
dataframe['DATESTAMP']

In [None]:
#Creating a copy of the existing column, so we keep the original version
dataframe['DATESTAMP_MOD'] = dataframe['DATESTAMP']
print(dataframe['DATESTAMP_MOD'].head(10))
print(dataframe['DATESTAMP_MOD'].dtypes)

In [None]:
dataframe['DATESTAMP_MOD'] = pd.to_datetime(dataframe['DATESTAMP_MOD'])
dataframe['DATESTAMP_MOD'].dtypes

In [None]:
dataframe[['DATESTAMP_MOD', 'DATESTAMP']]

In [None]:
dataframe['DATESTAMP_MOD_DAY'] = dataframe['DATESTAMP_MOD'].dt.date
dataframe['DATESTAMP_MOD_DAY']


In [None]:
dataframe['DATESTAMP_MOD_MONTH_YEAR'] = dataframe['DATESTAMP_MOD'].dt.to_period('M')
dataframe[['DATESTAMP_MOD_MONTH_YEAR']]

In [None]:
dataframe['DATESTAMP_MOD_MONTH'] = dataframe['DATESTAMP_MOD'].dt.month_name()
dataframe[['DATESTAMP_MOD_MONTH','DATESTAMP_MOD_DAY', 'DATESTAMP_MOD', 'DATESTAMP']]

In [None]:
dataframe['DATESTAMP_MOD_WEEK'] = dataframe['DATESTAMP_MOD'].dt.week
dataframe[['DATESTAMP_MOD_MONTH','DATESTAMP_MOD_DAY', 'DATESTAMP_MOD', 'DATESTAMP', 'DATESTAMP_MOD_WEEK']]

In [None]:
dataframe['DATESTAMP_MOD_QUARTER'] = dataframe['DATESTAMP_MOD'].dt.to_period('Q')
dataframe[['DATESTAMP_MOD_MONTH','DATESTAMP_MOD_DAY', 'DATESTAMP_MOD', 'DATESTAMP', 'DATESTAMP_MOD_WEEK', 'DATESTAMP_MOD_QUARTER']]

In [None]:
dataframe['DATESTAMP_MOD_DAY_STRING'] = dataframe['DATESTAMP_MOD_DAY'].astype(str)
dataframe['DATESTAMP_MOD_WEEK_STRING'] = dataframe['DATESTAMP_MOD_WEEK'].astype(str)
dataframe['DATESTAMP_STRING'] = dataframe['DATESTAMP_MOD_MONTH_YEAR'].astype(str)

In [None]:
dataframe

## Step 3: Getting the counties required for our analysis

We know the counties we want to analyze are:
- Cobb
- DeKalb
- Fulton
- Gwinnett
- Hall

In [None]:
dataframe['COUNTY']

In [None]:
countlist = ['COBB', 'DEKALB', 'FULTON', 'GWINNETT', 'HALL']
countlist

In [None]:
selectCounties = dataframe[dataframe['COUNTY'].isin(countlist)]
len(selectCounties)

### Getting just the specific date/time we want

In [None]:
selectCountyTime = selectCounties
selectCountyTime

In [None]:
selectCountTime_aprilmay2020 = selectCountyTime[(selectCountyTime['DATESTAMP_MOD_MONTH_YEAR'] == '2020-04') | (selectCountyTime['DATESTAMP_MOD_MONTH_YEAR'] == '2020-05')]
len(selectCountTime_aprilmay2020)

In [None]:
selectCountTime_aprilmay2020.sample(50)

### Creating the final datafram/specfic column attributes we care about

In [None]:
finaldf = selectCountTime_aprilmay2020[[
    'COUNTY',
    'DATESTAMP_MOD',
    'DATESTAMP_MOD_DAY',
    'DATESTAMP_MOD_DAY_STRING',
    'DATESTAMP_STRING',
    'DATESTAMP_MOD_MONTH_YEAR',
    'C_New', #New Cases
    'C_Cum', #Total Cases
    'H_New', #New Hospitalizations
    'H_Cum', #Total Hospitalizations
    'D_New', #New Deaths
    'D_Cum' #Total Deaths
    ]]

In [None]:
finaldf

## Looking at total covid cases by month

In [None]:
finaldf_dropdups = finaldf.drop_duplicates(subset=['COUNTY', 'DATESTAMP_STRING'], keep='last')
finaldf_dropdups

In [None]:
pd.pivot_table(finaldf_dropdups, values='C_Cum', index=['COUNTY'],
                    columns=['DATESTAMP_MOD_MONTH_YEAR'], aggfunc=np.sum)

In [None]:
vis1 = sns.barplot(x='DATESTAMP_MOD_MONTH_YEAR', y = 'C_Cum', data=finaldf_dropdups)

In [None]:
vis2 = sns.barplot(x='DATESTAMP_MOD_MONTH_YEAR', y = 'C_Cum', hue='COUNTY', data=finaldf_dropdups)

In [None]:
plotly1 = px.bar(finaldf_dropdups, x='DATESTAMP_STRING', y = 'C_Cum', color='COUNTY', barmode='group')
plotly1.show()

In [None]:
plotly2 = px.bar(finaldf_dropdups, x='DATESTAMP_STRING', y = 'C_Cum', color='COUNTY', barmode='stack')
plotly2.show()

### Looking at COVID-19 Cases by day

In [None]:
daily = finaldf
len(daily)

In [None]:
pd.pivot_table(daily, values='C_Cum', index=['COUNTY'],
                    columns=['DATESTAMP_MOD_DAY'], aggfunc=np.sum)

In [None]:
pd.pivot_table(daily, values='C_Cum', index=['DATESTAMP_MOD_DAY'],
                    columns=['COUNTY'], aggfunc=np.sum)

In [None]:
startDate = pd.to_datetime('2020-04-26').date()
endDate = pd.to_datetime('2020-05-09').date()

maskFilter = (daily['DATESTAMP_MOD_DAY'] >= startDate) & (daily['DATESTAMP_MOD_DAY'] <= endDate)
dailySpecific = daily.loc[maskFilter]
dailySpecific

In [None]:
dailySpecific[dailySpecific['COUNTY'] == 'FULTON']

In [None]:
vis3 = sns.lineplot(data=dailySpecific, x='DATESTAMP_MOD_DAY', y='C_Cum')

In [None]:
vis4 = sns.lineplot(data=dailySpecific, x='DATESTAMP_MOD_DAY', y='C_Cum', hue='COUNTY')

In [None]:
plotly3 = px.bar(dailySpecific, x='DATESTAMP_MOD_DAY', y = 'C_Cum', color='COUNTY')
plotly3.show()

In [None]:
plotly4 = px.bar(dailySpecific, 
                 x='DATESTAMP_MOD_DAY', 
                 y = 'H_New', 
                 color='COUNTY',
                 barmode= 'group')
plotly4.show()

In [None]:
plotly5 = px.bar(dailySpecific, 
                 x='DATESTAMP_MOD_DAY', 
                 y = 'H_Cum', 
                 color='COUNTY',
                 barmode= 'group')
plotly5.show()

In [None]:
plotly6 = px.bar(dailySpecific, 
                 x='DATESTAMP_MOD_DAY', 
                 y = 'D_New', 
                 color='COUNTY',
                 barmode= 'group')
plotly6.show()

In [None]:
plotly7 = px.bar(dailySpecific, 
                 x='DATESTAMP_MOD_DAY', 
                 y = 'D_Cum', 
                 color='COUNTY',
                 barmode= 'group')
plotly7.show()

In [265]:
dailySpecific['newHospAndDeathsAndCovid'] = dailySpecific['H_New'].astype(int) + dailySpecific['D_New'].astype(int) + dailySpecific['C_New'].astype(int)
dailySpecific['newHospAndDeathsAndCovid']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



7489      52
7490      63
7491      90
7492     103
7493      83
        ... 
79995     56
79996     36
79997     16
79998     40
79999     24
Name: newHospAndDeathsAndCovid, Length: 70, dtype: int64

In [266]:
dailySpecific

Unnamed: 0,COUNTY,DATESTAMP_MOD,DATESTAMP_MOD_DAY,DATESTAMP_MOD_DAY_STRING,DATESTAMP_STRING,DATESTAMP_MOD_MONTH_YEAR,C_New,C_Cum,H_New,H_Cum,D_New,D_Cum,newHospAndDeaths,newHospAndDeathsAndCovid
7489,FULTON,2020-04-26 03:59:59+00:00,2020-04-26,2020-04-26,2020-04,2020-04,36,2910,10,752,6,244,16,52
7490,FULTON,2020-04-27 03:59:59+00:00,2020-04-27,2020-04-27,2020-04,2020-04,51,2961,8,760,4,248,12,63
7491,FULTON,2020-04-28 03:59:59+00:00,2020-04-28,2020-04-28,2020-04,2020-04,77,3038,10,770,3,251,13,90
7492,FULTON,2020-04-29 03:59:59+00:00,2020-04-29,2020-04-29,2020-04,2020-04,78,3116,17,787,8,259,25,103
7493,FULTON,2020-04-30 03:59:59+00:00,2020-04-30,2020-04-30,2020-04,2020-04,73,3189,5,792,5,264,10,83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79995,HALL,2020-05-01 03:59:59+00:00,2020-05-01,2020-05-01,2020-05,2020-05,47,2069,8,305,1,63,9,56
79996,HALL,2020-05-02 03:59:59+00:00,2020-05-02,2020-05-02,2020-05,2020-05,26,2095,7,312,3,66,10,36
79997,HALL,2020-05-03 03:59:59+00:00,2020-05-03,2020-05-03,2020-05,2020-05,12,2107,4,316,0,66,4,16
79998,HALL,2020-05-04 03:59:59+00:00,2020-05-04,2020-05-04,2020-05,2020-05,35,2142,4,320,1,67,5,40


In [276]:
plotly8 = px.bar(dailySpecific, 
                 x='DATESTAMP_MOD_DAY', 
                 y = 'newHospAndDeathsAndCovid', 
                 color='COUNTY',
                 barmode= 'group',
                 title='Georgia 2020 Covid Data: New Hospitalizations, Deaths, and COVID-19 Cases',
                 labels={'DATESTAMP_MOD_DAY':'Time (Month, Day, Year)', 
                         'newHospAndDeathsAndCovid':'New Hospitalizations, Deaths, and COVID-19 Cases', 
                         'COUNTY':'County'},
                 )
plotly8.update_layout(
    xaxis=dict(
        tickmode = 'linear',
        type = 'category',
    )
)
plotly8.show()