# Super COVID-19 dataframe

In [1]:
import pandas as pd 
import numpy as np

### Confirmed Cases
Data frame for confirmed COVID-19 cases

In [2]:
# Load the confirmed-cases table: one row per county, one column per date
cases_csv_path = 'COVID-19_DATA/covid_confirmed_usafacts.csv'
cases_df = pd.read_csv(cases_csv_path)

# Preview the first five rows
cases_df.head(n=5)

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,19913,19913,19913,19913,19913,19913,19913,19913,19913,19913
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,70521,70521,70521,70521,70521,70521,70521,70521,70521,70521
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,7582,7582,7582,7582,7582,7582,7582,7582,7582,7582
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,8149,8149,8149,8149,8149,8149,8149,8149,8149,8149


### Confirmed Deaths
Data frame for COVID-19 deaths

In [3]:
# Load the deaths table: one row per county, one column per date
deaths_csv_path = 'COVID-19_DATA/covid_deaths_usafacts.csv'
deaths_df = pd.read_csv(deaths_csv_path)

# Preview the first five rows
deaths_df.head(n=5)

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,235,235,235,235,235,235,235,235,235,235
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,731,731,731,731,731,731,731,731,731,731
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,104,104,104,104,104,104,104,104,104,104
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,111,111,111,111,111,111,111,111,111,111


### Population for counties

In [4]:
# Load the county population table (countyFIPS, County Name, State, population)
population_csv_path = 'COVID-19_DATA/covid_county_population_usafacts.csv'
population_df = pd.read_csv(population_csv_path)

# Preview the first five rows
population_df.head(n=5)

Unnamed: 0,countyFIPS,County Name,State,population
0,0,Statewide Unallocated,AL,0
1,1001,Autauga County,AL,55869
2,1003,Baldwin County,AL,223234
3,1005,Barbour County,AL,24686
4,1007,Bibb County,AL,22394


### Merge all three data frames (cases, deaths, and population)

In [14]:
# Step 1: inner-join cases with deaths on the county identity columns.
# Every other shared column (StateFIPS and each date) is disambiguated
# with a '_cases' / '_deaths' suffix.
county_keys = ['countyFIPS', 'County Name', 'State']
merged_cases_deaths = cases_df.merge(
    deaths_df,
    on=county_keys,
    suffixes=('_cases', '_deaths'),
)

# Step 2: attach the population figures. 'County Name' is not a join key
# here, so pandas emits it twice as 'County Name_x' / 'County Name_y'.
super_covid_df = merged_cases_deaths.merge(population_df, on=['countyFIPS', 'State'])

# Step 3: discard fully duplicated rows
merged_df = super_covid_df.drop_duplicates()

# Step 4: remove the 'Statewide Unallocated' rows and renumber the index from 0
is_unallocated = merged_df["County Name_x"] == "Statewide Unallocated"
super_covid_csv = merged_df.loc[~is_unallocated].reset_index(drop=True)

# Step 5: persist the combined data set (without the index column)
super_covid_csv.to_csv('super_COVID19_data.csv', index=False)


### Super COVID-19 data

In [16]:
# Show the merged cases/deaths/population frame; pandas elides the middle
# rows and columns of a frame this large in the rendered output.
super_covid_csv

Unnamed: 0,countyFIPS,County Name_x,State,StateFIPS_cases,2020-01-22_cases,2020-01-23_cases,2020-01-24_cases,2020-01-25_cases,2020-01-26_cases,2020-01-27_cases,...,2023-07-16_deaths,2023-07-17_deaths,2023-07-18_deaths,2023-07-19_deaths,2023-07-20_deaths,2023-07-21_deaths,2023-07-22_deaths,2023-07-23_deaths,County Name_y,population
0,1001,Autauga County,AL,1,0,0,0,0,0,0,...,235,235,235,235,235,235,235,235,Autauga County,55869
1,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,731,731,731,731,731,731,731,731,Baldwin County,223234
2,1005,Barbour County,AL,1,0,0,0,0,0,0,...,104,104,104,104,104,104,104,104,Barbour County,24686
3,1007,Bibb County,AL,1,0,0,0,0,0,0,...,111,111,111,111,111,111,111,111,Bibb County,22394
4,1009,Blount County,AL,1,0,0,0,0,0,0,...,261,261,261,261,261,261,261,261,Blount County,57826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56037,Sweetwater County,WY,56,0,0,0,0,0,0,...,142,142,142,142,142,142,142,142,Sweetwater County,42343
3138,56039,Teton County,WY,56,0,0,0,0,0,0,...,16,16,16,16,16,16,16,16,Teton County,23464
3139,56041,Uinta County,WY,56,0,0,0,0,0,0,...,43,43,43,43,43,43,43,43,Uinta County,20226
3140,56043,Washakie County,WY,56,0,0,0,0,0,0,...,51,51,51,51,51,51,51,51,Washakie County,7805


# Calculate COVID-19 Trends
## State: Hawaii
Calculate COVID-19 trends for the last week of the data: are the cases increasing, decreasing, or stable? Each student chooses a state to analyze.

In [22]:
# Keep only the Hawaii ('HI') rows of the confirmed-cases frame
is_hawaii = cases_df['State'].eq('HI')
HI_df = cases_df.loc[is_hawaii]

# Slice off the final seven columns, i.e. the most recent week of dates
n_days = 7
last_week = HI_df.iloc[:, -n_days:]

# Display every Hawaii row across the full date range
HI_df

Unnamed: 0,countyFIPS,County Name,State,StateFIPS,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,...,2023-07-14,2023-07-15,2023-07-16,2023-07-17,2023-07-18,2023-07-19,2023-07-20,2023-07-21,2023-07-22,2023-07-23
557,0,Statewide Unallocated,HI,15,0,0,0,0,0,0,...,9244,9244,9244,9244,9244,9244,9244,9247,9247,9247
558,15001,Hawaii County,HI,15,0,0,0,0,0,0,...,44147,44147,44147,44147,44147,44147,44147,44237,44237,44237
559,15003,City and County of Honolulu,HI,15,0,0,0,0,0,0,...,273195,273195,273195,273195,273195,273195,273195,273658,273658,273658
560,15005,Kalawao County,HI,15,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
561,15007,Kauai County,HI,15,0,0,0,0,0,0,...,19901,19901,19901,19901,19901,19901,19901,19948,19948,19948
562,15009,Maui County,HI,15,0,0,0,0,0,0,...,46594,46594,46594,46594,46594,46594,46594,46667,46667,46667


# MODIFY census_demographic

In [None]:
import pandas as pd
import numpy as np

# Load the raw census demographic export. The first data row (index 0) holds
# the descriptive label for each column, and it is promoted to the header below.
census = pd.read_csv('census_demographic.csv', low_memory=False)

# Saves the original first two columns (their header names are kept as-is)
two_columns = census.iloc[:, :2]

# Build the new header: the first two original column names, followed by the
# labels stored in row 0 for every remaining column
new_columns = list(two_columns.columns) + list(census.iloc[0, 2:])

# Assigns the new column names to the DataFrame
census.columns = new_columns

# Replace each '!!' separator with a single space. Matching the literal
# two-character token (regex=False) avoids the double spaces that replacing
# every single '!' would leave behind.
census.columns = census.columns.str.replace('!!', ' ', regex=False)

# Drop the label row now that it has become the header, then drop the
# Puerto Rico rows, and renumber the index once at the end
census = census.drop(index=0)
census = census[~census['NAME'].str.contains('Puerto Rico', na=False)]
census = census.reset_index(drop=True)

# Change column 'GEO_ID' to 'countyFIPS' and column 'NAME' to 'County Name'
census = census.rename(columns={'GEO_ID': 'countyFIPS', 'NAME': 'County Name'})

# Remove the '0500000US' prefix from the identifier and store it as an integer
# so it matches the integer countyFIPS values in the COVID-19 frames.
# astype(int) raises if any identifier is missing or non-numeric after
# stripping, which surfaces unexpected rows instead of hiding them.
census['countyFIPS'] = (
    census['countyFIPS']
    .str.replace('0500000US', '', regex=False)
    .astype(int)
)

# Save Edited Demographic CSV
# NOTE(review): left disabled as in the original; enable once the cleaned
# frame has been checked.
# census.to_csv('Modified_Census_Demographic.csv', index=False)