In [1]:
# Run cell to import libraries and load data sets
import geopandas as gpd 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import contextily 
import mapclassify 
import folium
import aiohttp
import fsspec
import warnings
# Notebook-wide configuration.
# NOTE(review): this silences *every* warning for the rest of the session
# (including deprecation notices); consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
plt.style.use("ggplot")

# Data sources. Keeping the URLs under their own names means a failed download
# can never leave a plain string behind under a DataFrame's name.
CASES_URL = "https://raw.githubusercontent.com/babdelfa/project/refs/heads/main/cases_data.csv"
DEATHS_URL = "https://raw.githubusercontent.com/babdelfa/project/refs/heads/main/deaths_data.csv"
COUNTY_SHAPES_URL = "https://github.com/babdelfa/gis/blob/main/counties_geometry.zip?raw=true"

# Load the COVID-19 data:
df_cases = pd.read_csv(CASES_URL)
df_deaths = pd.read_csv(DEATHS_URL)
# Upper-case every header so later cells can refer to columns by one spelling.
df_cases.columns = df_cases.columns.str.upper()
df_deaths.columns = df_deaths.columns.str.upper()

# Load the GeoDataFrame containing United States geometry shapes (at a county-level):
with fsspec.open(COUNTY_SHAPES_URL) as counties_file:
    county_shapes = gpd.read_file(counties_file)
# Only the county identifier and its shape are kept.
county_shapes = county_shapes[['FIPS_BEA', 'geometry']].copy()


# Preview: the death counts for one state (last expression, so it is displayed).
df_deaths[df_deaths["STATE"] == "Texas"]

Unnamed: 0,ISO3,FIPS,COUNTY,STATE,LATE,LONG_,COMBINED_KEY,POPULATION,1/22/2020,1/23/2020,...,12/22/2021,12/23/2021,12/24/2021,12/25/2021,12/26/2021,12/27/2021,12/28/2021,12/29/2021,12/30/2021,12/31/2021
2700,USA,48001.0,Anderson,Texas,31.815347,-95.653548,"Anderson, Texas, US",57735,0,0,...,208,208,208,208,208,208,208,208,208,208
2701,USA,48003.0,Andrews,Texas,32.304686,-102.637655,"Andrews, Texas, US",18705,0,0,...,65,65,65,65,65,65,65,65,65,65
2702,USA,48005.0,Angelina,Texas,31.254573,-94.609015,"Angelina, Texas, US",86715,0,0,...,425,426,426,426,426,426,426,427,427,427
2703,USA,48007.0,Aransas,Texas,28.105562,-96.999505,"Aransas, Texas, US",23510,0,0,...,67,67,67,67,67,67,67,67,67,67
2704,USA,48009.0,Archer,Texas,33.615700,-98.687546,"Archer, Texas, US",8553,0,0,...,24,24,24,24,24,24,24,24,24,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2951,USA,48499.0,Wood,Texas,32.787224,-95.382364,"Wood, Texas, US",45539,0,0,...,238,238,238,238,238,239,239,239,241,241
2952,USA,48501.0,Yoakum,Texas,33.173202,-102.827643,"Yoakum, Texas, US",8713,0,0,...,35,35,35,35,35,35,35,35,35,35
2953,USA,48503.0,Young,Texas,33.176597,-98.687909,"Young, Texas, US",18010,0,0,...,74,74,74,74,74,74,74,74,74,74
2954,USA,48505.0,Zapata,Texas,27.001564,-99.169872,"Zapata, Texas, US",14179,0,0,...,46,46,46,46,46,46,46,46,46,46


In [2]:
# Provide your project code below:

# Drop unnecessary columns.
# The frames are rebound (no inplace=True) and missing columns are ignored so
# that this cell can be re-run: on a second run the columns are already gone,
# and the in-place version raised KeyError.
df_cases = df_cases.drop(
    columns=["UID", "ISO3", "CODE3", "LAT", "LONG_", "COMBINED_KEY"],
    errors="ignore",
)
# "LATE" is the header exactly as it appears in the deaths file.
df_deaths = df_deaths.drop(
    columns=["ISO3", "LATE", "LONG_", "POPULATION", "COMBINED_KEY"],
    errors="ignore",
)

# Reshape wide -> long: every remaining non-identifier column is treated as a
# date and becomes one row per (county, date).
id_columns = ["COUNTY", "STATE", "FIPS"]
df_cases_melted = df_cases.melt(id_vars=id_columns, var_name="DATES", value_name="CASES")
df_deaths_melted = df_deaths.melt(id_vars=id_columns, var_name="DATES", value_name="DEATHS")

# The former column headers are date strings; convert them to timestamps.
df_cases_melted["DATES"] = pd.to_datetime(df_cases_melted["DATES"])
df_deaths_melted["DATES"] = pd.to_datetime(df_deaths_melted["DATES"])

# Create a merged dataset on which the analysis can be conducted.
# Besides the join keys the two frames only carry CASES and DEATHS, so no
# column names collide and neither suffixes nor a rename are needed.
df_merged = pd.merge(
    df_cases_melted,
    df_deaths_melted,
    on=["COUNTY", "STATE", "DATES", "FIPS"],
)

# Attach county geometry to every (county, date) row.
# NOTE(review): nothing later in this cell reads gdf, and this join repeats each
# geometry once per date - confirm it is still needed.
gdf = pd.merge(county_shapes, df_merged, left_on='FIPS_BEA', right_on='FIPS', how='inner')



# Initial Prompts
name = input("Hello. Please enter your name: ")
print()  # Blank line

state_input = input("Which state's COVID-19 information would you like to see?\n\nEnter the state: ")

# Resolve the typed text to the exact spelling used in the data. Matching is
# case-insensitive and ignores stray/duplicated whitespace, so inputs such as
# "TEXAS", " texas " or "district of columbia" all work (str.title() would turn
# the last one into "District Of Columbia", which matches nothing).
known_states = {str(s).casefold(): s for s in df_merged["STATE"].dropna().unique()}
state = known_states.get(" ".join(state_input.split()).casefold())
if state is None:
    # Fail here with a clear message instead of an IndexError further down.
    raise ValueError(
        f"No COVID-19 data found for {state_input!r}. "
        "Please re-run the cell and enter a full state name, e.g. 'Texas'."
    )


# Keep only the rows that belong to the requested state.
state_df = df_merged[df_merged["STATE"] == state]
if state_df.empty:
    raise ValueError(f"No COVID-19 data found for state {state!r}.")

# Collapse the county rows into one state-wide cumulative series per date.
ts = state_df.groupby("DATES")[["CASES","DEATHS"]].sum().sort_index()

dates = ts.index
state_cases = ts["CASES"]
state_deaths = ts["DEATHS"]

# Compute daily new cases and deaths.
# diff() leaves the first row empty; it is filled with the first cumulative
# value, i.e. everything reported on the first date counts as new that day.
daily_cases  = state_cases.diff().fillna(state_cases.iloc[0])
daily_deaths = state_deaths.diff().fillna(state_deaths.iloc[0])

# Determine the first day: the earliest date with a positive cumulative case count.
dates_with_cases = state_cases[state_cases > 0].index
if len(dates_with_cases) == 0:
    raise ValueError(f"{state} has no reported COVID-19 cases in the data.")
day0 = dates_with_cases[0]
day0_str = day0.strftime("%B %d, %Y")

print(f"COVID-19 in {state}: Key Statistics\n")
print("Timeline:\n")
print(f"Day 0 of COVID-19 in {state}: {day0_str}\n\n")
print(f"{state} Data by Year:\n")

def _period_totals(new_cases, new_deaths, first_day, last_day):
    """Summarise daily counts over the inclusive window [first_day, last_day].

    Parameters
    ----------
    new_cases, new_deaths : pandas Series of daily new counts that share one
        sorted date index.
    first_day, last_day : timestamps bounding the window (both inclusive).

    Returns
    -------
    (total_cases, total_deaths, n_days, avg_cases, avg_deaths)
        Both averages are 0.0 when the window contains no dates, so an empty
        window no longer triggers a ZeroDivisionError.
    """
    cases_window = new_cases.loc[first_day:last_day]
    deaths_window = new_deaths.loc[first_day:last_day]
    n_days = len(cases_window)
    total_cases = cases_window.sum()
    total_deaths = deaths_window.sum()
    if n_days == 0:
        return total_cases, total_deaths, 0, 0.0, 0.0
    return total_cases, total_deaths, n_days, total_cases / n_days, total_deaths / n_days


def _print_year_summary(heading, total_cases, avg_cases, total_deaths, avg_deaths):
    """Print the four summary lines for one year under the given heading."""
    print(heading)
    print(f"  - Total reported cases: {int(total_cases):,}")
    print(f"  - Average daily new cases: {avg_cases:,.2f}")
    print(f"  - Total reported deaths: {int(total_deaths):,}")
    print(f"  - Average daily deaths: {avg_deaths:,.2f}\n")


# Define analysis periods
end_2020   = pd.to_datetime("2020-12-31")
start_2021 = pd.to_datetime("2021-01-01")
end_2021   = pd.to_datetime("2021-12-31")

# 2020 totals & averages: counted from the state's first day with cases, so the
# average is not diluted by the dates before that day.
total_c20, total_d20, days_20, avg_c20, avg_d20 = _period_totals(
    daily_cases, daily_deaths, day0, end_2020
)
_print_year_summary(
    f"2020 (from {day0_str.split(',')[0]}):",
    total_c20, avg_c20, total_d20, avg_d20,
)

# 2021 totals & averages: the whole calendar year.
total_c21, total_d21, days_21, avg_c21, avg_d21 = _period_totals(
    daily_cases, daily_deaths, start_2021, end_2021
)
_print_year_summary("2021:", total_c21, avg_c21, total_d21, avg_d21)

# Cumulative totals: read the row for Dec 31, 2021 when that date is present,
# otherwise fall back to the last available date in the series.
as_of = end_2021 if end_2021 in state_cases.index else state_cases.index[-1]
overall_c, overall_d = (
    int(cumulative.loc[as_of]) for cumulative in (state_cases, state_deaths)
)

# Print the overall totals.
print(
    f"Overall Totals in {state} (as of December 31, 2021):",
    f"  - Total cases: {overall_c:,}",
    f"  - Total deaths: {overall_d:,}\n",
    sep="\n",
)

# Show the visualization menu (one entry per output line; empty strings are
# the blank lines) and read the user's selection.
menu_lines = [
    f"{name.upper()}, please select a data visualization option for {state}",
    "",
    f" 1. View four subplots showing COVID-19 trends in {state} (2020-2021):",
    "   * Total reported cases",
    "   * Daily new cases",
    "   * Total reported deaths",
    "   * Daily new deaths",
    "",
    f" 2. View a choropleth map showing total reported cases and deaths by county in {state} as of December 31, 2021.",
    "",
    "",
]
print("\n".join(menu_lines))
choice = input("Enter your choice (1 or 2):  ")

def _draw_trend_panel(axis, x_dates, values, kind, title, ylabel):
    """Draw one panel of the 2x2 trend figure.

    axis    : matplotlib Axes to draw on.
    x_dates : dates for the x axis.
    values  : series plotted against x_dates.
    kind    : "bar" for a bar chart; anything else draws a line chart.
    title, ylabel : panel title and y-axis label.
    """
    if kind == "bar":
        axis.bar(x_dates, values)
    else:
        axis.plot(x_dates, values)
    axis.set_title(title)
    axis.set_ylabel(ylabel)
    # Thousands separators on the y axis; one x tick every three months.
    axis.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:,.0f}"))
    axis.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
    axis.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
    plt.setp(axis.get_xticklabels(), rotation=45)


# Output based on choice (1 or 2); surrounding whitespace in the answer is ignored.
selection = choice.strip()

if selection == "1":
    fig, ax = plt.subplots(2, 2, figsize=(12, 9))
    fig.suptitle(f"{state} COVID-19 Report for {name}", fontsize=16)

    # (axes, chart kind, data, title, y label) for each of the four panels.
    panels = [
        (ax[0, 0], "bar",  daily_cases,  "1. Bar Chart of Daily New Cases",          "Count"),
        (ax[0, 1], "line", state_cases,  "2. Line Chart of Cumulative Cases Trend",  "Total Cases"),
        (ax[1, 0], "bar",  daily_deaths, "3. Bar Chart of Daily New Deaths",         "Count"),
        (ax[1, 1], "line", state_deaths, "4. Line Chart of Cumulative Deaths Trend", "Total Deaths"),
    ]
    for axis, kind, values, title, ylabel in panels:
        _draw_trend_panel(axis, dates, values, kind, title, ylabel)

    # Leave room at the top for the figure-level title.
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

elif selection == "2":
    # County rows for the chosen state on the last day of 2021, with the key
    # renamed to match the geometry table.
    end_date = pd.to_datetime("2021-12-31")
    last_df = df_merged[
        (df_merged["STATE"] == state) &
        (df_merged["DATES"] == end_date)
    ].rename(columns={"FIPS": "FIPS_BEA"})

    # Cumulative counts per county code (rows without a code are dropped by groupby).
    counts = (
        last_df
        .groupby("FIPS_BEA")[["CASES", "DEATHS"]]
        .sum()
        .reset_index()
    )
    counts["FIPS_BEA"] = counts["FIPS_BEA"].astype(int)

    # Cast the join key on a copy so the shared county_shapes table is not
    # modified as a side effect of drawing the map.
    shapes = county_shapes.assign(FIPS_BEA=county_shapes["FIPS_BEA"].astype(int))
    map_df = shapes.merge(counts, on="FIPS_BEA")

    # Human-readable county / state labels for the popups.
    names = (
        last_df[["FIPS_BEA","COUNTY","STATE"]]
        .drop_duplicates()
        .rename(columns={
            "COUNTY": "County Name",
            "STATE":  "State Name"
        })
    )
    map_df = map_df.merge(names, on="FIPS_BEA")

    print(f"Choropleth Map: Reported COVID-19 Cases as of December 31, 2021 in {state}")

    # Interactive map coloured by case count; clicking a county shows its numbers.
    m = map_df.explore(
        column="CASES",
        cmap="YlOrRd",
        scheme="equalinterval",
        popup=["County Name", "State Name", "CASES", "DEATHS"]
    )
    display(m)

else:
    # Previously any other answer ended the cell without output.
    print(f"'{choice}' is not a valid option. Please re-run the cell and enter 1 or 2.")
    


Hello. Please enter your name:  REVIEWER





Which state's COVID-19 information would you like to see?

Enter the state:  TEXAS


COVID-19 in Texas: Key Statistics

Timeline:

Day 0 of COVID-19 in Texas: March 05, 2020


Texas Data by Year:

2020 (from March 05):
  - Total reported cases: 1,805,998
  - Average daily new cases: 5,980.13
  - Total reported deaths: 28,090
  - Average daily deaths: 93.01

2021:
  - Total reported cases: 2,820,753
  - Average daily new cases: 7,728.09
  - Total reported deaths: 47,550
  - Average daily deaths: 130.27

Overall Totals in Texas (as of December 31, 2021):
  - Total cases: 4,626,751
  - Total deaths: 75,640

REVIEWER, please select a data visualization option for Texas

 1. View four subplots showing COVID-19 trends in Texas (2020-2021):
   * Total reported cases
   * Daily new cases
   * Total reported deaths
   * Daily new deaths

 2. View a choropleth map showing total reported cases and deaths by county in Texas as of December 31, 2021.




Enter your choice (1 or 2):   2


Choropleth Map: Reported COVID-19 Cases as of December 31, 2021 in Texas
