# COVID-Team-1
### CSC 405 01
### Last Updated: 10/13/2020


### Module Imports

In [None]:
import pandas as pd

### Module's Docstring

In [None]:
"""Gives streamlined access to data paths and dataframes regarding Covid data."""

### Data paths

In [None]:
# Primary data paths: the raw cases, population and deaths CSVs, plus the CSV
# that stores their merged form (written by covid_primary_data_to_csv).
_covid_cases_path = "../data/Covid/covid_confirmed_usafacts.csv"
_covid_population_path = "../data/Covid/covid_county_population_usafacts.csv"
_covid_death_path = "../data/Covid/covid_deaths_usafacts.csv"
_covid_primary_data_path = "../data/Covid/covid_primary_data.csv"

# Extension data paths (employment, hospital and housing data).
# NOTE(review): the two housing paths are not referenced by any function in
# the visible part of this file - confirm they are used elsewhere.
_county_employment_path = "../data/Employment/county-ests-employment-and-wages.csv"
_county_hospital_path = "../data/Hospitals/Definitive_Healthcare _USA_Hospital_Beds.csv"
_housing_demographics_path = "../data/Housing/ACSDP1Y2018.DP05_data_with_overlays_2020-09-03T141858.csv"
_housing_demographics_and_primary_merged = "../data/Housing/housing_demographics_primary_data.csv"

# Daily changes data paths: CSVs written by covid_daily_to_csv and read back
# by get_covid_daily_cases / get_covid_daily_deaths.
_covid_daily_cases_path = "../data/Daily/covid_daily_cases.csv"
_covid_daily_deaths_path = "../data/Daily/covid_daily_deaths.csv"
# Enumerations: selector values passed to covid_daily_to_csv
# (0 selects the cases data, 1 selects the deaths data).
_cases = 0
_deaths = 1

### Getters for Unmerged Data

In [None]:
# Reads in the cases data and adds the prefix "cases -" to ensure date attributes are distinct for merging.
def get_covid_by_cases():
    """Returns dataframe containing data on Covid cases.

    Every column is prefixed with "cases - " so the date columns stay distinct
    when merged with other data; the four identifying columns are then given
    their unprefixed names back.
    """
    # Identifying columns that must keep their original names after prefixing.
    restored_names = {
        "cases - countyFIPS": "countyFIPS",
        "cases - County Name": "County Name",
        "cases - State": "State",
        "cases - stateFIPS": "stateFIPS",
    }
    prefixed = pd.read_csv(_covid_cases_path).add_prefix("cases - ")
    return prefixed.rename(columns = restored_names)

# Reads in the deaths data and adds the prefix "deaths -" to ensure date attributes are distinct for merging.
def get_covid_by_deaths():
    """Returns dataframe containing data on Covid deaths.

    Every column is prefixed with "deaths - " so the date columns stay distinct
    when merged with other data; the four identifying columns are then given
    their unprefixed names back.
    """
    # Identifying columns that must keep their original names after prefixing.
    restored_names = {
        "deaths - countyFIPS": "countyFIPS",
        "deaths - County Name": "County Name",
        "deaths - State": "State",
        "deaths - stateFIPS": "stateFIPS",
    }
    prefixed = pd.read_csv(_covid_death_path).add_prefix("deaths - ")
    return prefixed.rename(columns = restored_names)

def get_county_population():
    """Reads the county population csv file and returns it as a dataframe."""
    df_population = pd.read_csv(_covid_population_path)
    return df_population

def get_county_employment():
    """Reads the county employment and wages csv file and returns it as a dataframe."""
    df_employment = pd.read_csv(_county_employment_path)
    return df_employment

def get_county_hospital():
    """Reads the county hospital statistics csv file and returns it as a dataframe."""
    df_hospital = pd.read_csv(_county_hospital_path)
    return df_hospital

### Getter for Merged Data

In [None]:
# Checks for file existence and returns, or creates file and returns.
def get_covid_primary_data():
    """Returns dataframe containing the primary Covid data (cases, population, death).

    The merged data is read from ``_covid_primary_data_path``. If that file
    does not exist yet, it is first generated with
    ``covid_primary_data_to_csv()`` and then read.

    Raises:
        Any error other than a missing file (e.g. a parse error) is propagated
        instead of silently triggering a regeneration.
    """
    try:
        return pd.read_csv(_covid_primary_data_path)
    except FileNotFoundError:
        # Only a missing file means "not generated yet"; a bare except would
        # also swallow KeyboardInterrupt and genuine read/parse failures.
        covid_primary_data_to_csv()
        return pd.read_csv(_covid_primary_data_path)

### Getter for Daily Cases/Deaths

In [None]:
# Checks for file existence and returns, or creates file and returns.
def get_covid_daily_cases():
    """Returns dataframe containing the daily cases data.

    The data is read from ``_covid_daily_cases_path`` with the first csv column
    used as the index. If that file does not exist yet, it is first generated
    with ``covid_daily_to_csv(_cases)`` and then read.

    Raises:
        Any error other than a missing file is propagated instead of silently
        triggering a regeneration.
    """
    try:
        return pd.read_csv(_covid_daily_cases_path, index_col=[0])
    except FileNotFoundError:
        # Only a missing file means "not generated yet"; a bare except would
        # also swallow KeyboardInterrupt and genuine read/parse failures.
        covid_daily_to_csv(_cases)
        return pd.read_csv(_covid_daily_cases_path, index_col=[0])
    
# Checks for file existence and returns, or creates file and returns.
def get_covid_daily_deaths():
    """Returns dataframe containing the daily deaths data.

    The data is read from ``_covid_daily_deaths_path`` with the first csv
    column used as the index. If that file does not exist yet, it is first
    generated with ``covid_daily_to_csv(_deaths)`` and then read.

    Raises:
        Any error other than a missing file is propagated instead of silently
        triggering a regeneration.
    """
    try:
        return pd.read_csv(_covid_daily_deaths_path, index_col=[0])
    except FileNotFoundError:
        # Only a missing file means "not generated yet"; a bare except would
        # also swallow KeyboardInterrupt and genuine read/parse failures.
        covid_daily_to_csv(_deaths)
        return pd.read_csv(_covid_daily_deaths_path, index_col=[0])

### Merged Data to CSV

In [None]:
# Removes all countyFIPS values of 0, these values are not specific to any county.
def covid_primary_data_to_csv():
    """Merges the primary Covid data (cases, population, death) and outputs it to csv file.

    The cases, deaths and population dataframes are inner-joined on
    "countyFIPS" and the result is written to ``_covid_primary_data_path``
    without the dataframe index. Rows whose countyFIPS is 0 are excluded.
    """
    df_covid_cases = get_covid_by_cases()
    # Drop the columns the cases dataframe already provides so the merge does
    # not produce duplicated (suffixed) columns.
    df_covid_deaths = get_covid_by_deaths().drop(columns = ["County Name", "State", "stateFIPS"])
    df_county_population = get_county_population().drop(columns = ["County Name", "State"])

    # Exclude countyFIPS 0 *before* merging. The final rows are the same as
    # filtering afterwards, but if several rows share the key 0 an inner merge
    # would first pair every such row with every other one, only for all of
    # those combinations to be thrown away.
    frames = [frame[frame.countyFIPS != 0]
              for frame in (df_covid_cases, df_covid_deaths, df_county_population)]

    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, how = "inner", on = "countyFIPS")
    merged.to_csv(_covid_primary_data_path, index=False)

### Daily Cases/Deaths To CSV

In [None]:
# Must be called with one of the enumerated values defined above: _cases (0) or _deaths (1).
def covid_daily_to_csv(_daily_type):
    """Derives per-state daily new cases or deaths from the primary data and outputs it to csv file.

    The columns of the primary data whose name matches the selected type are
    summed per state. Each column of the output is that state total minus the
    total of the preceding column; the first column carries the state total
    unchanged. The result is indexed by state and written to
    ``_covid_daily_cases_path`` or ``_covid_daily_deaths_path``.

    Args:
        _daily_type: ``_cases`` (0) to process the cases columns or
            ``_deaths`` (1) to process the deaths columns. For any other
            value nothing is written and the function returns None.
    """
    # Sets the correct value for daily type.
    if _daily_type == _cases:
        daily_type = "cases"
        data_path = _covid_daily_cases_path
    elif _daily_type == _deaths:
        daily_type = "deaths"
        data_path = _covid_daily_deaths_path
    else:
        # Unknown selector: kept as a silent no-op for backward compatibility.
        return

    df_primary = get_covid_primary_data()
    # Columns belonging to the requested type, in their original order.
    daily_columns = list(df_primary.filter(regex = daily_type).columns)
    # Per-state totals for every matching column (states in sorted group order).
    state_totals = df_primary.groupby("State")[daily_columns].sum()

    # Difference of adjacent columns, computed on the raw array so that the
    # first column keeps its value and integer data is not upcast to float.
    cumulative = state_totals.to_numpy()
    daily = cumulative.copy()
    daily[:, 1:] = cumulative[:, 1:] - cumulative[:, :-1]

    # Plain list index (no index name) so the csv header keeps an empty first
    # cell; values are built in one step instead of through chained
    # .loc[...].iloc[...] assignment, which does not reliably write back.
    df_state_new_daily = pd.DataFrame(daily,
                                      index = state_totals.index.tolist(),
                                      columns = daily_columns)
    # Export data.
    df_state_new_daily.to_csv(data_path)