In [None]:
import numpy as np
import pandas as pd
from multiprocessing import Pool

In [None]:
### Create a parallelizing function
def parallel1(data, func, n_cores = 25, n_splits = 25):
    """Apply ``func`` to state-wise chunks of ``data`` and stack the results.

    The unique values of ``data["State"]`` are divided into ``n_splits``
    groups with ``np.array_split``; every group selects its rows from
    ``data`` and ``func`` is called once per resulting frame. A group can be
    empty when there are fewer states than ``n_splits``, in which case
    ``func`` receives an empty frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "State" column.
    func : callable
        Takes one DataFrame and returns an object ``pd.concat`` accepts.
        It has to be picklable whenever worker processes are used.
    n_cores : int, default 25
        Number of worker processes. With ``n_cores == 1`` the chunks are
        processed in the calling process and no pool is created.
    n_splits : int, default 25
        Number of chunks the states are divided into.

    Returns
    -------
    The ``pd.concat`` of the per-chunk results, in chunk order.
    """
    ### Split data by state into n_splits sections
    state_groups = np.array_split(data["State"].unique(), n_splits)

    ### One dataframe per group of states
    data_split = [data[data["State"].isin(list(group))] for group in state_groups]

    ### A single "core" needs no worker processes
    if n_cores == 1:
        return pd.concat([func(part) for part in data_split])

    ### Run, making sure the workers are released even when func fails
    pool = Pool(n_cores)
    try:
        results = pool.map(func, data_split)
    except BaseException:
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return pd.concat(results)

In [None]:
### Define function to create new cases data
def newCases1(data):
    """Add a "New Cases" column holding the absolute day-to-day change per county.

    Rows are grouped by ("State", "County Name"). Within a group, in row
    order, each value is ``abs(total - previous total)`` of "Total Cases";
    the first row of every group is 0. The result is written back in the
    original row order, so a county's rows do not have to be contiguous.

    ``data`` is modified in place and also returned.
    """
    def _abs_change(totals):
        ### Prepending the first value keeps the length and makes the first change 0
        values = totals.to_numpy()
        return np.abs(np.diff(values, prepend = values[:1]))

    ### County names repeat across states, so group on both columns
    per_county = data["Total Cases"].groupby([data["State"], data["County Name"]],
                                             sort = False, observed = True)

    ### transform returns the values in the row order of data
    data["New Cases"] = per_county.transform(_abs_change).to_numpy()

    return data

In [None]:
### Define function to create new deaths data
def newDeaths1(data):
    """Add a "New Deaths" column holding the absolute day-to-day change per county.

    Rows are grouped by ("State", "County Name"). Within a group, in row
    order, each value is ``abs(total - previous total)`` of "Total Deaths";
    the first row of every group is 0. The result is written back in the
    original row order, so a county's rows do not have to be contiguous.

    ``data`` is modified in place and also returned.
    """
    def _abs_change(totals):
        ### Prepending the first value keeps the length and makes the first change 0
        values = totals.to_numpy()
        return np.abs(np.diff(values, prepend = values[:1]))

    ### County names repeat across states, so group on both columns
    per_county = data["Total Deaths"].groupby([data["State"], data["County Name"]],
                                              sort = False, observed = True)

    ### transform returns the values in the row order of data
    data["New Deaths"] = per_county.transform(_abs_change).to_numpy()

    return data

In [None]:
### Create a parallelizing function
def parallel2(data, func, n_cores = 25, n_splits = 25):
    """Run ``func`` over state-wise chunks of ``data`` and concatenate the outputs.

    ``data["State"].unique()`` is cut into ``n_splits`` groups by
    ``np.array_split``. For each group the matching rows of ``data`` form one
    chunk, and ``func`` is called once per chunk. Groups may be empty when
    there are fewer states than ``n_splits``; ``func`` then gets an empty
    frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "State" column.
    func : callable
        Takes one DataFrame and returns an object ``pd.concat`` accepts.
        It has to be picklable whenever worker processes are used.
    n_cores : int, default 25
        Number of worker processes. ``n_cores == 1`` processes the chunks in
        the calling process without creating a pool.
    n_splits : int, default 25
        Number of chunks the states are divided into.

    Returns
    -------
    The ``pd.concat`` of the per-chunk results, in chunk order.
    """
    ### Split data by state into n_splits sections
    state_groups = np.array_split(data["State"].unique(), n_splits)

    ### One dataframe per group of states
    data_split = [data[data["State"].isin(list(group))] for group in state_groups]

    ### A single "core" needs no worker processes
    if n_cores == 1:
        return pd.concat([func(part) for part in data_split])

    ### Run, making sure the workers are released even when func fails
    pool = Pool(n_cores)
    try:
        results = pool.map(func, data_split)
    except BaseException:
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return pd.concat(results)

In [None]:
### Define function to create new cases data
def newCases2(data):
    """Add a "New Cases" column holding the absolute day-to-day change per state.

    Rows are grouped by "State". Within a state, in row order, each value is
    ``abs(total - previous total)`` of "TotalCases"; the first row of every
    state is 0. The result is written back in the original row order, so a
    state's rows do not have to be contiguous.

    ``data`` is modified in place and also returned.
    """
    def _abs_change(totals):
        ### Prepending the first value keeps the length and makes the first change 0
        values = totals.to_numpy()
        return np.abs(np.diff(values, prepend = values[:1]))

    per_state = data["TotalCases"].groupby(data["State"], sort = False, observed = True)

    ### transform returns the values in the row order of data
    data["New Cases"] = per_state.transform(_abs_change).to_numpy()

    return data

In [None]:
### Define function to create new deaths data
def newDeaths2(data):
    """Add a "New Deaths" column holding the absolute day-to-day change per state.

    Rows are grouped by "State". Within a state, in row order, each value is
    ``abs(total - previous total)`` of "TotalDeaths"; the first row of every
    state is 0. The result is written back in the original row order, so a
    state's rows do not have to be contiguous.

    ``data`` is modified in place and also returned.
    """
    def _abs_change(totals):
        ### Prepending the first value keeps the length and makes the first change 0
        values = totals.to_numpy()
        return np.abs(np.diff(values, prepend = values[:1]))

    per_state = data["TotalDeaths"].groupby(data["State"], sort = False, observed = True)

    ### transform returns the values in the row order of data
    data["New Deaths"] = per_state.transform(_abs_change).to_numpy()

    return data

In [None]:
def run_all():

    ### Number of confirmed cases by county
    !curl https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv --output data/cases.csv

    ### Number of confirmed deaths by county
    !curl https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv --output data/deaths.csv

    ### Total Cases
    cases = pd.read_csv("data/cases.csv")

    odd = "Unnamed: " + str(len(cases.columns) - 1)

    if (cases.columns[-1] == odd):
        cases = cases.drop(columns = cases.columns[-1])

    cases

    ### Total Deaths
    deaths = pd.read_csv("data/deaths.csv")

    if (cases.columns[-1] == odd):
        deaths = deaths.drop(columns = deaths.columns[-1])

    deaths

    ### Total Population
    population = pd.read_csv("data/population.csv")
    population

    #### County Data

    ### Remove Wade Hampton Area
    cases = cases.drop(list(cases[cases["County Name"] == "Wade Hampton Census Area"].index))

    ### New York City Unallocated/Probable
    cases = cases.drop(list(cases[cases["County Name"] == "New York City Unallocated/Probable"].index))

    ### Remove Grand Princess Cruise Ship
    cases = cases.drop(list(cases[cases["County Name"] == "Grand Princess Cruise Ship"].index))


    #### Deaths Data
    ### Remove Wade Hampton Area
    deaths = deaths.drop(list(deaths[deaths["County Name"] == "Wade Hampton Census Area"].index))

    ### New York City Unallocated/Probable
    deaths = deaths.drop(list(deaths[deaths["County Name"] == "New York City Unallocated/Probable"].index))

    ### Remove Grand Princess Cruise Ship
    deaths = deaths.drop(list(deaths[deaths["County Name"] == "Grand Princess Cruise Ship"].index))

    cases = cases.rename(columns = {"State" : "StateABV"})
    cases

    deaths = deaths.rename(columns = {"State" : "StateABV"})
    deaths

    ### County FIPS
    countyFIPS = pd.read_csv("data/countyFIPS.csv")
    countyFIPS

    ### State FIPS
    stateFIPS = pd.read_csv("data/stateFIPS.csv")
    stateFIPS

    ### Drop cases county labels
    cases = cases.drop(columns = "County Name")
    cases

    ### Add County Name from countyFIPS
    cases = cases.merge(countyFIPS, how = "left")
    cases

    ### Add State names from stateFIPS
    cases = cases.merge(stateFIPS, how = "left")
    cases

    ### Drop deaths county labels
    deaths = deaths.drop(columns = "County Name")
    deaths

    ### Add County Name from countyFIPS
    deaths = deaths.merge(countyFIPS, how = "left")
    deaths

    ### Add State names from stateFIPS
    deaths = deaths.merge(stateFIPS, how = "left")
    deaths

    ### Drop population county and state labels
    population = population.drop(columns = "County Name")
    population

    ### Add County Name from countyFIPS
    population = population.merge(countyFIPS, how = "left")
    population

    ### Unpivot cases data
    cases = pd.melt(cases, id_vars = ['County Name', "State", "StateABV", "countyFIPS", "stateFIPS"],
                     value_vars = cases.columns[3:-2],
                     var_name = "Date", value_name = "Cases")

    cases

    ### Unpivot death data
    deaths = pd.melt(deaths, id_vars = ['County Name', "State", "StateABV", "countyFIPS", "stateFIPS"],
                     value_vars = list(deaths.columns[3:-2]),
                     var_name = "Date", value_name = "Deaths")

    deaths

    ### Merge dataframes
    cases_deaths = cases.merge(deaths, on = ["State", "StateABV", "County Name", "Date", "countyFIPS", "stateFIPS"])
    cases_deaths

    ### Merge dataframes
    cases_deaths = cases_deaths.merge(population, on = ["countyFIPS","County Name"], how = "left")

    ### Sort
    cases_deaths = cases_deaths.astype({"Date" : "datetime64"})
    cases_deaths = cases_deaths.sort_values(["State","County Name","Date"], ascending = [True, True, True])


    ### Rename population and cases
    cases_deaths = cases_deaths.rename(columns = {"Cases" : "Total Cases",
                                                  "Deaths" : "Total Deaths"})

    cases_deaths = cases_deaths.reset_index().drop(columns = "index")
    cases_deaths

    cases_deaths.info()

    cases_deaths = cases_deaths.astype({"County Name" : "category",
                                        "State" : "category",
                                        "countyFIPS" : "str",
                                        "stateFIPS" : "str"})
    cases_deaths.info()

    ### First six states end where DC begins
    firstSix = cases_deaths[:list(cases_deaths["countyFIPS"][cases_deaths["State"] == "DC"].index)[0]]
    firstSix

    ### Create a new column with the fixed FIPS codes
    firstSix.insert(2,"countyFIPS2", '0' + firstSix["countyFIPS"])
    firstSix

    ### Drop the old FIPS codes and rename the new FIPS codes column
    firstSix = firstSix.drop(columns = "countyFIPS")
    firstSix = firstSix.rename(columns = {"countyFIPS2" : "countyFIPS"})
    firstSix

    firstSixIndex = np.arange(start = 0, stop = list(cases_deaths["countyFIPS"][cases_deaths["State"] == "DC"].index)[0])
    cases_deaths = cases_deaths.drop(firstSixIndex)
    cases_deaths

    cases_deaths = pd.concat([firstSix,cases_deaths])
    cases_deaths

    cases_deaths.info()

    cases_deaths2 = cases_deaths[cases_deaths["County Name"] != "Statewide Unallocated"]
    cases_deaths2 = cases_deaths2.reset_index()
    cases_deaths2 = cases_deaths2.drop(columns = "index")
    cases_deaths2

    ### First for Alabama
    ### Aggregate data
    StateData = cases_deaths[cases_deaths['State'] == "Alabama"].groupby("Date").agg(
            TotalCases = pd.NamedAgg(column = "Total Cases", aggfunc = sum),
            TotalDeaths = pd.NamedAgg(column = "Total Deaths", aggfunc = sum),
            Population = pd.NamedAgg(column = "Population", aggfunc = sum))

    ### Make a vector of the state and its FIPS
    state = np.repeat("Alabama", len(cases_deaths["Date"].unique()))
    stateABV = np.repeat("AL", len(cases_deaths["Date"].unique()))
    statefips = np.repeat('1', len(cases_deaths["Date"].unique()))

    ### Grab dates
    date = cases_deaths["Date"].unique()

    ### Insert into State Data
    StateData.insert(0, "stateFIPS", statefips)
    StateData.insert(0, "StateABV", stateABV)
    StateData.insert(0, "State", state)
    StateData.insert(0, "Date", date)

    ### Now the rest
    for state, fipsNum, stateABV in zip(cases_deaths["State"].unique()[1:], cases_deaths["stateFIPS"].unique()[1:], 
                                        cases_deaths["StateABV"].unique()[1:]) :
        ### Aggregate data
        myStateData = cases_deaths[cases_deaths['State'] == state].groupby("Date").agg(
            TotalCases = pd.NamedAgg(column = "Total Cases", aggfunc = sum),
            TotalDeaths = pd.NamedAgg(column = "Total Deaths", aggfunc = sum),
            Population = pd.NamedAgg(column = "Population", aggfunc = sum))

        ### Make a vector of the state/fips and grab dates
        mystate = np.repeat(state, len(cases_deaths["Date"].unique()))
        mystateABV = np.repeat(stateABV, len(cases_deaths["Date"].unique()))
        mystatefips = np.repeat(fipsNum, len(cases_deaths["Date"].unique()))
        mydate = cases_deaths["Date"].unique()

        ### Insert data
        myStateData.insert(0, "stateFIPS", mystatefips)
        myStateData.insert(0, "StateABV", mystateABV)
        myStateData.insert(0, "State", state)
        myStateData.insert(0, "Date", date)

        ### Stack state datas
        StateData = pd.concat([StateData, myStateData])

    ### Reset indicies
    StateData = StateData.set_index(np.arange(0,len(StateData)))

    StateData

    ### First for date
    ### Aggregate data
    USAData = StateData[StateData['Date'] == StateData["Date"].unique()[0]].groupby("Date").agg(
            TotalCases = pd.NamedAgg(column = "TotalCases", aggfunc = sum),
            TotalDeaths = pd.NamedAgg(column = "TotalDeaths", aggfunc = sum),
            Population = pd.NamedAgg(column = "Population", aggfunc = sum))

    ### Insert into usaData
    USAData.insert(0, "Date", StateData["Date"].unique()[0])
    USAData.insert(0, "Country", "United States")


    ### For the rest of dates
    for day in StateData["Date"].unique()[1:]:
        ### Aggregate data
        myUSAData = StateData[StateData['Date'] == day].groupby("Date").agg(
            TotalCases = pd.NamedAgg(column = "TotalCases", aggfunc = sum),
            TotalDeaths = pd.NamedAgg(column = "TotalDeaths", aggfunc = sum),
            Population = pd.NamedAgg(column = "Population", aggfunc = sum))

        ### Insert date into data
        myUSAData.insert(0, "Date", day)
        myUSAData.insert(0, "Country", "United States")

        ### Stack state datas
        USAData = pd.concat([USAData, myUSAData])



    ### Reset indicies
    USAData = USAData.set_index(np.arange(0,len(USAData)))

    USAData

    cases_deaths2 = parallel1(cases_deaths2, newCases1)
    cases_deaths2

    cases_deaths2 = parallel1(cases_deaths2, newDeaths1)
    cases_deaths2

    StateData = parallel2(StateData, newCases2)
    StateData

    StateData = parallel2(StateData, newDeaths2)
    StateData

    ### New Cases
    USAData["New Cases"] = abs(np.diff(USAData["TotalCases"], prepend = USAData["TotalCases"].iloc[0]))

    ### New Deaths
    USAData["New Deaths"] = abs(np.diff(USAData["TotalDeaths"], prepend = USAData["TotalDeaths"].iloc[0]))

    USAData

    ### Percent of population that have cases.
    cases_deaths2["%Cases"] = np.where(cases_deaths2["Population"] != 0,
                                       round((cases_deaths2["Total Cases"] / cases_deaths2["Population"]) * 100, 3),
                                       0)

    ### Percent of population that have died.
    cases_deaths2["%Deaths"] = np.where(cases_deaths2["Population"] != 0,
                                        round((cases_deaths2["Total Deaths"] / cases_deaths2["Population"]) * 100, 3),
                                        0)

    cases_deaths2

    ### Percent of population that have cases.
    StateData["%Cases"] = np.where(StateData["Population"] != 0,
                                   round((StateData["TotalCases"] / StateData["Population"]) * 100, 3),
                                   0)

    ### Percent of population that have died.
    StateData["%Deaths"] = np.where(StateData["Population"] != 0,
                                    round((StateData["TotalDeaths"] / StateData["Population"]) * 100, 3),
                                    0)

    StateData

    ### Percent of population that have cases.
    USAData["%Cases"] = np.where(USAData["Population"] != 0,
                                 round((USAData["TotalCases"] / USAData["Population"]) * 100, 3),
                                 0)

    ### Percent of population that have died.
    USAData["%Deaths"] = np.where(USAData["Population"] != 0,
                                  round((USAData["TotalDeaths"] / USAData["Population"]) * 100, 3),
                                  0)

    USAData

    cases_deaths2["log(Total Cases)"] = round(np.log(cases_deaths2["Total Cases"]), 3)

    cases_deaths2["log(Total Deaths)"] = round(np.log(cases_deaths2["Total Deaths"]), 3)

    cases_deaths2["log(New Cases)"] = round(np.log(cases_deaths2["New Cases"]), 3)

    cases_deaths2["log(New Deaths)"] = round(np.log(cases_deaths2["New Deaths"]), 3)

    cases_deaths2

    StateData["log(Total Cases)"] = round(np.log(StateData["TotalCases"]), 3)

    StateData["log(Total Deaths)"] = round(np.log(StateData["TotalDeaths"]), 3)

    StateData["log(New Cases)"] = round(np.log(StateData["New Cases"]), 3)

    StateData["log(New Deaths)"] = round(np.log(StateData["New Deaths"]), 3)

    StateData

    USAData["log(Total Cases)"] = round(np.log(USAData["TotalCases"]), 3)

    USAData["log(Total Deaths)"] = round(np.log(USAData["TotalDeaths"]), 3)

    USAData["log(New Cases)"] = round(np.log(USAData["New Cases"]), 3)

    USAData["log(New Deaths)"] = round(np.log(USAData["New Deaths"]), 3)

    USAData

    StateData = StateData.rename(columns = {"TotalCases" : "Total Cases",
                                            "TotalDeaths" : "Total Deaths"})
    StateData

    USAData = USAData.rename(columns = {"TotalCases" : "Total Cases",
                                            "TotalDeaths" : "Total Deaths"})
    USAData

    StateData = StateData.astype({"State" : "category",
                                  "stateFIPS" : "str"})
    StateData.info()

    USAData = USAData.astype({"Country" : "category"})
    USAData.info()

    CountyData = cases_deaths2

    ### Google Mobility data
    !curl https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv?cachebust=7d0cb7d254d29111 --output data/mobility.csv

    GoogleMobility = pd.read_csv("data/mobility.csv", dtype = "str")
    GoogleMobility

    ### Keep only US
    GoogleMobility = GoogleMobility[GoogleMobility["country_region_code"] == "US"]
    GoogleMobility

    ### Mobility data for whole country
    GoogleUsaMobility = GoogleMobility[GoogleMobility["sub_region_1"].isnull()]

    ### Mobility data for states
    GoogleStateMobility = GoogleMobility[(GoogleMobility["sub_region_1"].isnull() != True) & (GoogleMobility["sub_region_2"].isnull())]

    ### Mobility data for counties
    GoogleCountyMobility = GoogleMobility[GoogleMobility["sub_region_2"].isnull() != True]

    ### Drop columns from usaMobility
    GoogleUsaMobility = GoogleUsaMobility.drop(columns = ["country_region_code", "sub_region_1",
                                              "sub_region_2", "iso_3166_2_code",
                                              "census_fips_code"])

    ### Drop columns from stateMobility
    GoogleStateMobility = GoogleStateMobility.drop(columns = ["country_region_code", "country_region", 
                                                  "sub_region_2", "iso_3166_2_code", 
                                                  "census_fips_code"])

    ### Drop columns from countyMobility
    GoogleCountyMobility = GoogleCountyMobility.drop(columns = ["country_region_code", "country_region",
                                                    "sub_region_1", "iso_3166_2_code"])

    ### Rename usaMobility columns
    GoogleUsaMobility = GoogleUsaMobility.rename(columns = {"country_region" : "Country",
                                                "date" : "Date",
                                                "retail_and_recreation_percent_change_from_baseline" : "%Retail/Rec Change",
                                                "grocery_and_pharmacy_percent_change_from_baseline" : "%Grocery/Pharm Change",
                                                "parks_percent_change_from_baseline" : "%Parks Change",
                                                "transit_stations_percent_change_from_baseline" : "%Transit Change",
                                                "workplaces_percent_change_from_baseline" : "%Workplace Change",
                                                "residential_percent_change_from_baseline" : "%Residential Change"})
    GoogleUsaMobility = GoogleUsaMobility.astype({"Date" : "datetime64"})


    ### Rename stateMobility columns
    GoogleStateMobility = GoogleStateMobility.rename(columns = {"sub_region_1" : "State",
                                                "date" : "Date",
                                                "retail_and_recreation_percent_change_from_baseline" : "%Retail/Rec Change",
                                                "grocery_and_pharmacy_percent_change_from_baseline" : "%Grocery/Pharm Change",
                                                "parks_percent_change_from_baseline" : "%Parks Change",
                                                "transit_stations_percent_change_from_baseline" : "%Transit Change",
                                                "workplaces_percent_change_from_baseline" : "%Workplace Change",
                                                "residential_percent_change_from_baseline" : "%Residential Change"})
    GoogleStateMobility = GoogleStateMobility.astype({"Date" : "datetime64"})


    ### Rename countyMobility columns
    GoogleCountyMobility = GoogleCountyMobility.rename(columns = {"sub_region_2" : "County Name",
                                                "census_fips_code" : "countyFIPS",
                                                "date" : "Date",
                                                "retail_and_recreation_percent_change_from_baseline" : "%Retail/Rec Change",
                                                "grocery_and_pharmacy_percent_change_from_baseline" : "%Grocery/Pharm Change",
                                                "parks_percent_change_from_baseline" : "%Parks Change",
                                                "transit_stations_percent_change_from_baseline" : "%Transit Change",
                                                "workplaces_percent_change_from_baseline" : "%Workplace Change",
                                                "residential_percent_change_from_baseline" : "%Residential Change"})
    GoogleCountyMobility = GoogleCountyMobility.astype({"Date" : "datetime64"})


    ### Re-label District of Columbia as DC
    DCindex = list(GoogleStateMobility["State"][GoogleStateMobility["State"] == "District of Columbia"].index)
    for index in DCindex:
        GoogleStateMobility["State"][index] = "DC"

    ### Go grab data
    !curl https://data.cdc.gov/api/views/9bhg-hcku/rows.csv?accessType=DOWNLOAD --output data/sexage.csv

    ### Read in data
    DeathsSexAge = pd.read_csv("data/sexage.csv")
    DeathsSexAge

    DeathsSexAge = DeathsSexAge.drop(columns = ["Total Deaths",
                                                "Pneumonia Deaths",
                                                "Pneumonia and COVID-19 Deaths",
                                                "Influenza Deaths", 
                                                "Pneumonia, Influenza, or COVID-19 Deaths",
                                                "Footnote"])
    DeathsSexAge

    ### Drop Puerto Rico, Puerto Rico Total
    PRindex = list(DeathsSexAge["State"][(DeathsSexAge["State"] == "Puerto Rico") | (DeathsSexAge["State"] == "Puerto Rico Total")].index)
    DeathsSexAge = DeathsSexAge.drop(index = PRindex)
    DeathsSexAge


    ### Rename DC
    DCindex = list(DeathsSexAge["State"][DeathsSexAge["State"] == "District of Columbia"].index)
    DeathsSexAge["State"][DCindex] = "DC"

    DeathsSexAge["State"].unique()

    ### Go grab data
    !curl https://data.cdc.gov/api/views/pj7m-y5uh/rows.csv?accessType=DOWNLOAD --output data/race.csv

    ### Read in Data
    race = pd.read_csv("data/race.csv")
    race

    race = race.drop(columns = "Footnote")
    race

    ### Drop NYC.
    NYCindex = list(race["State"][race["State"] == "New York City"].index)
    race = race.drop(index = NYCindex)

    ### Rename New York<sup>5</sup> to New York.
    NYindex = list(race["State"][race["State"] == "New York<sup>5</sup>"].index)
    race["State"][NYindex] = "New York"

    ### Rename DC
    DCindex = list(race["State"][race["State"] == "District of Columbia"].index)
    race["State"][DCindex] = "DC"

    race["State"].unique()

    countDeaths = race[race["Indicator"] == "Count of COVID-19 deaths"]
    distDeaths = race[race["Indicator"] == "Distribution of COVID-19 deaths (%)"]
    unweightDeaths = race[race["Indicator"] == "Unweighted distribution of population (%)"]
    weightDeaths = race[race["Indicator"] == "Weighted distribution of population (%)"]

    ### Unpivot
    countDeaths = pd.melt(countDeaths, id_vars = ["Data as of","State", "Indicator"],
           value_vars = countDeaths.columns[3:9],
           var_name = "Race", value_name = "Count of COVID-19 deaths")
    countDeaths

    ### Drop Indicator
    countDeaths = countDeaths.drop(columns = "Indicator")
    countDeaths

    ### Unpivot
    distDeaths = pd.melt(distDeaths, id_vars = ["Data as of","State", "Indicator"],
           value_vars = distDeaths.columns[3:9],
           var_name = "Race", value_name = "Distribution of COVID-19 deaths (%)")
    distDeaths

    ### Drop Indicator
    distDeaths = distDeaths.drop(columns = "Indicator")
    distDeaths

    ### Unpivot
    unweightDeaths = pd.melt(unweightDeaths, id_vars = ["Data as of","State", "Indicator"],
           value_vars = unweightDeaths.columns[3:9],
           var_name = "Race", value_name = "Unweighted distribution of population (%)")
    unweightDeaths

    ### Drop Indicator
    unweightDeaths = unweightDeaths.drop(columns = "Indicator")
    unweightDeaths

    ### Unpivot
    weightDeaths = pd.melt(weightDeaths, id_vars = ["Data as of","State", "Indicator"],
           value_vars = weightDeaths.columns[3:9],
           var_name = "Race", value_name = "Weighted distribution of population (%)")
    weightDeaths

    ### Drop Indicator
    weightDeaths = weightDeaths.drop(columns = "Indicator")
    weightDeaths

    raceNew = countDeaths.merge(distDeaths, how = "inner", on = ["Data as of", "State", "Race"])
    raceNew = raceNew.merge(unweightDeaths, how = "inner", on = ["Data as of", "State", "Race"])
    raceNew = raceNew.merge(weightDeaths, how = "inner", on = ["Data as of", "State", "Race"])
    raceNew

    ### Go grab data
    !curl https://www.cdc.gov/nhsn/pdfs/covid19/covid19-NatEst.csv --output data/hospital.csv

    ### Load in data
    hospital = pd.read_csv("data/hospital.csv")
    hospital

    ### Drop the Notes & state columns and the first row.
    hospital = hospital.drop(columns = ["state", "Notes"])
    hospital = hospital.drop(index = 0)
    hospital = hospital.reset_index(drop = True)
    hospital

    ### Rename columns
    hospital = hospital.rename(columns = {'statename' : "State", 
                                          'collectionDate': "Date"})
    hospital

    ### Convert Date into datetime
    hospital = hospital.astype({"Date" : "datetime64"})
    hospital

    ### Remove Puerto Rico 
    PRindex = list(hospital["State"][hospital["State"] == "Puerto Rico"].index)
    hospital = hospital.drop(index = PRindex)

    ### Rename DC
    DCindex = list(hospital["State"][hospital["State"] == "District of Columbia"].index)
    hospital["State"][DCindex] = "DC"

    hospital["State"].unique()

    CountyData.to_csv("data/countyData.csv", index = False)
    StateData.to_csv("data/stateData.csv", index = False)
    USAData.to_csv("data/usaData.csv", index = False)
    DeathsSexAge.to_csv("data/demoDeaths.csv", index = False)
    raceNew.to_csv("data/raceDeaths.csv", index = False)
    hospital.to_csv("data/hospitalData.csv", index = False)
    GoogleUsaMobility.to_csv('data/GoogleUsaMobility.csv', index = False)
    GoogleStateMobility.to_csv('data/GoogleStateMobility.csv', index = False)
    GoogleCountyMobility.to_csv('data/GoogleCountyMobility.csv', index = False)