In [2]:
import pandas as pd
import numpy as np
from pymongo import MongoClient

In [3]:
# Open a connection to the MongoDB server on this machine and keep a handle
# to the project database for the cells below.
DATABASE_NAME = "MDAProjectDatabase"
client = MongoClient(host='localhost', port=27017)
db = client[DATABASE_NAME]

## Cases and deaths
- by day and county
- prepared by Aleksandra

### CSV to Parquet conversion

In [None]:
# pd.read_csv(
#     "../data/cases_daily_csv.csv", 
#     usecols=["date", "fips", "cases", "deaths"]
# ).to_parquet(
#     "../data/cases_daily.parquet",
#     engine="pyarrow", 
#     compression="brotli"
# )

In [24]:
# Read the daily cases/deaths table and turn `fips` into a string that is
# left-padded with zeros to five characters.
df_cases = (
    pd.read_parquet("../data/cases_daily.parquet")
    .assign(fips=lambda frame: frame["fips"].astype(str).str.zfill(5))
)
df_cases.head()

Unnamed: 0,date,fips,cases,deaths
0,2020-01-21,1001,0,0
1,2020-01-22,1001,0,0
2,2020-01-23,1001,0,0
3,2020-01-24,1001,0,0
4,2020-01-25,1001,0,0


In [76]:
# data_dict_by_fips = [{"fips": fips, "data": df.drop(columns=["fips"]).to_dict("records")} for fips, df in df_cases.groupby("fips")]
# client["MDAProjectDatabase"]["daily_covid_cases"].insert_many(data_dict_by_fips)

In [25]:
# One MongoDB document per (date, fips) row of the cases table.
daily_case_records = df_cases.to_dict(orient="records")
db["daily_covid_cases"].insert_many(daily_case_records)

<pymongo.results.InsertManyResult at 0x7f10e00af8b0>

## Socio-economic data

In [None]:
# Load the socio-economic table, report its (rows, columns) size and preview it.
socio_economic_path = "../data/non_epidemic_data.csv"
df_socio_economic = pd.read_csv(socio_economic_path)
n_rows, n_cols = df_socio_economic.shape
print((n_rows, n_cols))
df_socio_economic.head()

In [None]:
# FIPS codes that occur in the daily cases file.
# NOTE(review): this reads county_daily_csv.csv, while the Parquet conversion
# cell refers to cases_daily_csv.csv - confirm which file is the intended one.
fipses_cases = pd.read_csv("../data/county_daily_csv.csv", usecols=["fips"])["fips"].values

# Keep only the socio-economic rows whose FIPS code also appears in the cases data.
has_case_data = df_socio_economic["fips"].isin(fipses_cases)
df_socio_economic = df_socio_economic[has_case_data]
df_socio_economic.shape

In [None]:
# The `area` column is left out of the stored documents.
socio_economic_records = df_socio_economic.drop(columns=["area"]).to_dict(orient="records")
db["socio_economic_data"].insert_many(socio_economic_records)

## FIPS dictionary

In [None]:
# Build the county-level FIPS lookup: one row per county code with the name of
# its state attached.
df_fips = pd.read_csv("../data/fips.csv")

# Codes that are exact multiples of 1000 are split off as the state-level rows;
# every other code is kept as a county row.
is_state_row = df_fips.fips.values % 1000 == 0

# State rows: derive the two-character state key and keep only key + state name.
df_fips_states = df_fips.loc[is_state_row, :].copy()
df_fips_states["fips_state"] = [f"{fips // 1000:02d}" for fips in df_fips_states.fips]
df_fips_states = df_fips_states.drop(columns=["fips"]).rename(columns={"area": "state_name"})

# County rows: take an explicit copy before adding a column, so the assignment
# writes to an independent frame instead of a slice of the raw table
# (the slice-then-assign pattern is what triggers SettingWithCopyWarning).
df_fips = df_fips.loc[~is_state_row, :].copy()
df_fips["fips_state"] = [f"{fips:05d}"[:2] for fips in df_fips.fips]

# Inner join on the state key; the helper key is dropped once the state name
# has been attached.
df_fips = pd.merge(
    df_fips,
    df_fips_states,
    on="fips_state"
).drop(columns=["fips_state"])

# Store county codes as zero-padded five-character strings.
df_fips.fips = df_fips.fips.astype(str).str.zfill(5)

df_fips.head()

In [None]:
# Preview the first five rows in the list-of-dicts shape that will be inserted.
df_fips.iloc[:5].to_dict(orient="records")

In [None]:
# One MongoDB document per county row of the FIPS lookup.
fips_records = df_fips.to_dict(orient="records")
db["fips_codes"].insert_many(fips_records)

## Time series forecasts

In [56]:
# Load the cases series together with its forecast columns and preview it.
forecast_csv_path = "../data/country_level_cases_with_forecast.csv"
df_fcst = pd.read_csv(forecast_csv_path)
df_fcst.head(5)

Unnamed: 0,date,cases,Forecast 1,Forecast 2
0,2020-01-21,1.0,,
1,2020-01-22,0.0,,
2,2020-01-23,0.0,,
3,2020-01-24,1.0,,
4,2020-01-25,1.0,,


In [57]:
# Cast to object dtype and replace every missing value with None, so the
# resulting records carry None instead of NaN.
dict_list_no_nan = (
    df_fcst
    .astype(object)
    .where(df_fcst.notna(), None)
    .to_dict(orient="records")
)
dict_list_no_nan[0]

{'date': '2020-01-21', 'cases': 1.0, 'Forecast 1': None, 'Forecast 2': None}

In [58]:
# Store the NaN-free forecast records.
forecast_collection = db["country_level_cases_with_forecasts"]
forecast_collection.insert_many(dict_list_no_nan)

<pymongo.results.InsertManyResult at 0x7f0ff1ea0310>

## Counterfactual models

In [4]:
# df_counterfactual1 = pd.read_csv("../data/counterfactual_results_model_1.csv")
# df_counterfactual1.head()

Unnamed: 0,date,variable,state_code,value
0,2020-09-30,mask_requirement,Mississippi,1513.580688
1,2020-09-30,no_mask_requirement,Mississippi,1513.463135
2,2020-10-01,mask_requirement,Mississippi,1536.039429
3,2020-10-01,no_mask_requirement,Mississippi,1529.435791
4,2020-10-02,mask_requirement,Michigan,984.426392


In [6]:
# Load the model 2 counterfactual results; the `state_code` column is
# relabelled as `state_name`.
df_counterfactual2 = (
    pd.read_csv("../data/counterfactual_results_model_2.csv")
    .rename(columns={"state_code": "state_name"})
)
df_counterfactual2.head(5)

Unnamed: 0,date,variable,state_name,value
0,2020-09-30,mask_requirement,Mississippi,1555.972046
1,2020-09-30,no_mask_requirement,Mississippi,1573.680298
2,2020-10-01,mask_requirement,Mississippi,1587.778442
3,2020-10-01,no_mask_requirement,Mississippi,1636.114868
4,2020-10-02,mask_requirement,Michigan,1010.964966


In [7]:
# One MongoDB document per (date, variable, state) row of the model 2 results.
counterfactual_records = df_counterfactual2.to_dict(orient="records")
db["counterfactual_model_results"].insert_many(counterfactual_records)

<pymongo.results.InsertManyResult at 0x7f6983f360d0>

## Vaccinations by state

In [33]:
# Lookup keyed by state abbreviation; its `fips_state` key is stored as a
# zero-padded two-character string.
df_abrev_state = pd.read_csv("../data/state_abrev_dict.csv")
df_abrev_state["fips_state"] = df_abrev_state["fips_state"].astype(str).str.zfill(2)

# Second lookup, keyed by `fips_state`, read as strings so the keys line up
# with the padded codes above.
fips_state_lookup = pd.read_csv("../data/fips_states.csv", dtype={"fips_state": str})

# Vaccination counts per state and date: swap the state abbreviation for the
# state name via the two left joins, dropping each join key once it has served
# its purpose, and keep only the columns that get stored.
df_vaccinations = (
    pd.read_csv(
        "../data/vaccinations_by_state.csv",
        dtype={"fully_vaccinated": int, "total_pop": int},
    )
    .rename(columns={"state_code": "abrev_state"})
    .drop(columns=["pct_vaccinated"])
    .merge(df_abrev_state, on="abrev_state", how="left")
    .drop(columns=["abrev_state"])
    .merge(fips_state_lookup, on="fips_state", how="left")
    .drop(columns=["fips_state"])
    .assign(state_name=lambda frame: frame["state_name"].str.title())
    .loc[:, ["date", "state_name", "fully_vaccinated", "total_pop"]]
)

df_vaccinations

Unnamed: 0,date,state_name,fully_vaccinated,total_pop
0,2020-12-13,Alaska,0,731545
1,2020-12-14,Alaska,0,731545
2,2020-12-15,Alaska,0,731545
3,2020-12-16,Alaska,0,731545
4,2020-12-17,Alaska,0,731545
...,...,...,...,...
29155,2022-06-01,Wyoming,287243,578759
29156,2022-06-02,Wyoming,287284,578759
29157,2022-06-03,Wyoming,287333,578759
29158,2022-06-04,Wyoming,287395,578759


In [34]:
# One MongoDB document per (date, state) row of the vaccination table.
vaccination_records = df_vaccinations.to_dict(orient="records")
db["vaccinations"].insert_many(vaccination_records)

<pymongo.results.InsertManyResult at 0x7f69f0198c70>