In [188]:
# Import necessary packages
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
import pyro
import pyro.distributions as dist
from pyro.infer import Importance, EmpiricalMarginal
import torch
import numpy as np
import xarray

In [212]:
# Read in the dataset
data = pd.read_csv("../datasets/dag_data.csv")

# Create probability tensors for Pyro DAG 

### 1. Construct conditional probability tables from the dataset:

In [3]:
# Get probabilities of Citizen Political Leaning category
cpl_proba_df = data.CPL.value_counts() / len(data)
# Get probabilities of High School category
hs_proba_df = data.HS.value_counts() / len(data)
# Get probabilities of Median Income category
mi_proba_df = data.MI.value_counts() / len(data)
# Get probabilities of Urban Influence category
ui_proba_df = data.UI.value_counts() / len(data)
# Get probabilities of State Political Leaning category
spl_proba_df = data.SPL.value_counts() / len(data)
# Get conditional probabilities of ICU category
icu_proba_df = pd.crosstab(data.ICU, data.UI, normalize='columns')
# Get conditional probabilities of Ban on Large Gatherings category
blg_proba_df = pd.crosstab(data.BLG, data.SPL, normalize='columns')
# Get conditional probabilities of CDC abidance category
cdc_proba_df = pd.crosstab(data.CDC, [data.HS, data.MI, data.CPL], normalize='columns')
# Get conditional probabilities of outcome variable, County Confirmed Case Rate
cc_proba_df = pd.crosstab(data.CC, [data.ICU, data.BLG, data.CDC], normalize='columns')


In [230]:
cdc_proba_df = pd.crosstab(data.CDC, [data.HS, data.MI, data.CPL], normalize='columns', dropna=False)
cdc_proba_df

HS,High,High,High,High,High,High,High,High,High,High,...,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low
MI,High,High,High,High,High,High,High,Low,Low,Low,...,Very High,Very High,Very High,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low,Very Low
CPL,Democrat,Even,Heavily Democrat,Heavily Republican,Leaning Democrat,Leaning Republican,Republican,Democrat,Even,Heavily Democrat,...,Leaning Democrat,Leaning Republican,Republican,Democrat,Even,Heavily Democrat,Heavily Republican,Leaning Democrat,Leaning Republican,Republican
CDC,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
AlwaysWearMasks,1.0,1.0,0.0,0.953846,1.0,1.0,1.0,0.0,0.857143,1.0,...,1.0,1.0,0.972973,0.0,1.0,0.0,0.888889,1.0,1.0,0.0
FrequentlyWearMasks,0.0,0.0,0.0,0.046154,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0
NeverWearMasks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RarelyWearMasks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SometimesWearMasks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 2. Convert conditional probability tables to tensors:

In [189]:
def convert_to_tensor(df):
    arrays = [arr for arr in df.values]
    return torch.tensor(np.stack(arrays).tolist())

hs_proba = convert_to_tensor(hs_proba_df)
mi_proba = convert_to_tensor(mi_proba_df)
ui_proba = convert_to_tensor(ui_proba_df)
spl_proba = convert_to_tensor(spl_proba_df)
cpl_proba = convert_to_tensor(cpl_proba_df)
icu_proba = convert_to_tensor(icu_proba_df)
cdc_proba = convert_to_tensor(cdc_proba_df)
blg_proba = convert_to_tensor(blg_proba_df)
cc_proba = convert_to_tensor(cc_proba_df)



# Construct a DAG model in Pyro to experiment with interventions and conditional modeling

### 1. Construct a DAG using the pgmpy Python package, fit the DAG on the dataset, then   extract conditional probability tables for each variable.

#### Construct a DAG using the pgmpy package.
We will model interventions and inferences on the DAG using pyro, but loading the data into a pygmy Bayesian network will allow us to easily transfer large conditional probability tables to pyro. 

In [9]:
# Create a Bayesian model using pgmpy to represent the DAG.
# bayes_model = BayesianModel([('CPL', 'CDC'), ('HS', 'CDC'), ('MI', 'CDC'), ('CDC', 'CC'), 
#                           ('UI', 'ICU'),('ICU', 'CC'), ('SPL', 'BLG'), ('BLG', 'CC')])

#### Fit the model on the cleaned dataset.

In [10]:
# Fit the Bayesian model on the dataset.
# bayes_model.fit(data)

#### Extract conditional probability tables for every variable in the DAG.

In [11]:
# Extract conditional probability tables from each variable in the DAG.
# cpl_probs = torch.tensor(bayes_model.get_cpds(node="CPL").values.T)
# cdc_probs = torch.tensor(bayes_model.get_cpds(node="CDC").values.T)
# hs_probs = torch.tensor(bayes_model.get_cpds(node="HS").values.T)
# mi_probs = torch.tensor(bayes_model.get_cpds(node="MI").values.T)
# ui_probs = torch.tensor(bayes_model.get_cpds(node="UI").values.T)
# icu_probs = torch.tensor(bayes_model.get_cpds(node="ICU").values.T)
# spl_probs = torch.tensor(bayes_model.get_cpds(node="SPL").values.T)
# blg_probs = torch.tensor(bayes_model.get_cpds(node="BLG").values.T)
# cc_probs = torch.tensor(bayes_model.get_cpds(node="CC").values.T)

# cdc_probs

### 2. Construct a DAG in Pyro, built upon the conditional probability tables extracted from the pgmpy Bayesian network.

In [40]:
def cc_pyro_model():
    CPL = pyro.sample("CPL", dist.Categorical(probs=cpl_proba)) 
    HS = pyro.sample("HS", dist.Categorical(probs=hs_proba)) 
    MI = pyro.sample("MI", dist.Categorical(probs=mi_proba)) 
    UI = pyro.sample("UI", dist.Categorical(probs=ui_proba))
    SPL = pyro.sample("SPL", dist.Categorical(probs=spl_proba))
    CDC = pyro.sample("CDC", dist.Categorical(probs=cdc_proba[HS][MI][CPL]))  # ???????
    ICU = pyro.sample("ICU", dist.Categorical(probs=icu_proba[UI]))
    BLG = pyro.sample("BLG", dist.Categorical(probs=blg_proba[SPL]))
    CC = pyro.sample("CC", dist.Categorical(probs=cc_proba[ICU][BLG][CDC])) # ????????
    return{"CPL": CPL, "CDC": CDC, "HS" : HS, "MI": MI, "UI": UI,
           "ICU": ICU, "SPL" : SPL, "BLG" : BLG, "CC" : CC} 

print(cc_pyro_model())

IndexError: too many indices for tensor of dimension 0

##  ^ TODO: Look at the order of conditional variables in the above CDC and CC conditional probability tensors. How should they be ordered? I guessed on the order of tensors

# Test Hypotheses about the Data

### Hypothesis 1. Counties in which there are fewer ICU beds report fewer COVID-19 confirmed cases per 100,000 people. 

In [None]:
# Determine the tensor index that corresponds to ICU = low
bayes_model.get_cpds(node="ICU").state_names["ICU"]


In [None]:
# Create conditioned model where ICU = "low"
icu_conditioned_model = pyro.condition(cc_pyro_model, data={'ICU':torch.tensor(1)})

###  Hypothesis 2. Counties that ban large gatherings report more COVID-19 confirmed cases per 100,000 people.