# Simple Model
In our simpler model, we treat each post as being about a story that belongs to one of three groups:
- Factual, Disputed Story
- Fake, Disputed Story
- Corrective Story

A useful reference for a hierarchical model example: https://github.com/pyro-ppl/pyro/blob/dev/examples/baseball.py
See also: https://pyro.ai/examples/forecasting_iii.html

# To start, we will use dummy data

In [273]:
import pandas as pd
import torch
import pyro
from pyro.infer import MCMC, NUTS
import pyro.distributions as dist
from pyro.distributions.util import scalar_like
from torch.distributions import constraints

In [274]:
# Enable Pyro's validation checks (`__debug__` is True unless Python runs with -O)
# and fix the RNG seed so the sampling below is reproducible.
pyro.enable_validation(__debug__)
pyro.set_rng_seed(0)

In [275]:
# Dummy posts, one record per post: (story type, comments in first hour, engagement).
data = pd.DataFrame(
    [
        ("Fake", 100, 1000),
        ("Fact", 50, 800),
        ("Corrective", 20, 300),
        ("Fake", 250, 3000),
        ("Fact", 100, 2500),
        ("Corrective", 40, 500),
        ("Fake", 125, 1500),
        ("Fact", 150, 1600),
        ("Corrective", 30, 1000),
    ],
    columns=["Type", "CommentsFirstHour", "Engagement"],
)
data

Unnamed: 0,Type,CommentsFirstHour,Engagement
0,Fake,100,1000
1,Fact,50,800
2,Corrective,20,300
3,Fake,250,3000
4,Fact,100,2500
5,Corrective,40,500
6,Fake,125,1500
7,Fact,150,1600
8,Corrective,30,1000


In [276]:
# Build the dummy tensor as (type, post, var) and then swap the last two axes,
# so the final layout is:
#   dim 0: Type: (Fake, Fact, Corrective)
#   dim 1: post-level vars: (bias, commentsFirstHour, Engagement)
#   dim 2: obs (post)
data = torch.tensor(
    [
        # Fake
        [[1, 100, 1000], [1, 250, 3000], [1, 125, 1500], [1, 150, 1500]],
        # Fact
        [[1, 50, 800], [1, 100, 2500], [1, 150, 1600], [1, 125, 1200]],
        # Corrective
        [[1, 20, 300], [1, 40, 500], [1, 30, 1000], [1, 35, 600]],
    ],
    dtype=torch.float32,
).transpose(1, 2)

In [277]:
# Sanity check: the layout should be (types, post-level vars, posts).
data.shape

torch.Size([3, 3, 4])

In [278]:
# Inspect the (tiny) dummy tensor in full.
data

tensor([[[1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
         [1.0000e+02, 2.5000e+02, 1.2500e+02, 1.5000e+02],
         [1.0000e+03, 3.0000e+03, 1.5000e+03, 1.5000e+03]],

        [[1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
         [5.0000e+01, 1.0000e+02, 1.5000e+02, 1.2500e+02],
         [8.0000e+02, 2.5000e+03, 1.6000e+03, 1.2000e+03]],

        [[1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
         [2.0000e+01, 4.0000e+01, 3.0000e+01, 3.5000e+01],
         [3.0000e+02, 5.0000e+02, 1.0000e+03, 6.0000e+02]]])

In [279]:
# Along dim 1, the last entry is the response (Engagement); everything before it
# (bias, commentsFirstHour) is a predictor.
x, y = data[:, :-1, :], data[:, -1, :]

In [280]:
# Predictors per type: a bias row of ones and the commentsFirstHour row.
x

tensor([[[  1.,   1.,   1.,   1.],
         [100., 250., 125., 150.]],

        [[  1.,   1.,   1.,   1.],
         [ 50., 100., 150., 125.]],

        [[  1.,   1.,   1.,   1.],
         [ 20.,  40.,  30.,  35.]]])

In [281]:
# Expect (types, predictors, posts).
x.shape

torch.Size([3, 2, 4])

In [282]:
# Response (Engagement): one row per type, one column per post.
y

tensor([[1000., 3000., 1500., 1500.],
        [ 800., 2500., 1600., 1200.],
        [ 300.,  500., 1000.,  600.]])

In [292]:
# x is a 3D tensor
def model(x, y, coef_prior_scale=10., std_prior_max=10.):
    """Independent linear regression of `y` on `x` for every story type.

    For each type `t` the model samples one coefficient per predictor and one
    observation noise scale, then scores every post of that type against
    `Normal(coefs . x[t, :, p], std)`.

    Sample sites (names are relied on by downstream summaries):
      - ``type_{t}_coef_{i}`` ~ Normal(0, coef_prior_scale)
      - ``type_{t}_std``      ~ Uniform(0, std_prior_max)
      - ``obs_{t}_{p}``       observed at ``y[t, p]``

    Args:
        x: tensor indexed as ``x[type, predictor, post]``.
        y: tensor indexed as ``y[type, post]``.
        coef_prior_scale: std of the zero-mean Normal prior on every coefficient.
            Defaults to the value that was previously hard-coded.
        std_prior_max: upper bound of the Uniform prior on the noise std.
            Defaults to the value that was previously hard-coded.
            NOTE(review): in the recorded NUTS run the ``type_*_std`` draws sit at
            ~9.99, i.e. pressed against this default bound, which suggests the
            bound is too tight for engagement values in the hundreds/thousands.
            Pass a larger value to relax it.
    """
    num_types, num_indeps, num_posts = x.shape

    # construct necessary plates over each level
    type_plate = pyro.plate("type", num_types)
    indep_plate = pyro.plate("indep", num_indeps)
    post_plate = pyro.plate("post", num_posts)

    for t in type_plate:
        # sample the type level coefs; collect and stack them rather than writing
        # into an uninitialised buffer in place
        coefs = []
        for i in indep_plate:
            coefs.append(pyro.sample(f"type_{t}_coef_{i}", dist.Normal(0., coef_prior_scale)))
        type_level_coefs = torch.stack(coefs)

        std = pyro.sample(f"type_{t}_std", dist.Uniform(0., std_prior_max))  # sample the y std
        for p in post_plate:  # note: currently assumes same number of samples across all types. not always true.
            mu = torch.dot(type_level_coefs, x[t, :, p])
            pyro.sample(f"obs_{t}_{p}", dist.Normal(mu, std), obs=y[t, p])
    
    

In [293]:
# x is a 3D tensor
def guide(x, y):
    """Mean-field variational guide matching the sample sites of `model`.

    Every latent site of the model gets its own independent variational
    distribution with learnable parameters:

      - ``type_{t}_coef_{i}`` ~ Normal(loc, scale), with params
        ``type_{t}_coef_{i}_loc`` / ``type_{t}_coef_{i}_scale``.
      - ``type_{t}_std`` ~ a Normal pushed through sigmoid and scaled to (0, 10),
        with params ``type_{t}_std_loc`` / ``type_{t}_std_scale``. The interval
        must match the support of the model's ``Uniform(0., 10.)`` prior.

    The guide has no observed sites, so `y` is accepted only to keep the
    signature identical to the model's.

    Args:
        x: tensor indexed as ``x[type, predictor, post]``; only its shape is used.
        y: unused.
    """
    # Local import: the notebook's import cell only brings in `constraints`.
    from torch.distributions.transforms import AffineTransform, SigmoidTransform

    num_types, num_indeps, _ = x.shape

    # construct necessary plates over each level (same names as in the model)
    type_plate = pyro.plate("type", num_types)
    indep_plate = pyro.plate("indep", num_indeps)

    for t in type_plate:
        for i in indep_plate:
            # torch.tensor (lower-case) builds a 0-dim tensor from a Python float;
            # torch.Tensor(0.) raises a TypeError.
            coef_loc = pyro.param(f"type_{t}_coef_{i}_loc", torch.tensor(0.))
            coef_scale = pyro.param(f"type_{t}_coef_{i}_scale", torch.tensor(1.),
                                    constraint=constraints.positive)
            # Latent sites must be *sampled* in the guide under the model's site name.
            pyro.sample(f"type_{t}_coef_{i}", dist.Normal(coef_loc, coef_scale))

        std_loc = pyro.param(f"type_{t}_std_loc", torch.tensor(0.))
        std_scale = pyro.param(f"type_{t}_std_scale", torch.tensor(1.),
                               constraint=constraints.positive)
        # Unconstrained Normal -> (0, 1) via sigmoid -> (0, 10) via affine scaling.
        std_posterior = dist.TransformedDistribution(
            dist.Normal(std_loc, std_scale),
            [SigmoidTransform(), AffineTransform(0., 10.)],
        )
        pyro.sample(f"type_{t}_std", std_posterior)  # sample the y std
    
    

In [294]:
# Draw posterior samples with NUTS: 250 warmup steps, then 2000 kept draws.
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, warmup_steps=250, num_samples=2000)
mcmc.run(x, y)

# Convert every site's draws to a NumPy array for easier summarising.
hmc_samples = dict(
    (site_name, draws.detach().cpu().numpy())
    for site_name, draws in mcmc.get_samples().items()
)

Warmup:   0%|          | 0/2250 [00:00, ?it/s]Warmup:   0%|          | 1/2250 [00:00,  3.78it/s, step size=1.40e-02, acc. prob=1.000]Warmup:   0%|          | 6/2250 [00:00,  5.21it/s, step size=1.80e-02, acc. prob=0.833]Warmup:   1%|          | 12/2250 [00:00,  7.16it/s, step size=1.20e-02, acc. prob=0.806]Warmup:   1%|          | 15/2250 [00:00,  7.98it/s, step size=7.50e-02, acc. prob=0.844]Warmup:   0%|          | 0/2250 [01:21, ?it/s]
Warmup:   0%|          | 0/2250 [00:49, ?it/s]
Warmup:   1%|          | 18/2250 [00:04,  2.28it/s, step size=1.87e-02, acc. prob=0.812]Warmup:   1%|          | 16/2250 [00:16,  4.37it/s, step size=3.10e-02, acc. prob=0.781]Warmup:   1%|          | 20/2250 [00:07,  1.19it/s, step size=3.73e-02, acc. prob=0.822]Warmup:   1%|          | 22/2250 [00:08,  1.44it/s, step size=1.72e-02, acc. prob=0.809]Warmup:   1%|          | 23/2250 [00:09,  1.19it/s, step size=2.93e-02, acc. prob=0.816]Warmup:   1%|          | 24/2250 [00:10,  1.07it/s, step 

In [305]:
TYPES = ["Fake", "Fact", "Corrective"]
# Utility function to compute latent sites' quantile information.
def summary_types(samples):
    """Summarise posterior draws per sample site.

    Args:
        samples: mapping of site name -> array-like of draws whose first axis
            indexes the draw. Arrays with more than two dimensions are flattened
            to ``(num_draws, -1)`` so that each trailing element gets its own row
            (``pd.DataFrame`` rejects inputs with more than two dimensions).

    Returns:
        dict mapping each site name to a DataFrame with one row per element of
        the site and the columns mean, std, 5%, 25%, 50%, 75%, 95%.
    """
    stat_columns = ["mean", "std", "5%", "25%", "50%", "75%", "95%"]
    site_stats = {}
    for site_name, values in samples.items():
        if getattr(values, "ndim", 0) > 2:
            values = values.reshape(values.shape[0], -1)
        marginal_site = pd.DataFrame(values)
        describe = marginal_site.describe(percentiles=[.05, 0.25, 0.5, 0.75, 0.95]).transpose()
        site_stats[site_name] = describe[stat_columns]
    return site_stats

In [306]:
# Raw posterior draws, keyed by sample-site name.
hmc_samples

{'type_0_coef_0': array([-93.59751, -77.82268, -89.95822, ..., -92.07821, -79.53466,
        -92.64587], dtype=float32),
 'type_0_coef_1': array([11.905197, 11.927382, 11.929324, ..., 12.00745 , 11.934976,
        11.882766], dtype=float32),
 'type_0_std': array([9.993318, 9.985207, 9.969162, ..., 9.995724, 9.969778, 9.995468],
       dtype=float32),
 'type_1_coef_0': array([294.46323, 278.01688, 256.29465, ..., 271.94638, 273.7301 ,
        269.09692], dtype=float32),
 'type_1_coef_1': array([10.963751, 11.152698, 11.238733, ..., 11.131603, 11.115335,
        11.145469], dtype=float32),
 'type_1_std': array([9.999019 , 9.9998455, 9.999798 , ..., 9.998522 , 9.999893 ,
        9.99931  ], dtype=float32),
 'type_2_coef_0': array([53.659748, 55.28672 , 59.38068 , ..., 70.84486 , 58.138477,
        66.31885 ], dtype=float32),
 'type_2_coef_1': array([16.971773, 17.047106, 16.771217, ..., 16.515991, 16.812979,
        16.679924], dtype=float32),
 'type_2_std': array([9.9997225, 9.999964 , 9

In [307]:

# Print the quantile table of every sample site, separated by blank lines.
for site, values in summary_types(hmc_samples).items():
    print(f"Coefficient: {site}")
    print(values, "\n")

Coefficient: type_0_coef_0
        mean       std          5%        25%        50%        75%        95%
0 -90.337471  8.280522 -104.201505 -96.023737 -90.277035 -84.704821 -76.764779 

Coefficient: type_0_coef_1
        mean       std         5%       25%        50%        75%       95%
0  11.921515  0.056123  11.829146  11.88442  11.920927  11.960093  12.01414 

Coefficient: type_0_std
       mean       std        5%       25%       50%       75%       95%
0  9.986087  0.013445  9.960134  9.980032  9.990085  9.995616  9.999214 

Coefficient: type_1_coef_0
         mean       std          5%         25%         50%         75%  \
0  273.691162  8.231652  260.551021  267.892303  273.684235  279.342613   

          95%  
0  287.816743   

Coefficient: type_1_coef_1
        mean       std         5%        25%        50%        75%        95%
0  11.135045  0.082909  11.001281  11.078073  11.136253  11.191784  11.271496 

Coefficient: type_1_std
       mean       std        5%      25% 