In [1]:
import logging

import pickle

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from torch.distributions import constraints
from torch import nn
import pyro
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.infer import Predictive
import seaborn as sns
from pyro import poutine
from sklearn import metrics

In [2]:
# Fix the random seed so the stochastic inference below is repeatable.
SEED = 10
pyro.set_rng_seed(SEED)

In [3]:

# NOTE: pickle.load can execute arbitrary code -- only open pickle files from a trusted source.
with open('data_all.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)
print(data.shape)

# Boolean array flagging missing entries. Held-out (test) examples are expected to be
# set to NaN before training so that the model is never fitted on them.
nan_mask = np.isnan(data)
nan_mask_tensor = torch.from_numpy(nan_mask)
print(nan_mask_tensor)

(1127, 5237)
tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])


In [4]:
class PMF_NB_with_drug_varying_alpha(nn.Module):
    """Poisson matrix factorization with a per-drug exposure term, fitted with Pyro SVI.

    Every row ("drug") and every column ("side effect") of the observation
    matrix gets a ``dim``-dimensional Gamma-distributed latent vector (``UA``
    and ``VA``). Each drug additionally gets a scalar Gamma-distributed
    ``exposure`` multiplier. An entry ``(i, j)`` is modelled as
    ``Poisson(exposure[i] * <UA[i], VA[j]>)``.
    """

    def __init__(self, train, dim):
        """Store the data and derive the Gamma prior hyperparameters from it.

        Args:
            train: 2-D NumPy array of observations with shape ``(n, m)``.
                Entries may be NaN (e.g. held-out cells); NaNs are ignored
                when the prior hyperparameters are computed.
            dim: Number of latent factors per drug / side effect.
        """
        super().__init__()
        self.dim = dim
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0, 1)  # (low, high) values used to binarize counts
        self.losses = None
        self.predictions = None
        self.returned = None

        # NaN-aware summary statistics: with NaN placeholders for held-out
        # entries, plain np.mean/np.std would turn every hyperparameter into NaN.
        row_mean = np.nanmean(np.nanmean(self.data, axis=1))
        row_std = np.nanmean(np.nanstd(self.data, axis=1))
        col_mean = np.nanmean(np.nanmean(self.data, axis=0))
        col_std = np.nanmean(np.nanstd(self.data, axis=0))

        # Gamma(concentration, rate) priors whose mean (alpha / beta) equals the
        # average row mean (drugs) or the average column mean (side effects).
        self.alpha_u = row_mean ** 2 / row_std
        self.alpha_v = col_mean ** 2 / col_std

        self.beta_u = row_mean / row_std
        self.beta_v = col_mean / col_std
        self.bias = np.nanmean(self.data)

    def model(self, train, mask):
        """Generative model.

        Args:
            train: Observed ``(n, m)`` float tensor, or ``None`` to sample
                the ``"target"`` site instead of conditioning on it.
            mask: Boolean ``(n, m)`` tensor; entries that are ``False`` are
                excluded from the likelihood.

        Returns:
            The value of the ``"target"`` sample site.
        """
        a = 50  # exposure ~ Gamma(a, a): prior mean 1

        drug_plate = pyro.plate("drug_latents", self.n, dim=-1)  # independent drugs
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim=-1)  # independent side effects

        with drug_plate:
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            exposure = pyro.sample("exposure", dist.Gamma(a, a))

        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))

        # Second plate over drugs on dim -2 so the likelihood is an (n, m) grid.
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate:
            with pyro.poutine.mask(mask=mask):
                Y = pyro.sample("target", dist.Poisson(exposure[:, np.newaxis] * (UA @ VA.T)), obs=train)
                return Y

    def guide(self, train=None, mask=None):
        """Variational family: independent Gamma posteriors for every latent.

        The arguments are unused; they are accepted so that the guide has the
        same call signature as :meth:`model`.
        """
        d_alpha = pyro.param('d_alpha', torch.ones(self.n, self.dim), constraint=constraints.positive)
        d_beta = pyro.param('d_beta', 0.5 * torch.ones(self.n, self.dim), constraint=constraints.positive)
        # One parameter serves as both concentration and rate, so the
        # variational mean of ``exposure`` is always 1.
        exp_alpha = pyro.param('exp_alpha', 10 * torch.ones(self.n), constraint=constraints.positive)

        s_alpha = pyro.param('s_alpha', torch.ones(self.m, self.dim), constraint=constraints.positive)
        s_beta = pyro.param('s_beta', 0.5 * torch.ones(self.m, self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim=-1)  # independent drugs
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim=-1)  # independent side effects

        with drug_plate:
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
            exposure = pyro.sample("exposure", dist.Gamma(exp_alpha, exp_alpha))
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))

    def train_SVI(self, train, mask, nsteps=250, lr=0.05, lrd=1):
        """Fit the guide with stochastic variational inference.

        Args:
            train: ``(n, m)`` NumPy array of observations.
            mask: Boolean ``(n, m)`` tensor, ``True`` where an entry is observed.
            nsteps: Number of SVI steps.
            lr: Learning rate passed to ``ClippedAdam``.
            lrd: Per-step learning-rate decay passed to ``ClippedAdam``.

        Returns:
            List with the loss returned by every SVI step (also stored in
            ``self.losses``).
        """
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
                  self.guide,
                  optim.ClippedAdam({"lr": lr, "lrd": lrd}),
                  loss=Trace_ELBO())

        # Convert once instead of on every step.
        observations = torch.from_numpy(train).float()
        if isinstance(mask, torch.Tensor):
            # Masked-out entries never contribute to the loss, so zero-fill them:
            # NaN placeholders left in place can still propagate NaN into the
            # gradients even though their log-probability is masked.
            observations = torch.where(mask, observations, torch.zeros_like(observations))

        losses = []
        for step in range(nsteps):
            elbo = svi.step(observations, mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        return losses

    def sample_predict(self, nsamples=500, verbose=True):
        """Draw posterior-predictive samples and store binarized predictions.

        The raw ``"target"`` samples are kept in ``self.returned``; their mean
        over samples, thresholded at ``self.bounds[1]``, is stored in
        ``self.predictions``.

        Args:
            nsamples: Number of posterior-predictive samples to draw.
            verbose: If true, print the shape of every sampled site and the
                sampled table.
        """
        unmasked = torch.ones((self.n, self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None, unmasked)

        # Use the sampled observations ("target", one (n, m) table per sample);
        # the "exposure" site only holds the per-drug multipliers and cannot be
        # compared against an (n, m) test matrix.
        table = predictive_svi["target"].numpy()
        if verbose:
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
            print(table)
        self.returned = table

        low, high = self.bounds
        mc_table = table.mean(axis=0)  # Monte Carlo mean over samples
        mc_table[mc_table < high] = low
        mc_table[mc_table >= high] = high
        self.predictions = mc_table

    def rmse(self, test):
        """Compare the stored predictions with ``test`` after binarizing it.

        Args:
            test: ``(n, m)`` NumPy array of counts; values below
                ``self.bounds[1]`` become ``self.bounds[0]``, all others
                become ``self.bounds[1]``.

        Returns:
            Tuple ``(rmse, auc)``.

        Raises:
            RuntimeError: If :meth:`sample_predict` has not been called yet.
        """
        if self.predictions is None:
            raise RuntimeError("No predictions available: call sample_predict() before rmse().")

        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high

        sqerror = (test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum() / (test_data.shape[0] * test_data.shape[1])
        rmse_value = np.sqrt(mse)
        print("PMF MAP training RMSE: %.5f" % rmse_value)

        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),
                                                 self.predictions.astype(int).flatten(),
                                                 pos_label=1)
        auc_value = metrics.auc(fpr, tpr)  # computed once and reused
        print("AUC: %.5f" % auc_value)
        return rmse_value, auc_value

    def get_predictions(self):
        """Return ``(raw posterior-predictive samples, binarized predictions)``."""
        return (self.returned, self.predictions)

    

In [5]:
# Missing entries (NaN) mark the held-out examples; they must not be trained on.
nan_mask = np.isnan(data)
print(torch.from_numpy(nan_mask))

test = PMF_NB_with_drug_varying_alpha(train=data, dim=100)

# The model expects True where an entry IS observed, hence the inversion.
observed_mask = ~torch.from_numpy(nan_mask)
test.train_SVI(data, observed_mask)

tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])
Elbo loss: 2299921026.017166
Elbo loss: 339659708.7041626
Elbo loss: 89322424.35913086
Elbo loss: 48881848.32763672
Elbo loss: 36780380.53189087
Elbo loss: 33859506.11453247
Elbo loss: 32763926.523101807
Elbo loss: 31377885.670043945
Elbo loss: 28825171.24220276
Elbo loss: 25865128.2783432
Elbo loss: 22923475.63496065
Elbo loss: 21334513.88455963
Elbo loss: 19966221.38685608
Elbo loss: 19174857.985290527
Elbo loss: 18261999.525665283
Elbo loss: 17797722.53552246
Elbo loss: 17391315.294647217
Elbo loss: 17144953.75350952
Elbo loss: 16672882.913513184
Elbo loss: 16585416.898406982
Elbo loss: 16091566.316802979
Elbo loss: 15865470.23

[2299921026.017166,
 1896585878.9816132,
 1528808577.1617126,
 1281812323.570465,
 1047349005.3313599,
 862724784.6808167,
 697222344.0084229,
 591083613.0669861,
 482135093.70376587,
 400645422.370636,
 339659708.7041626,
 283667539.7874756,
 238814068.64575195,
 202139789.33895874,
 175065781.60766602,
 151419734.45230103,
 134255352.4430542,
 119533522.41479492,
 106911101.96694946,
 99549447.71835327,
 89322424.35913086,
 82408629.55587769,
 77360420.18356323,
 71797520.88208008,
 68260145.6499939,
 62933725.846466064,
 60019663.7673645,
 56695038.13146973,
 53983017.905548096,
 51202553.24920654,
 48881848.32763672,
 46348769.345214844,
 44581686.32183838,
 42931257.02807617,
 41098875.060791016,
 39732322.4710083,
 38863585.153045654,
 37800449.43481445,
 37676300.815093994,
 37218402.71411133,
 36780380.53189087,
 36662705.857177734,
 36767195.82522583,
 36820468.115875244,
 36559821.24105835,
 36053366.34210205,
 36384098.32397461,
 35438870.4385376,
 35218776.693115234,
 34813

In [11]:
# Draw 1000 posterior-predictive samples; the result is stored on the model object.
test.sample_predict(nsamples=1000)

UA: (1000, 1, 1127, 100)
exposure: (1000, 1, 1127)
VA: (1000, 1, 5237, 100)
target: (1000, 1127, 5237)
[[[ 1.  0.  0. ...  1.  0.  1.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  2.  2.  0.]
  ...
  [12.  0.  0. ...  6.  7.  0.]
  [ 3.  0.  1. ...  8. 15.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  0.  0. ...  1.  2.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  2. ...  0.  2.  0.]
  ...
  [14.  0.  0. ...  6.  2.  0.]
  [ 4.  0.  0. ...  2. 25.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 2.  0.  0. ...  0.  1.  0.]
  [ 1.  0.  0. ...  0.  0.  0.]
  [ 1.  0.  0. ...  0.  0.  0.]
  ...
  [14.  0.  1. ... 12.  4.  0.]
  [ 7.  0.  0. ...  7.  3.  1.]
  [ 0.  0.  0. ...  0.  1.  0.]]

 ...

 [[ 0.  0.  0. ...  0.  2.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  3. 12.  0.]
  ...
  [ 8.  0.  0. ... 21. 33.  0.]
  [ 6.  0.  0. ...  6. 19.  1.]
  [ 0.  0.  0. ...  1.  0.  0.]]

 [[ 1.  1.  0. ...  1.  1.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  1.  4. 

In [9]:
# Score the stored predictions against the training matrix (RMSE and AUC).
test.rmse(test=data)



PMF MAP training RMSE: 0.33401
AUC: 0.84041
[[ 1  0  0 ...  1  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 1  0  0 ...  1  8  0]
 ...
 [ 8  0  0 ... 10 12  0]
 [ 1  0  0 ...  4 25  0]
 [ 0  0  0 ...  0  0  0]]
