In [1]:
import logging

import pickle

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from torch.distributions import constraints
from torch import nn
import pyro
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.infer import Predictive
import seaborn as sns
from pyro import poutine
from sklearn import metrics

In [2]:
pyro.set_rng_seed(10)

In [3]:

with open('data_all.pickle', 'rb') as handle:
    data = pickle.load(handle)
print(data.shape)

nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )

(1127, 5237)
tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])


In [10]:
class PMF_NB_with_drug_varying_alpha(nn.Module):
    # by default our latent space is 50-dimensional
    # and we use 400 hidden units
    def __init__(self, train, dim):
        super().__init__()
        """Build the Probabilistic Matrix Factorization model using pymc3.



        """
        self.dim = dim   
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0,1)
        self.losses = None
        self.predictions = None
        self.returned = None


        # Perform mean value imputation
    
        
        # Low precision reflects uncertainty; prevents overfitting.
        # Set to the mean variance across users and items.
        self.alpha_u = (np.mean(self.data, axis=1).mean())**2 / np.std(self.data, axis=1).mean()
        self.alpha_v = (np.mean(self.data, axis=0).mean())**2 / np.std(self.data, axis=0).mean()
        
        self.beta_u = (np.mean(self.data, axis=1).mean()) / np.std(self.data, axis=1).mean()
        self.beta_v = (np.mean(self.data, axis=0).mean()) / np.std(self.data, axis=0).mean()
        self.bias = self.data.mean()


    def model(self, train, mask):
        a = 50

        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            #UA_int = pyro.sample("UAint", dist.Normal(0., 1.))
            exposure = pyro.sample("exposure", dist.Gamma(a,a))
        
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))
            #possibly add intercepts VA_int = pyro.sample("VA", dist.Normal(0., 1.).to_event(1))
       
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate: 
            with pyro.poutine.mask(mask=mask):
             Y = pyro.sample("target", dist.Poisson(exposure[:, np.newaxis]*(UA@VA.T )), obs=train ) 
             return Y
        

    def guide(self, train=None, mask=None):

        d_alpha = pyro.param('d_alpha', torch.ones(self.n,self.dim), constraint=constraints.positive)#*self.user_mean)
        d_beta = pyro.param('d_beta', 0.5*torch.ones(self.n,self.dim), constraint=constraints.positive)
        exp_alpha = pyro.param('exp_alpha', 10*torch.ones(self.n), constraint=constraints.positive)

        s_alpha = pyro.param('s_alpha', torch.ones(self.m,self.dim), constraint=constraints.positive)#*self.item_mean)
        s_beta = pyro.param('s_beta', 0.5*torch.ones(self.m,self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
            exposure = pyro.sample("exposure", dist.Gamma(exp_alpha,exp_alpha))
        with sideeffect_plate: 
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))
    
    def train_SVI(self,train,mask, nsteps=250, lr = 0.05, lrd = 1):
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
        self.guide,
        optim.ClippedAdam({"lr": lr, "lrd": lrd}),
        loss=Trace_ELBO())
        losses = []
        for step in range(nsteps):
            elbo = svi.step(torch.from_numpy(train).float(), mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        #constrained_params = list(pyro.get_param_store().values())
        #PARAMS = [p.unconstrained() for p in constrained_params]
        #print(PARAMS)
        return losses
    
    def sample_predict(self, nsamples=500 , verbose=True):
        unmasked =torch.ones((self.n,self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None , unmasked)
        if (verbose):
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
        table = predictive_svi["exposure"].numpy()
        print(table)
        self.returned = table
        mc_table = table.mean(axis = 0)
        mc_table_std = table.std(axis = 0)
        mc_table[mc_table < self.bounds[1]] = self.bounds[0]
        mc_table[mc_table >= self.bounds[1]] = self.bounds[1]
        self.predictions = mc_table
        
    
    def rmse(self,test):
        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high
        sqerror = abs(test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum()/(test_data.shape[0]*test_data.shape[1])
        print("PMF MAP training RMSE: %.5f" % np.sqrt(mse))
        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),  self.predictions.astype(int).flatten(), pos_label=1)
        metrics.auc(fpr, tpr)
        print("AUC: %.5f" % metrics.auc(fpr, tpr))
        return np.sqrt(mse) , metrics.auc(fpr, tpr)

    def get_predictions(self):
        return (self.returned,self.predictions)

    

In [7]:
nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )
test = PMF_NB_with_drug_varying_alpha(train=data, dim=100)
test.train_SVI(data, ~torch.from_numpy(nan_mask))

tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])
Elbo loss: 14194447.621154785
Elbo loss: 14130161.198669434
Elbo loss: 14284480.731445312
Elbo loss: 14230367.996765137
Elbo loss: 14324177.706298828
Elbo loss: 14331913.098388672
Elbo loss: 14118142.445678711
Elbo loss: 14296141.019104004
Elbo loss: 14119174.425048828
Elbo loss: 14156370.638671875
Elbo loss: 14143980.662963867
Elbo loss: 13827753.295166016
Elbo loss: 13845488.619506836
Elbo loss: 13972778.467041016
Elbo loss: 13938079.723144531
Elbo loss: 13949353.765014648
Elbo loss: 13896417.450317383
Elbo loss: 14047570.932373047
Elbo loss: 14177313.704833984
Elbo loss: 13987042.729858398
Elbo loss: 14221438.900634766
Elbo los

[14194447.621154785,
 14333842.333068848,
 14514311.205993652,
 14218491.99243164,
 14209397.851379395,
 14638470.243286133,
 14669731.4453125,
 14691354.706176758,
 14223818.681030273,
 14411563.524169922,
 14130161.198669434,
 14446784.10180664,
 14411112.045593262,
 14419595.591247559,
 14331046.775390625,
 14350604.32421875,
 14440236.947753906,
 14162428.555603027,
 14441033.882202148,
 14288192.323608398,
 14284480.731445312,
 14356508.256408691,
 14225457.04675293,
 14095841.653015137,
 14379981.488708496,
 14282829.19555664,
 14149645.112487793,
 14194447.8984375,
 14300100.908813477,
 14065399.546020508,
 14230367.996765137,
 14098761.446350098,
 14265725.034179688,
 14310495.785766602,
 14390462.949584961,
 13881414.09741211,
 14316394.94720459,
 14062477.778808594,
 14298401.50769043,
 14286171.079711914,
 14324177.706298828,
 14277318.918151855,
 14179637.600708008,
 14048939.700012207,
 14200867.963806152,
 14075476.076660156,
 13950326.0836792,
 14219917.582824707,
 13844

In [11]:
test.sample_predict(1000)

UA: (1000, 1, 1127, 100)
exposure: (1000, 1, 1127)
VA: (1000, 1, 5237, 100)
target: (1000, 1127, 5237)
[[[ 1.  0.  0. ...  1.  0.  1.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  2.  2.  0.]
  ...
  [12.  0.  0. ...  6.  7.  0.]
  [ 3.  0.  1. ...  8. 15.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  0.  0. ...  1.  2.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  2. ...  0.  2.  0.]
  ...
  [14.  0.  0. ...  6.  2.  0.]
  [ 4.  0.  0. ...  2. 25.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 2.  0.  0. ...  0.  1.  0.]
  [ 1.  0.  0. ...  0.  0.  0.]
  [ 1.  0.  0. ...  0.  0.  0.]
  ...
  [14.  0.  1. ... 12.  4.  0.]
  [ 7.  0.  0. ...  7.  3.  1.]
  [ 0.  0.  0. ...  0.  1.  0.]]

 ...

 [[ 0.  0.  0. ...  0.  2.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  3. 12.  0.]
  ...
  [ 8.  0.  0. ... 21. 33.  0.]
  [ 6.  0.  0. ...  6. 19.  1.]
  [ 0.  0.  0. ...  1.  0.  0.]]

 [[ 1.  1.  0. ...  1.  1.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  1.  4. 

In [9]:
test.rmse(data)



PMF MAP training RMSE: 0.33401
AUC: 0.84041
[[ 1  0  0 ...  1  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 1  0  0 ...  1  8  0]
 ...
 [ 8  0  0 ... 10 12  0]
 [ 1  0  0 ...  4 25  0]
 [ 0  0  0 ...  0  0  0]]
