In [1]:
import logging

import pickle

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from torch.distributions import constraints
from torch import nn
import pyro
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.infer import Predictive
import seaborn as sns
from pyro import poutine
from sklearn import metrics

In [2]:
pyro.set_rng_seed(10)

In [3]:
class PMF(nn.Module):
    # by default our latent space is 50-dimensional
    # and we use 400 hidden units
    def __init__(self, train, dim):
        super().__init__()
        """Build the Probabilistic Matrix Factorization model using pymc3.



        """
        self.dim = dim   
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0,1)
        self.losses = None
        self.predictions = None
        self.returned = None


        # Perform mean value imputation
    
        
        # Low precision reflects uncertainty; prevents overfitting.
        # Set to the mean variance across users and items.
        self.alpha_u = (np.mean(self.data, axis=1).mean())**2 / np.std(self.data, axis=1).mean()
        self.alpha_v = (np.mean(self.data, axis=0).mean())**2 / np.std(self.data, axis=0).mean()
        
        self.beta_u = (np.mean(self.data, axis=1).mean()) / np.std(self.data, axis=1).mean()
        self.beta_v = (np.mean(self.data, axis=0).mean()) / np.std(self.data, axis=0).mean()
        self.bias = self.data.mean()


    def model(self, train, mask):
        a = 50

        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            #UA_int = pyro.sample("UAint", dist.Normal(0., 1.))
        
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))
            #possibly add intercepts VA_int = pyro.sample("VA", dist.Normal(0., 1.).to_event(1))
       
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate: 
            with pyro.poutine.mask(mask=mask):
             Y = pyro.sample("target", dist.NegativeBinomial(a, UA@VA.T/( UA@VA.T+a) ), obs=train ) 
             return Y
        

    def guide(self, train=None, mask=None):

        d_alpha = pyro.param('d_alpha', torch.ones(self.n,self.dim), constraint=constraints.positive)#*self.user_mean)
        d_beta = pyro.param('d_beta', 0.5*torch.ones(self.n,self.dim), constraint=constraints.positive)
       # int_mean = pyro.param('int_mean', torch.tensor(1.)*self.user_mean)
       # mov_cov = pyro.param('mov_cov', torch.tensor(1.)*0.1,
          #                  constraint=constraints.positive)

        s_alpha = pyro.param('s_alpha', torch.ones(self.m,self.dim), constraint=constraints.positive)#*self.item_mean)
        s_beta = pyro.param('s_beta', 0.5*torch.ones(self.m,self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
           # UA_int = pyro.sample("UAint", dist.Normal(int_mean, mov_cov).to_event(1))
        with sideeffect_plate: 
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))
    
    def train_SVI(self,train,mask, nsteps=250, lr = 0.05, lrd = 1):
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
        self.guide,
        optim.ClippedAdam({"lr": lr, "lrd": lrd}),
        loss=Trace_ELBO())
        losses = []
        for step in range(nsteps):
            elbo = svi.step(torch.from_numpy(train).float(), mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        #constrained_params = list(pyro.get_param_store().values())
        #PARAMS = [p.unconstrained() for p in constrained_params]
        #print(PARAMS)
        return losses
    
    def sample_predict(self, nsamples=500 , verbose=True):
        unmasked =torch.ones((self.n,self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None , unmasked)
        if (verbose):
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
        table = predictive_svi["target"].numpy()
        print(table)
        self.returned = table
        mc_table = table.mean(axis = 0)
        mc_table_std = table.std(axis = 0)
        mc_table[mc_table < self.bounds[1]] = self.bounds[0]
        mc_table[mc_table >= self.bounds[1]] = self.bounds[1]
        self.predictions = mc_table
        
    
    def rmse(self,test):
        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high
        sqerror = abs(test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum()/(test_data.shape[0]*test_data.shape[1])
        print("PMF MAP training RMSE: %.5f" % np.sqrt(mse))
        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),  self.predictions.astype(int).flatten(), pos_label=1)
        metrics.auc(fpr, tpr)
        print("AUC: %.5f" % metrics.auc(fpr, tpr))
        return np.sqrt(mse) , metrics.auc(fpr, tpr)

    def get_predictions(self):
        return (self.returned,self.predictions)

    
   
       

In [3]:

with open('data_all.pickle', 'rb') as handle:
    data = pickle.load(handle)
print(data.shape)

nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )

(1127, 5237)
tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])


In [8]:
test = PMF(train=data, dim=100)
test.train_SVI(data, ~torch.from_numpy(nan_mask))


Elbo loss: 9055118.078125
Elbo loss: 9098882.40625
Elbo loss: 9098319.78125
Elbo loss: 8944937.421875
Elbo loss: 8960036.609375
Elbo loss: 8975340.296875
Elbo loss: 8975446.015625
Elbo loss: 8960144.546875
Elbo loss: 8974696.4921875
Elbo loss: 8968002.8671875
Elbo loss: 8966022.4765625
Elbo loss: 8903937.1484375
Elbo loss: 8957122.9453125
Elbo loss: 8926410.2265625
Elbo loss: 8925323.78125
Elbo loss: 9011655.4296875
Elbo loss: 8939463.015625
Elbo loss: 8934858.796875
Elbo loss: 8910493.421875
Elbo loss: 8971726.765625
Elbo loss: 8934409.6484375
Elbo loss: 8939027.8984375
Elbo loss: 8913324.953125
Elbo loss: 8867438.7578125
Elbo loss: 8900528.0390625


[9055118.078125,
 9083290.515625,
 9042242.46875,
 9069657.359375,
 9041994.7109375,
 9152475.5234375,
 9091170.0546875,
 9057834.6640625,
 9054962.546875,
 9086004.734375,
 9098882.40625,
 9074073.7109375,
 9104094.953125,
 9075988.625,
 9122053.78125,
 9036397.59375,
 9031093.109375,
 9022227.2265625,
 9049068.015625,
 8996837.90625,
 9098319.78125,
 9060713.2734375,
 9057273.359375,
 9011294.296875,
 9001157.75,
 9026720.7421875,
 9007314.875,
 9022993.0859375,
 9015175.3515625,
 9013876.984375,
 8944937.421875,
 9014812.75,
 8955290.6953125,
 9037072.84375,
 8952280.265625,
 8989081.21875,
 9036661.109375,
 9033199.5546875,
 9005478.7421875,
 9010961.1328125,
 8960036.609375,
 8925865.4921875,
 8939705.34375,
 8965799.1328125,
 8971165.4140625,
 9038159.875,
 8981584.1640625,
 9000537.8046875,
 8974634.4140625,
 8936924.3984375,
 8975340.296875,
 8986144.875,
 9002850.53125,
 8986332.9375,
 8928115.234375,
 8966553.3515625,
 8958798.046875,
 9011376.640625,
 8944591.359375,
 897208

In [9]:
test.sample_predict(1000)

UA: (1000, 1, 1127, 100)
VA: (1000, 1, 5237, 100)
target: (1000, 1127, 5237)
[[[  0.   0.   0. ...   1.   4.   0.]
  [  0.   0.   0. ...   1.   0.   0.]
  [  0.   4.   0. ...   1.   9.   0.]
  ...
  [ 12.   0.   1. ... 105.  18.   1.]
  [  4.   0.   0. ...   9.   9.   0.]
  [  0.   0.   0. ...   0.   0.   0.]]

 [[  2.   3.   0. ...   0.   1.   0.]
  [  0.   0.   0. ...   0.   2.   0.]
  [  0.   0.   0. ...   1.   5.   0.]
  ...
  [ 33.   1.   0. ...  66.   7.   1.]
  [  0.   0.   0. ...   5.  16.   0.]
  [  0.   0.   0. ...   0.   0.   0.]]

 [[  1.   0.   0. ...   1.   2.   0.]
  [  0.   0.   0. ...   0.   0.   0.]
  [  1.   0.   0. ...   1.   3.   0.]
  ...
  [ 15.   0.   0. ...  48.   6.   0.]
  [  5.   0.   0. ...   4.   8.   0.]
  [  0.   0.   0. ...   0.   1.   0.]]

 ...

 [[  1.   0.   0. ...   1.   1.   1.]
  [  0.   0.   0. ...   1.   0.   0.]
  [  1.   0.   0. ...   3.   1.   0.]
  ...
  [ 24.   0.   0. ...  20.  20.   0.]
  [  2.   0.   0. ...   6.   5.   0.]
  [  0.   0. 

In [10]:
test.rmse(data)
print(test.get_predictions())
print(data)

PMF MAP training RMSE: 0.32548
AUC: 0.84278
(array([[[  0.,   0.,   0., ...,   1.,   4.,   0.],
        [  0.,   0.,   0., ...,   1.,   0.,   0.],
        [  0.,   4.,   0., ...,   1.,   9.,   0.],
        ...,
        [ 12.,   0.,   1., ..., 105.,  18.,   1.],
        [  4.,   0.,   0., ...,   9.,   9.,   0.],
        [  0.,   0.,   0., ...,   0.,   0.,   0.]],

       [[  2.,   3.,   0., ...,   0.,   1.,   0.],
        [  0.,   0.,   0., ...,   0.,   2.,   0.],
        [  0.,   0.,   0., ...,   1.,   5.,   0.],
        ...,
        [ 33.,   1.,   0., ...,  66.,   7.,   1.],
        [  0.,   0.,   0., ...,   5.,  16.,   0.],
        [  0.,   0.,   0., ...,   0.,   0.,   0.]],

       [[  1.,   0.,   0., ...,   1.,   2.,   0.],
        [  0.,   0.,   0., ...,   0.,   0.,   0.],
        [  1.,   0.,   0., ...,   1.,   3.,   0.],
        ...,
        [ 15.,   0.,   0., ...,  48.,   6.,   0.],
        [  5.,   0.,   0., ...,   4.,   8.,   0.],
        [  0.,   0.,   0., ...,   0.,   1.,  

In [4]:
class PMF_zero_inflated_poisson(nn.Module):

    # by default our latent space is 50-dimensional
    # and we use 400 hidden units
    def __init__(self, train, dim):
        super().__init__()
        """Build the Probabilistic Matrix Factorization model using pymc3.



        """
        self.dim = dim   
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0,1)
        self.losses = None
        self.predictions = None
        self.returned = None


        # Perform mean value imputation
    
        
        # Low precision reflects uncertainty; prevents overfitting.
        # Set to the mean variance across users and items.
        self.alpha_u = (np.mean(self.data, axis=1).mean())**2 / np.std(self.data, axis=1).mean()
        self.alpha_v = (np.mean(self.data, axis=0).mean())**2 / np.std(self.data, axis=0).mean()
        
        self.beta_u = (np.mean(self.data, axis=1).mean()) / np.std(self.data, axis=1).mean()
        self.beta_v = (np.mean(self.data, axis=0).mean()) / np.std(self.data, axis=0).mean()
        self.bias = self.data.mean()
        self.alpha = 1


    def model(self, train, mask):
        alpha = 1
        beta = 1

        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            #alpha = pyro.sample("alpha", dist.Poisson(self.alpha))
            p = pyro.sample("p", dist.Beta(alpha, beta))
        
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))
            #possibly add intercepts VA_int = pyro.sample("VA", dist.Normal(0., 1.).to_event(1))
       
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate: 
           # with pyro.poutine.mask(mask=mask):
             Y = pyro.sample("target", dist.ZeroInflatedPoisson( rate = UA@VA.T ,gate = p[:, np.newaxis]), obs=train ) 
             return Y
        

    def guide(self, train=None, mask=None):

        d_alpha = pyro.param('d_alpha', torch.ones(self.n,self.dim), constraint=constraints.positive)#*self.user_mean)
        d_beta = pyro.param('d_beta', 0.5*torch.ones(self.n,self.dim), constraint=constraints.positive)
        rate_alpha = pyro.param('rate_alpha', torch.ones(self.n), constraint=constraints.positive)
        rate_beta = pyro.param('rate_beta', torch.ones(self.n), constraint=constraints.positive)


        s_alpha = pyro.param('s_alpha', torch.ones(self.m,self.dim), constraint=constraints.positive)#*self.item_mean)
        s_beta = pyro.param('s_beta', 0.5*torch.ones(self.m,self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
            p = pyro.sample("p", dist.Beta(rate_beta,rate_alpha))

        with sideeffect_plate: 
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))
    
    def train_SVI(self,train,mask, nsteps=250, lr = 0.05, lrd = 1):
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
        self.guide,
        optim.ClippedAdam({"lr": lr, "lrd": lrd}),
        loss=Trace_ELBO())
        losses = []
        for step in range(nsteps):
            elbo = svi.step(torch.from_numpy(train).float(), mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        #constrained_params = list(pyro.get_param_store().values())
        #PARAMS = [p.unconstrained() for p in constrained_params]
        #print(PARAMS)
        return losses
    
    def sample_predict(self, nsamples=500 , verbose=True):
        unmasked =torch.ones((self.n,self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None , unmasked)
        if (verbose):
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
        table = predictive_svi["target"].numpy()
        print(table)
        self.returned = table
        mc_table = table.mean(axis = 0)
        mc_table_std = table.std(axis = 0)
        mc_table[mc_table < self.bounds[1]] = self.bounds[0]
        mc_table[mc_table >= self.bounds[1]] = self.bounds[1]
        self.predictions = mc_table
        
    
    def rmse(self,test):
        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high
        sqerror = abs(test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum()/(test_data.shape[0]*test_data.shape[1])
        print("PMF MAP training RMSE: %.5f" % np.sqrt(mse))
        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),  self.predictions.astype(int).flatten(), pos_label=1)
        metrics.auc(fpr, tpr)
        print("AUC: %.5f" % metrics.auc(fpr, tpr))
        return np.sqrt(mse) , metrics.auc(fpr, tpr)

    def get_predictions(self):
        return (self.returned,self.predictions)

    

In [7]:

nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )
test = PMF_zero_inflated_poisson(train=data, dim=100)
test.train_SVI(data, ~torch.from_numpy(nan_mask))



tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])
Elbo loss: 13729418.012771606
Elbo loss: 13750145.610977173
Elbo loss: 13962415.76576233
Elbo loss: 14119793.30834961
Elbo loss: 13824654.43333435
Elbo loss: 13529111.021270752
Elbo loss: 13652855.801116943
Elbo loss: 13721328.9821167
Elbo loss: 13713437.802825928
Elbo loss: 13583459.632843018
Elbo loss: 13578454.516143799
Elbo loss: 13874178.841888428
Elbo loss: 13529643.261825562
Elbo loss: 13496243.551498413
Elbo loss: 13694373.06930542
Elbo loss: 14108100.281860352
Elbo loss: 13625978.810073853
Elbo loss: 13809180.250991821
Elbo loss: 13647418.549407959
Elbo loss: 13414881.601287842
Elbo loss: 13374836.144683838
Elbo loss: 136

[13729418.012771606,
 13799094.6040802,
 13937064.166641235,
 14044982.368469238,
 13740619.588027954,
 14102335.373184204,
 14144757.285995483,
 14303679.13168335,
 13957817.211029053,
 14330843.800842285,
 13750145.610977173,
 14096368.707519531,
 13901555.47517395,
 14066655.321548462,
 14226695.234725952,
 13725984.370407104,
 14066136.482330322,
 13839212.206588745,
 13854251.875823975,
 13762393.408065796,
 13962415.76576233,
 14064696.379089355,
 13823601.69203186,
 13776417.682678223,
 13716088.710235596,
 13696205.110061646,
 13693988.414520264,
 13681650.319534302,
 13648156.417098999,
 13508108.145874023,
 14119793.30834961,
 13991413.649398804,
 13794762.69897461,
 13966015.728988647,
 13651513.499816895,
 14044038.64138794,
 13633606.217758179,
 13745714.428619385,
 14126495.49293518,
 14070257.738983154,
 13824654.43333435,
 13906832.114593506,
 13913919.809814453,
 13824604.572677612,
 14103393.58883667,
 13857544.905853271,
 13991888.443603516,
 13700721.345794678,
 137

In [8]:
test.sample_predict(1000)



UA: (1000, 1, 1127, 100)
p: (1000, 1, 1127)
VA: (1000, 1, 5237, 100)
target: (1000, 1127, 5237)
[[[ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  2.  0. ...  0.  0.  0.]
  [ 2.  1.  0. ...  0.  0.  0.]
  ...
  [ 0.  2.  1. ...  0.  0.  1.]
  [ 5.  0.  0. ...  0. 30.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  0.  0. ...  0.  1.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 1.  0.  0. ...  0.  0.  0.]
  ...
  [16.  1.  0. ... 13. 17.  0.]
  [ 2.  0.  0. ...  4. 20.  3.]
  [ 0.  1.  0. ...  3.  1.  1.]]

 [[ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 1.  0.  0. ...  4.  0.  0.]
  ...
  [ 0.  0.  0. ...  6.  0.  0.]
  [ 3.  0.  1. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 ...

 [[ 3.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 2.  0.  0. ...  0.  7.  1.]
  ...
  [12.  1.  2. ...  2.  0.  1.]
  [ 3.  1.  0. ...  9. 29.  1.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  1.]
  

In [9]:
test.rmse(data)
print(test.get_predictions())
print(data)


PMF MAP training RMSE: 0.36273
AUC: 0.81252
(array([[[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  2.,  0., ...,  0.,  0.,  0.],
        [ 2.,  1.,  0., ...,  0.,  0.,  0.],
        ...,
        [ 0.,  2.,  1., ...,  0.,  0.,  1.],
        [ 5.,  0.,  0., ...,  0., 30.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  0.,  0., ...,  0.,  1.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        ...,
        [16.,  1.,  0., ..., 13., 17.,  0.],
        [ 2.,  0.,  0., ...,  4., 20.,  3.],
        [ 0.,  1.,  0., ...,  3.,  1.,  1.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  4.,  0.,  0.],
        ...,
        [ 0.,  0.,  0., ...,  6.,  0.,  0.],
        [ 3.,  0.,  1., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       ...,

       [[ 3.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.

In [11]:
class PMF_zero_NB(nn.Module):

    # by default our latent space is 50-dimensional
    # and we use 400 hidden units
    def __init__(self, train, dim):
        super().__init__()
        """Build the Probabilistic Matrix Factorization model using pymc3.



        """
        self.dim = dim   
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0,1)
        self.losses = None
        self.predictions = None
        self.returned = None


        # Perform mean value imputation
    
        
        # Low precision reflects uncertainty; prevents overfitting.
        # Set to the mean variance across users and items.
        self.alpha_u = (np.mean(self.data, axis=1).mean())**2 / np.std(self.data, axis=1).mean()
        self.alpha_v = (np.mean(self.data, axis=0).mean())**2 / np.std(self.data, axis=0).mean()
        
        self.beta_u = (np.mean(self.data, axis=1).mean()) / np.std(self.data, axis=1).mean()
        self.beta_v = (np.mean(self.data, axis=0).mean()) / np.std(self.data, axis=0).mean()
        self.bias = self.data.mean()
        self.alpha = 1


    def model(self, train, mask):
        alpha = 1
        beta = 1

        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            #alpha = pyro.sample("alpha", dist.Poisson(self.alpha))
            p = pyro.sample("p", dist.Beta(alpha, beta))
        
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))
            #possibly add intercepts VA_int = pyro.sample("VA", dist.Normal(0., 1.).to_event(1))
       
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate: 
           # with pyro.poutine.mask(mask=mask):
             Y = pyro.sample("target", dist.ZeroInflatedDistribution( base_dist= dist.NegativeBinomial(alpha, UA@VA.T/( UA@VA.T+alpha)) ,gate = p[:, np.newaxis]), obs=train ) 
             return Y
        

    def guide(self, train=None, mask=None):

        d_alpha = pyro.param('d_alpha', torch.ones(self.n,self.dim), constraint=constraints.positive)#*self.user_mean)
        d_beta = pyro.param('d_beta', 0.5*torch.ones(self.n,self.dim), constraint=constraints.positive)
        rate_alpha = pyro.param('rate_alpha', torch.ones(self.n), constraint=constraints.positive)
        rate_beta = pyro.param('rate_beta', torch.ones(self.n), constraint=constraints.positive)


        s_alpha = pyro.param('s_alpha', torch.ones(self.m,self.dim), constraint=constraints.positive)#*self.item_mean)
        s_beta = pyro.param('s_beta', 0.5*torch.ones(self.m,self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
            p = pyro.sample("p", dist.Beta(rate_beta,rate_alpha))

        with sideeffect_plate: 
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))
    
    def train_SVI(self,train,mask, nsteps=250, lr = 0.05, lrd = 1):
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
        self.guide,
        optim.ClippedAdam({"lr": lr, "lrd": lrd}),
        loss=Trace_ELBO())
        losses = []
        for step in range(nsteps):
            elbo = svi.step(torch.from_numpy(train).float(), mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        #constrained_params = list(pyro.get_param_store().values())
        #PARAMS = [p.unconstrained() for p in constrained_params]
        #print(PARAMS)
        return losses
    
    def sample_predict(self, nsamples=500 , verbose=True):
        unmasked =torch.ones((self.n,self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None , unmasked)
        if (verbose):
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
        table = predictive_svi["target"].numpy()
        print(table)
        self.returned = table
        mc_table = table.mean(axis = 0)
        mc_table_std = table.std(axis = 0)
        mc_table[mc_table < self.bounds[1]] = self.bounds[0]
        mc_table[mc_table >= self.bounds[1]] = self.bounds[1]
        self.predictions = mc_table
        
    
    def rmse(self,test):
        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high
        sqerror = abs(test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum()/(test_data.shape[0]*test_data.shape[1])
        print("PMF MAP training RMSE: %.5f" % np.sqrt(mse))
        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),  self.predictions.astype(int).flatten(), pos_label=1)
        metrics.auc(fpr, tpr)
        print("AUC: %.5f" % metrics.auc(fpr, tpr))
        return np.sqrt(mse) , metrics.auc(fpr, tpr)

    def get_predictions(self):
        return (self.returned,self.predictions)

    

In [15]:
nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )
test = PMF_zero_NB(train=data, dim=100)
test.train_SVI(data, ~torch.from_numpy(nan_mask))


tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])
Elbo loss: 6012035.456176758
Elbo loss: 6026213.563171387
Elbo loss: 5999806.37689209
Elbo loss: 6007662.76953125
Elbo loss: 5995980.518127441
Elbo loss: 5969965.423828125
Elbo loss: 6014862.355224609
Elbo loss: 5978878.61340332
Elbo loss: 5958227.224975586
Elbo loss: 5949028.366088867
Elbo loss: 5967524.559936523
Elbo loss: 5937743.723571777
Elbo loss: 5964058.8966674805
Elbo loss: 5960032.478393555
Elbo loss: 5957602.663635254
Elbo loss: 5935067.991943359
Elbo loss: 5938172.410766602
Elbo loss: 5915344.634277344
Elbo loss: 5933078.581726074
Elbo loss: 5929822.35534668
Elbo loss: 5946331.634033203
Elbo loss: 5940296.6384887695
El

[6012035.456176758,
 6012342.945617676,
 6004319.902587891,
 6017493.768127441,
 6034775.2158203125,
 6007978.318664551,
 6030311.8955078125,
 5986345.342956543,
 6015944.429016113,
 6018410.323974609,
 6026213.563171387,
 6028207.652770996,
 6013876.738891602,
 6020635.438964844,
 6039289.8212890625,
 5998328.465454102,
 6028851.170043945,
 5993492.186157227,
 6026789.848510742,
 6003249.84185791,
 5999806.37689209,
 6022679.270263672,
 6039606.224121094,
 5974590.749328613,
 5992812.397521973,
 6003235.42980957,
 6026491.765380859,
 6026796.878845215,
 6000504.59576416,
 5976955.481811523,
 6007662.76953125,
 5970413.448547363,
 5988363.527587891,
 5985468.2060546875,
 5962649.883850098,
 6008944.888000488,
 6005564.186889648,
 5974745.263000488,
 5984125.41796875,
 6015425.143432617,
 5995980.518127441,
 5986274.057678223,
 5970616.791931152,
 5985542.175231934,
 5971055.80456543,
 5971207.098693848,
 5976890.8572387695,
 5988524.072509766,
 6006659.8359375,
 5955468.06842041,
 5969

In [16]:
test.sample_predict(1000)

UA: (1000, 1, 1127, 100)
p: (1000, 1, 1127)
VA: (1000, 1, 5237, 100)
target: (1000, 1127, 5237)
[[[ 0.  0.  0. ...  0.  3.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  ...
  [25.  0.  1. ...  5. 14.  1.]
  [ 0.  0.  0. ...  1.  6.  1.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  1.  0. ...  0.  2.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 2.  0.  0. ...  0.  0.  0.]
  ...
  [ 3.  0.  0. ... 13.  7.  0.]
  [ 0.  0.  0. ...  1.  3.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 2.  1.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  1.  0. ...  7.  0.  0.]
  ...
  [ 0.  2.  0. ... 10. 49.  0.]
  [12.  0.  0. ...  3. 39.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 ...

 [[ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 2.  1.  1. ...  2.  7.  0.]
  ...
  [ 0.  0.  0. ... 10.  0.  0.]
  [ 2.  0.  0. ...  0. 13.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[ 0.  0.  0. ...  0.  0.  1.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  1.]
  

In [17]:
test.rmse(data)
print(test.get_predictions())
print(data)

PMF MAP training RMSE: 0.34168
AUC: 0.83550
(array([[[ 0.,  0.,  0., ...,  0.,  3.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ...,
        [25.,  0.,  1., ...,  5., 14.,  1.],
        [ 0.,  0.,  0., ...,  1.,  6.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  1.,  0., ...,  0.,  2.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 2.,  0.,  0., ...,  0.,  0.,  0.],
        ...,
        [ 3.,  0.,  0., ..., 13.,  7.,  0.],
        [ 0.,  0.,  0., ...,  1.,  3.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 2.,  1.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  1.,  0., ...,  7.,  0.,  0.],
        ...,
        [ 0.,  2.,  0., ..., 10., 49.,  0.],
        [12.,  0.,  0., ...,  3., 39.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       ...,

       [[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.

In [4]:
class PMF_NB_with_drug_varying_alpha(nn.Module):
    # by default our latent space is 50-dimensional
    # and we use 400 hidden units
    def __init__(self, train, dim):
        super().__init__()
        """Build the Probabilistic Matrix Factorization model using pymc3.



        """
        self.dim = dim   
        self.data = train.copy()
        self.n, self.m = self.data.shape
        self.map = None
        self.bounds = (0,1)
        self.losses = None
        self.predictions = None
        self.returned = None


        # Perform mean value imputation
    
        
        # Low precision reflects uncertainty; prevents overfitting.
        # Set to the mean variance across users and items.
        self.alpha_u = (np.mean(self.data, axis=1).mean())**2 / np.std(self.data, axis=1).mean()
        self.alpha_v = (np.mean(self.data, axis=0).mean())**2 / np.std(self.data, axis=0).mean()
        
        self.beta_u = (np.mean(self.data, axis=1).mean()) / np.std(self.data, axis=1).mean()
        self.beta_v = (np.mean(self.data, axis=0).mean()) / np.std(self.data, axis=0).mean()
        self.bias = self.data.mean()


    def model(self, train, mask):
        a = 50

        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(self.alpha_u, self.beta_u).expand([self.dim]).to_event(1))
            #UA_int = pyro.sample("UAint", dist.Normal(0., 1.))
            alpha = pyro.sample("alpha", dist.Gamma(a,a))
        
        with sideeffect_plate:
            VA = pyro.sample("VA", dist.Gamma(self.alpha_v, self.beta_v).expand([self.dim]).to_event(1))
            #possibly add intercepts VA_int = pyro.sample("VA", dist.Normal(0., 1.).to_event(1))
       
        u2_plate = pyro.plate("u2_plate", self.n, dim=-2)

        with sideeffect_plate, u2_plate: 
            with pyro.poutine.mask(mask=mask):
             Y = pyro.sample("target", dist.NegativeBinomial(alpha[:, np.newaxis], UA@VA.T/( UA@VA.T+alpha[:, np.newaxis]) ), obs=train ) 
             return Y
        

    def guide(self, train=None, mask=None):

        d_alpha = pyro.param('d_alpha', torch.ones(self.n,self.dim), constraint=constraints.positive)#*self.user_mean)
        d_beta = pyro.param('d_beta', 0.5*torch.ones(self.n,self.dim), constraint=constraints.positive)
        p_alpha = pyro.param('p_alpha', 50*torch.ones(self.n), constraint=constraints.positive)

        s_alpha = pyro.param('s_alpha', torch.ones(self.m,self.dim), constraint=constraints.positive)#*self.item_mean)
        s_beta = pyro.param('s_beta', 0.5*torch.ones(self.m,self.dim), constraint=constraints.positive)
        drug_plate = pyro.plate("drug_latents", self.n, dim= -1) #independent users
        sideeffect_plate = pyro.plate("sideeffect_latents", self.m, dim= -1) #independent items

        with drug_plate: 
            UA = pyro.sample("UA", dist.Gamma(d_alpha, d_beta).to_event(1))
            alpha = pyro.sample("alpha", dist.Poisson(p_alpha))
        with sideeffect_plate: 
            VA = pyro.sample("VA", dist.Gamma(s_alpha, s_beta).to_event(1))
    
    def train_SVI(self,train,mask, nsteps=250, lr = 0.05, lrd = 1):
        logging.basicConfig(format='%(message)s', level=logging.INFO)
        svi = SVI(self.model,
        self.guide,
        optim.ClippedAdam({"lr": lr, "lrd": lrd}),
        loss=Trace_ELBO())
        losses = []
        for step in range(nsteps):
            elbo = svi.step(torch.from_numpy(train).float(), mask)
            losses.append(elbo)
            if step % 10 == 0:
                print("Elbo loss: {}".format(elbo))
        self.losses = losses
        #constrained_params = list(pyro.get_param_store().values())
        #PARAMS = [p.unconstrained() for p in constrained_params]
        #print(PARAMS)
        return losses
    
    def sample_predict(self, nsamples=500 , verbose=True):
        unmasked =torch.ones((self.n,self.m), dtype=torch.bool)
        predictive_svi = Predictive(self.model, guide=self.guide, num_samples=nsamples)(None , unmasked)
        if (verbose):
            for k, v in predictive_svi.items():
                print(f"{k}: {tuple(v.shape)}")
        table = predictive_svi["target"].numpy()
        print(table)
        self.returned = table
        mc_table = table.mean(axis = 0)
        mc_table_std = table.std(axis = 0)
        mc_table[mc_table < self.bounds[1]] = self.bounds[0]
        mc_table[mc_table >= self.bounds[1]] = self.bounds[1]
        self.predictions = mc_table
        
    
    def rmse(self,test):
        low, high = self.bounds
        test_data = test.copy()
        test_data[test_data < high] = low
        test_data[test_data >= high] = high
        sqerror = abs(test_data - self.predictions) ** 2  # squared error array
        mse = sqerror.sum()/(test_data.shape[0]*test_data.shape[1])
        print("PMF MAP training RMSE: %.5f" % np.sqrt(mse))
        fpr, tpr, thresholds = metrics.roc_curve(test_data.astype(int).flatten(),  self.predictions.astype(int).flatten(), pos_label=1)
        metrics.auc(fpr, tpr)
        print("AUC: %.5f" % metrics.auc(fpr, tpr))
        return np.sqrt(mse) , metrics.auc(fpr, tpr)

    def get_predictions(self):
        return (self.returned,self.predictions)

    

In [5]:
nan_mask = np.isnan(data) #when calculating the train/test set to "nan" all the examples that are for testing so that you do not train on them 
print(torch.from_numpy(nan_mask) )
test = PMF_NB_with_drug_varying_alpha(train=data, dim=100)
test.train_SVI(data, ~torch.from_numpy(nan_mask))

tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])
Elbo loss: 624782154.984375
Elbo loss: 210031890.42333984
Elbo loss: 57739272.76611328
Elbo loss: 27311795.94580078
Elbo loss: 19653596.55517578
Elbo loss: 17916772.713378906
Elbo loss: 17202988.59033203
Elbo loss: 16705030.41381836
Elbo loss: 15988915.490234375
Elbo loss: 15020519.251220703
Elbo loss: 14018313.538574219
Elbo loss: 13170151.506835938
Elbo loss: 12490068.3203125
Elbo loss: 12174545.774658203
Elbo loss: 11835679.22265625
Elbo loss: 11559356.052734375


ValueError: Expected parameter probs (Tensor of shape (1127, 5237)) of distribution NegativeBinomial(total_count: torch.Size([1127, 5237]), probs: torch.Size([1127, 5237])) to satisfy the constraint HalfOpenInterval(lower_bound=0.0, upper_bound=1.0), but found invalid values:
tensor([[1.4882e-02, 1.0651e-03, 6.1118e-04,  ..., 1.5020e-02, 1.5903e-02,
         5.6262e-04],
        [1.8981e-02, 1.9679e-03, 1.0406e-03,  ..., 4.3328e-02, 8.6587e-02,
         3.1753e-03],
        [2.7582e-02, 4.9676e-03, 2.5269e-03,  ..., 2.6313e-02, 3.7693e-02,
         1.9212e-03],
        ...,
        [2.9804e-02, 6.3317e-04, 7.6498e-04,  ..., 1.3111e-02, 1.3234e-01,
         8.8866e-04],
        [8.2196e-02, 2.7713e-03, 4.4483e-03,  ..., 8.5888e-02, 4.6395e-01,
         2.1183e-02],
        [1.2121e-02, 4.0922e-04, 3.4167e-04,  ..., 1.0375e-02, 1.7970e-02,
         2.7147e-04]], grad_fn=<DivBackward0>)
          Trace Shapes:           
           Param Sites:           
          Sample Sites:           
      drug_latents dist      |    
                  value 1127 |    
sideeffect_latents dist      |    
                  value 5237 |    
                UA dist 1127 | 100
                  value 1127 | 100
             alpha dist 1127 |    
                  value 1127 |    
                VA dist 5237 | 100
                  value 5237 | 100
          u2_plate dist      |    
                  value 1127 |    