# Multi-View Majority Vote Learning Algorithms: Direct Minimization of PAC-Bayesian Bounds

This notebook contains everything necessary to reproduce the experiments in our paper:  

*Multi-View Majority Vote Learning Algorithms: Direct Minimization of PAC-Bayesian Bounds*

## Imports

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from collections import OrderedDict

# Scikit-learn
from sklearn import preprocessing
from sklearn.utils import check_random_state
RAND = check_random_state(42)

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset

# Seed torch's default random number generator so that anything drawing from it
# gives the same values on every Restart & Run All. The value 42 is the same one
# passed to `check_random_state` for `RAND` above.
torch.manual_seed(42)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


from mvpb.dNDF import MultiViewBoundsDeepNeuralDecisionForests


# Import data
from mvlearn.datasets import load_UCImultifeature
from data.datasets import (SampleData,
                           MultipleFeatures,
                           Nutrimouse,
                           train_test_split,
                           train_test_merge,
                           s1_s2_split)
from mvpb.util import uniform_distribution

## Load and prepare the multiview datasets

In [2]:
# Instantiate the `MultipleFeatures` dataset and unpack the four objects
# returned by `get_data()` into train/test inputs and labels.
dataset = MultipleFeatures()
X_train, y_train, X_test, y_test = dataset.get_data()

# Show the distinct labels of both splits. A notebook cell only displays its
# last expression, so the two arrays are returned together as one tuple
# instead of on two separate lines (where the first result was discarded).
np.unique(y_train), np.unique(y_test)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
# Min-max scale every view on its own: the scaler of a view is fitted on that
# view's training split only and then applied to both of its splits.
Xs_train, Xs_test = [], []
for view_train, view_test in zip(X_train, X_test):
    view_scaler = preprocessing.MinMaxScaler()
    view_scaler.fit(view_train)
    Xs_train.append(view_scaler.transform(view_train))
    Xs_test.append(view_scaler.transform(view_test))

# Join the scaled views along axis 1 into one matrix per split.
X_train_concat = np.concatenate(Xs_train, axis=1)
X_test_concat = np.concatenate(Xs_test, axis=1)

In [4]:
# Build the multi-view model; the number of views is taken from the list of
# scaled training matrices so it always matches the loaded dataset.
mvb = MultiViewBoundsDeepNeuralDecisionForests(
    nb_estimators=3,
    nb_views=len(Xs_train),
    depth=3,
    used_feature_rate=0.8,
    epochs=100,
)

In [5]:
# Fit the model on the list of per-view scaled training matrices and the
# training labels (the saved output reports one "View k/6 done!" line per view).
mvb.fit(Xs_train,y_train)

View 1/6 done!
View 2/6 done!
View 3/6 done!
View 4/6 done!
View 5/6 done!
View 6/6 done!


In [14]:
from mvpb.util import uniform_distribution, kl

# Toy check of the KL terms on hand-picked values; nothing here reads the
# fitted model `mvb`.
m = 3  # size passed to uniform_distribution() for each per-view prior
v = 4  # number of per-view priors/posteriors, and length of rho / pi

# Initialisation with the uniform distribution
prior_Pv = [uniform_distribution(m)]*v

# Exactly one hand-picked parameter vector per view. Building the list directly
# avoids appending the same four tensors once per loop iteration (4 * v tensors,
# of which only the first v were ever read).
t = [
    torch.tensor([0.5, 0.3, 0.2]),
    torch.tensor([0.4, 0.4, 0.2]),
    torch.tensor([0.2, 0.7, 0.1]),
    torch.tensor([0.1, 0.1, 0.8]),
]
assert len(t) == v, "expected exactly one parameter vector per view"
posterior_Qv = torch.nn.ParameterList([torch.nn.Parameter(q, requires_grad=True) for q in t])

prior_pi = uniform_distribution(v)
posterior_rho = torch.nn.Parameter(torch.tensor([0.2, 0.4, 0.3, 0.1]), requires_grad=True)

lamb = 1.0  # not read anywhere in this cell

# The raw parameters are mapped through a softmax before being handed to kl().
softmax_posterior_Qv = [F.softmax(q, dim=0) for q in posterior_Qv]
softmax_posterior_rho = F.softmax(posterior_rho, dim=0)

# Two ways of combining the per-view KL terms with rho:
#   KL_QP  multiplies each per-view KL by the *whole* rho vector, then sums everything;
#   KL_QP2 stacks the per-view KLs first and multiplies entry k by rho[k], then sums.
# NOTE(review): if kl() returns a scalar, KL_QP reduces to the unweighted sum of
# the per-view KLs (the softmax-ed rho sums to 1), while KL_QP2 is the
# rho-weighted sum -- confirm which of the two the bound is supposed to use.
KL_QP = torch.sum(torch.stack([kl(q, p) * softmax_posterior_rho for q, p in zip(softmax_posterior_Qv, prior_Pv)]))
KL_QP2 = torch.sum(torch.stack([kl(q, p) for q, p in zip(softmax_posterior_Qv, prior_Pv)]) * softmax_posterior_rho)
KL_rhopi = kl(softmax_posterior_rho, prior_pi)

KL_QP, KL_QP2, KL_rhopi

(tensor(0.1060, grad_fn=<SumBackward0>),
 tensor(0.0249, grad_fn=<SumBackward0>),
 tensor(0.0062, grad_fn=<SumBackward0>))

In [6]:
# Run the optimisation with the 'Lambda' setting and unpack the two returned
# values; both are inspected in later cells. The saved output logs one loss
# value per iteration.
posterior_Qv , posterior_rho = mvb.optimize_rho('Lambda')

Iteration: 0,	 Loss: 0.631015287944532
Iteration: 1,	 Loss: 0.6309955165168127
Iteration: 2,	 Loss: 0.6309579762262016
Iteration: 3,	 Loss: 0.6309045039081526
Iteration: 4,	 Loss: 0.6308364875907567
Iteration: 5,	 Loss: 0.630755575455771
Iteration: 6,	 Loss: 0.6306629858573899
Iteration: 7,	 Loss: 0.6305598285203178
Iteration: 8,	 Loss: 0.6304471785521867
Iteration: 9,	 Loss: 0.6303260113951252
Iteration: 10,	 Loss: 0.6301971202676181
Iteration: 11,	 Loss: 0.6300611080061177
Iteration: 12,	 Loss: 0.6299189637227067
Iteration: 13,	 Loss: 0.6297710983167427
Iteration: 14,	 Loss: 0.6296180773644778
Iteration: 15,	 Loss: 0.6294604738189188
Iteration: 16,	 Loss: 0.6292986712673053
Iteration: 17,	 Loss: 0.6291330676237353
Iteration: 18,	 Loss: 0.6289640760902442
Iteration: 19,	 Loss: 0.6287919160927696
Iteration: 20,	 Loss: 0.6286169656731979
Iteration: 21,	 Loss: 0.6284394506083333
Iteration: 22,	 Loss: 0.628259704189607
Iteration: 23,	 Loss: 0.6280778091369404
Iteration: 24,	 Loss: 0.62789

In [7]:
# Evaluate on the scaled test views. Only the second returned value is kept
# (as `mv_risk`); the first one goes to a throwaway name other than `_`, so that
# IPython's `_` (last cell output) is not shadowed by this assignment.
_unused, mv_risk = mvb.predict_MV(Xs_test, y_test)
mv_risk

0.16000000000000003

In [8]:
# Compute the bound with bound="Lambda" and unpack the two returned values.
# The call also prints diagnostic lines (see the saved output below).
lamb_bound, lamb_views = mvb.bound(bound="Lambda")

self.posterior_rho=tensor([0.1537, 0.1496, 0.1604, 0.2218, 0.1695, 0.1449]),  prior_pi=tensor([0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667])
KL_rhopi=tensor(0.0107),  KL_QP=tensor(0.0013)
len(emp_rv)=6
0.48270838101348457 tensor(0.0107)


In [9]:
# Display the two values returned by mvb.bound(bound="Lambda").
# NOTE(review): in the saved output the first value and five of the six list
# entries are exactly 1.0 -- confirm whether these are clipped (vacuous) bounds.
lamb_bound, lamb_views

(1.0, [1.0, 1.0, 1.0, 0.8932343921151265, 1.0, 1.0])

In [10]:
# Same call with bound="PBkl"; the returned pair is displayed directly.
# NOTE(review): the saved output contains values above 1 (e.g. 1.13 overall) --
# confirm whether this variant is expected to be clipped like the "Lambda" one.
mvb.bound(bound="PBkl")

self.posterior_rho=tensor([0.1537, 0.1496, 0.1604, 0.2218, 0.1695, 0.1449]),  prior_pi=tensor([0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667])
KL_rhopi=tensor(0.0107),  KL_QP=tensor(0.0013)
len(emp_rv)=6
empirical_gibbs_risk=0.48270838101348457, right_hand_side=tensor(0.0137)


(1.1303020506965373,
 [1.2293559737839155,
  1.2510526512864808,
  1.171988637760185,
  0.8333695602588967,
  1.1176942348792616,
  1.3065048439789029])

In [8]:
# Print the entry of `posterior_Qv` for every view. Iterating over the
# container (rather than hard-coding indices 0..5) keeps this cell working
# when the loaded dataset has a different number of views.
for view_posterior in posterior_Qv:
    print(view_posterior)

Parameter containing:
tensor([0.3761, 0.3374, 0.2865], requires_grad=True)
Parameter containing:
tensor([0.3865, 0.2850, 0.3285], requires_grad=True)
Parameter containing:
tensor([0.3285, 0.3675, 0.3040], requires_grad=True)
Parameter containing:
tensor([0.3288, 0.3928, 0.2785], requires_grad=True)
Parameter containing:
tensor([0.3379, 0.3340, 0.3281], requires_grad=True)
Parameter containing:
tensor([0.3468, 0.2983, 0.3548], requires_grad=True)


In [9]:
# Display `posterior_rho`; the saved output has six entries, matching the
# value bound by the mvb.optimize_rho('Lambda') cell rather than the toy KL cell.
posterior_rho

Parameter containing:
tensor([0.0882, 0.0540, 0.0956, 0.4107, 0.1910, 0.1605], requires_grad=True)