# Multi-View-Majority-Vote-Learning-Algorithms-Direct-Minimization-of-PAC-Bayesian-Bounds

In [35]:
import numpy as np
from sklearn.utils import check_random_state
RAND = check_random_state(42)


from mvb import RandomForestClassifier as RFC


# Multi-view dataset : Nhanes 4 views https://wwwn.cdc.gov/nchs/nhanes/continuousnhanes/overview.aspx?BeginYear=2017
from datasets import Nhanes


In [40]:
datasets  = Nhanes() # load Nhanes dataset
X,y = datasets.getData(return_list = True, domain_datasets=False)
print("number of views",len(X))

#concatenation of te views###############################
X_concat = np.concatenate((X[0],X[1],X[2],X[3]), axis=1)
###############################

X_view1 = X[0]
X_view2 = X[1]
X_view3 = X[2]
X_view4 = X[3]

number of views 4


In [43]:
###############################Optimization Bounds for the concatenat view############################### 
rhos = []
rf = RFC(100,max_features="sqrt",random_state=check_random_state(42))
print("Training...")
_  = rf.fit(X_concat,y)
_, mv_risk = rf.predict(X_concat,y)
stats  = rf.stats()

bounds, stats = rf.bounds(stats=stats)
res_unf = (mv_risk, stats, bounds, -1, -1)
print(bounds)
        
# Optimize Lambda
print("Optimizing lambda...")
(_, rho, bl) = rf.optimize_rho('Lambda')
_, mv_risk = rf.predict(X_concat,y)
stats = rf.aggregate_stats(stats)
bounds, stats = rf.bounds(stats=stats)
res_lam = (mv_risk, stats, bounds, bl, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('Lambda'))
   
""" Optimize TND """
print("Optimizing TND...")
(_, rho, bl) = rf.optimize_rho('TND')
_, mv_risk = rf.predict(X_concat,y)
stats = rf.aggregate_stats(stats) # update rho-dependent stats
bounds, stats = rf.bounds(stats=stats, spec_bound='TND')
res_tnd = (mv_risk, stats, bounds, bl, -1, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('TND', -1.0))


print("Optimizing DIS...")
(_, rho, bl, bg) = rf.optimize_rho('DIS')
_, mv_risk = rf.predict(X_concat,y)
stats = rf.aggregate_stats(stats)
bounds, stats = rf.bounds(stats=stats)
res_lam = (mv_risk, stats, bounds, bl, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('DIS', -1.0))

print("Optimizing DIS with unlabeled")
(_, rho, bl, bg) = rf.optimize_rho('DIS',unlabeled_data=X_concat)
_, mv_risk = rf.predict(X_concat,y)
stats  = rf.stats(unlabeled_data=X_concat)
bounds, stats = rf.bounds(stats=stats)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('DIS', -1.0))

Training...
{'PBkl': 0.21566265460676134, 'TND': 0.261247355488244, 'CTD': 0.337451933224099, 'CCTND': 0.26446036648574606, 'CCPBB': 0.30771139774346745, 'CCPBUB': 0.28158620867232265, 'CCPBSkl': 0.2644603664857447, 'C1': 0.28069695010013507, 'C2': 0.27312333697704916, 'DIS': 0.2890231053727954}
Optimizing lambda...
mv_risk 0.04926455355293147 bound None
Optimizing TND...
mv_risk 0.026061736067951147 bound 0.25342511064154366
Optimizing DIS...
mv_risk 0.02780194737932462 bound 0.28523070520318683
Optimizing DIS with unlabeled
mv_risk 0.03190387404184791 bound 0.3042799284799073


In [45]:
###############################Optimization Bounds for the view1 ############################### 
rhos = []
rf = RFC(100,max_features="sqrt",random_state=check_random_state(42))
print("Training...")
_  = rf.fit(X_view1,y)
_, mv_risk = rf.predict(X_view1,y)
stats  = rf.stats()

bounds, stats = rf.bounds(stats=stats)
res_unf = (mv_risk, stats, bounds, -1, -1)
print(bounds)
        
# Optimize Lambda
print("Optimizing lambda...")
(_, rho, bl) = rf.optimize_rho('Lambda')
_, mv_risk = rf.predict(X_view1,y)
stats = rf.aggregate_stats(stats)
bounds, stats = rf.bounds(stats=stats)
res_lam = (mv_risk, stats, bounds, bl, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('Lambda'))
   
""" Optimize TND """
print("Optimizing TND...")
(_, rho, bl) = rf.optimize_rho('TND')
_, mv_risk = rf.predict(X_view1,y)
stats = rf.aggregate_stats(stats) # update rho-dependent stats
bounds, stats = rf.bounds(stats=stats, spec_bound='TND')
res_tnd = (mv_risk, stats, bounds, bl, -1, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('TND', -1.0))


print("Optimizing DIS...")
(_, rho, bl, bg) = rf.optimize_rho('DIS')
_, mv_risk = rf.predict(X_view1,y)
stats = rf.aggregate_stats(stats)
bounds, stats = rf.bounds(stats=stats)
res_lam = (mv_risk, stats, bounds, bl, -1)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('DIS', -1.0))

print("Optimizing DIS with unlabeled")
(_, rho, bl, bg) = rf.optimize_rho('DIS',unlabeled_data=X_view1)
_, mv_risk = rf.predict(X_view1,y)
stats  = rf.stats(unlabeled_data=X_view1)
bounds, stats = rf.bounds(stats=stats)
rhos.append(rho)
print('mv_risk', mv_risk, 'bound', bounds.get('DIS', -1.0))

Training...
{'PBkl': 0.21146573848417, 'TND': 0.26387111156732357, 'CTD': 0.33793867140407097, 'CCTND': 0.2670981137235494, 'CCPBB': 0.31022498378251845, 'CCPBUB': 0.28476725975896333, 'CCPBSkl': 0.26709811372354897, 'C1': 0.2805235452850753, 'C2': 0.275080603908077, 'DIS': 0.2895510758834864}
Optimizing lambda...
mv_risk 0.05307644499689246 bound None
Optimizing TND...
mv_risk 0.03393412057178369 bound 0.25532047314213374
Optimizing DIS...
mv_risk 0.03799461363165524 bound 0.284886841887237
Optimizing DIS with unlabeled
mv_risk 0.041640770665009375 bound 0.29486541065071903
