## MVAE

In [16]:
import numpy as np
import pandas as pd
from mord import OrdinalRidge
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support, matthews_corrcoef, confusion_matrix

In [17]:
# From baseline

def evaluate_classification(y_true, y_pred):
    # returns accuracy, precision, recall, f1, mcc, confusion_matrix

    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    confMat = confusion_matrix(y_true, y_pred)

    return [acc, pr, rc, f1, mcc, confMat]

In [9]:
folder = "/home/bram/jointomicscomp/results/brca2_geme-MVAE 14-09-2021 09_32_49/brca2_geme 14-09-2021 09:32:49"
ctype = "BRCA"

In [10]:
x_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_GE_train.npy".format(ctype, ctype)
y_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_ME_train.npy".format(ctype, ctype)
x_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_GE_valid.npy".format(ctype, ctype)
y_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_ME_valid.npy".format(ctype, ctype)
x_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_GE_test.npy".format(ctype, ctype)
y_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_ME_test.npy".format(ctype, ctype)

# For latent feature extraction
GEtrainctype = np.load(x_ctype_train_file)
GEvalidctype = np.load(x_ctype_valid_file)
GEtestctype = np.load(x_ctype_test_file)

In [11]:
y_train = np.load("/home/bram/jointomicscomp/data/{}/{}_train_stageType.npy".format(ctype, ctype))
y_valid = np.load("/home/bram/jointomicscomp/data/{}/{}_valid_stageType.npy".format(ctype, ctype))
y_test = np.load("/home/bram/jointomicscomp/data/{}/{}_test_stageType.npy".format(ctype, ctype))



In [14]:
# Z1 for GE and z2 for ME
z_moe = np.load("{}/MoE/task2_z.npy".format(folder))
z_poe = np.load("{}/PoE/task2_z.npy".format(folder))

In [20]:
# Create labels from stageTypes and stageTypes
latent_train1 = z_moe[:len(GEtrainctype)]
latent_valid1 = z_moe[len(latent_train1):(len(GEtrainctype) + len(GEvalidctype))]
latent_test1 = z_moe[(len(latent_train1) + len(latent_valid1)):(len(GEtrainctype) + len(GEvalidctype) + len(GEtestctype))]

print(latent_train1.shape)
print(latent_valid1.shape)
print(latent_test1.shape)

latent_train2 = z_poe[:len(GEtrainctype)]
latent_valid2 = z_poe[len(latent_train2):(len(GEtrainctype) + len(GEvalidctype))]
latent_test2 = z_poe[(len(latent_train2) + len(latent_valid2)):(len(GEtrainctype) + len(GEvalidctype) + len(GEtestctype))]

print(latent_train2.shape)
print(latent_valid2.shape)
print(latent_test2.shape)

(608, 128)
(68, 128)
(76, 128)
(608, 128)
(68, 128)
(76, 128)


In [21]:
print("Accuracies from Mixture-of-Experts using cancer type {}".format(ctype))
alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])

# if criterion == 'acc':
#     ind = 0
# elif criterion == 'pr':
#     ind = 1
# elif criterion == 'rc':
#     ind = 2
# elif criterion == 'f1':
#     ind = 3
# else:
#     assert criterion == 'mcc'
#     ind = 4



validationPerformance = np.zeros(alphas.shape[0])
models = []

for i, a in enumerate(alphas):
    model = OrdinalRidge(alpha=a, fit_intercept=True, normalize=False, random_state=1)

    # train
    model.fit(latent_train1, y_train)

    # save so that we don't have to re-train
    models.append(model)

    # evaluate using user-specified criterion
    validationPerformance[i] = evaluate_classification(y_valid, model.predict(latent_valid1))[0]

bestModel = models[np.argmax(validationPerformance)]

predictions = bestModel.predict(latent_test1).astype(int)

classifications1 = evaluate_classification(y_test, predictions)




Accuracies from Mixture-of-Experts using cancer type BRCA


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [22]:
print("Accuracy : ", classifications1[0])
print("Confusion matrix : ", classifications1[5])

Accuracy :  0.5657894736842105
Confusion matrix :  [[ 0 12  0  0]
 [ 0 43  0  0]
 [ 0 20  0  0]
 [ 0  1  0  0]]


In [24]:
print("Accuracies from Product-of-Experts")
alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])

# if criterion == 'acc':
#     ind = 0
# elif criterion == 'pr':
#     ind = 1
# elif criterion == 'rc':
#     ind = 2
# elif criterion == 'f1':
#     ind = 3
# else:
#     assert criterion == 'mcc'
#     ind = 4



validationPerformance = np.zeros(alphas.shape[0])
models = []

for i, a in enumerate(alphas):
    model = OrdinalRidge(alpha=a, fit_intercept=True, normalize=False, random_state=1)

    # train
    model.fit(latent_train2, y_train)

    # save so that we don't have to re-train
    models.append(model)

    # evaluate using user-specified criterion
    validationPerformance[i] = evaluate_classification(y_valid, model.predict(latent_valid2))[0]

bestModel = models[np.argmax(validationPerformance)]

predictions = bestModel.predict(latent_test2).astype(int)

classifications2 = evaluate_classification(y_test, predictions)


Accuracies from Product-of-Experts


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
print("Accuracy : ", classifications2[0])
print("Confusion matrix : ", classifications2[5])

Accuracy :  0.5789473684210527
Confusion matrix :  [[ 0 12  0  0]
 [ 0 43  0  0]
 [ 0 19  1  0]
 [ 0  1  0  0]]
