In [71]:
import numpy as np
import pandas as pd
from mord import OrdinalRidge
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support, matthews_corrcoef, confusion_matrix

In [None]:
# From baseline

def evaluate_classification(y_true, y_pred):
    # returns accuracy, precision, recall, f1, mcc, confusion_matrix

    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    confMat = confusion_matrix(y_true, y_pred)

    return [acc, pr, rc, f1, mcc, confMat]

In [72]:
folder = "/home/bram/jointomicscomp/results/trained CGAE task 2 STAD"
ctype = "STAD"

In [73]:
x_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_GE_train.npy".format(ctype, ctype)
y_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_ME_train.npy".format(ctype, ctype)
x_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_GE_valid.npy".format(ctype, ctype)
y_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_ME_valid.npy".format(ctype, ctype)
x_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_GE_test.npy".format(ctype, ctype)
y_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_ME_test.npy".format(ctype, ctype)

# For latent feature extraction
GEtrainctype = np.load(x_ctype_train_file)
GEvalidctype = np.load(x_ctype_valid_file)
GEtestctype = np.load(x_ctype_test_file)

In [74]:
y_train = np.load("/home/bram/jointomicscomp/data/{}/{}_train_stageType.npy".format(ctype, ctype))
y_valid = np.load("/home/bram/jointomicscomp/data/{}/{}_valid_stageType.npy".format(ctype, ctype))
y_test = np.load("/home/bram/jointomicscomp/data/{}/{}_test_stageType.npy".format(ctype, ctype))



In [75]:
# Z1 for GE and z2 for ME
z1 = np.load("{}/CGAE/task2_z1.npy".format(folder))
z2 = np.load("{}/CGAE/task2_z2.npy".format(folder))

In [76]:
# Create labels from stageTypes and stageTypes
latent_train1 = z1[:len(GEtrainctype)]
latent_valid1 = z1[len(latent_train1):(len(GEtrainctype) + len(GEvalidctype))]
latent_test1 = z1[(len(latent_train1) + len(latent_valid1)):(len(GEtrainctype) + len(GEvalidctype) + len(GEtestctype))]

print(latent_train1.shape)
print(latent_valid1.shape)
print(latent_test1.shape)

latent_train2 = z2[:len(GEtrainctype)]
latent_valid2 = z2[len(latent_train2):(len(GEtrainctype) + len(GEvalidctype))]
latent_test2 = z2[(len(latent_train2) + len(latent_valid2)):(len(GEtrainctype) + len(GEvalidctype) + len(GEtestctype))]

print(latent_train2.shape)
print(latent_valid2.shape)
print(latent_test2.shape)

(292, 128)
(33, 128)
(37, 128)
(292, 128)
(33, 128)
(37, 128)


In [77]:
np.concatenate((y_train, y_valid)).shape

(325,)

In [82]:
print("CGAE: Stage predictions from GE using cancer type {}".format(ctype))
validationPerformance = np.zeros(alphas.shape[0])
models = []
alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])

for i, a in enumerate(alphas):
    model = OrdinalRidge(alpha=a, fit_intercept=True, normalize=False, random_state=1)

    # train
    model.fit(latent_train1, latent_train2)

    # save so that we don't have to re-train
    models.append(model)

    # evaluate using user-specified criterion
    validationPerformance[i] = evaluate_classification(latent_valid2, model.predict(latent_valid1))[ind]

bestModel = models[np.argmax(validationPerformance)]

predictions = bestModel.predict(x_test).astype(int)

classifications = evaluate_classification(y_test, predictions)

print(classifications)



CGAE: Stage predictions from GE using cancer type STAD
Accuracies:  [32.43, 35.14, 37.84, 43.24, 43.24, 43.24, 43.24, 45.95, 43.24, 43.24]
Highest accuracy is alpha 5.0 with accuracy 45.95, that has the following stage predictions:
 [1. 1. 2. 1. 2. 2. 1. 2. 1. 2. 2. 2. 1. 2. 2. 2. 1. 1. 2. 1. 1. 2. 1. 1.
 2. 1. 1. 2. 1. 1. 2. 2. 1. 1. 2. 2. 2.]
