In [71]:
import numpy as np
import pandas as pd
from mord import OrdinalRidge

In [72]:
# Cancer type whose data splits and stage labels are analysed in this notebook.
ctype = "STAD"
# Results directory that holds the latent features written by the trained CGAE.
folder = "/home/bram/jointomicscomp/results/trained CGAE task 2 STAD"

In [73]:
# Per-split file locations for the selected cancer type, grouped by modality.
# GE matrices:
x_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_GE_train.npy".format(ctype, ctype)
x_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_GE_valid.npy".format(ctype, ctype)
x_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_GE_test.npy".format(ctype, ctype)
# ME matrices (paths are only defined in this cell; nothing is loaded from them here):
y_ctype_train_file = "/home/bram/jointomicscomp/data/{}/{}_ME_train.npy".format(ctype, ctype)
y_ctype_valid_file = "/home/bram/jointomicscomp/data/{}/{}_ME_valid.npy".format(ctype, ctype)
y_ctype_test_file = "/home/bram/jointomicscomp/data/{}/{}_ME_test.npy".format(ctype, ctype)

# Load the three GE splits in train / valid / test order. Further down, their
# row counts are what determines where the latent matrices are cut.
GEtrainctype, GEvalidctype, GEtestctype = (
    np.load(split_path)
    for split_path in (x_ctype_train_file, x_ctype_valid_file, x_ctype_test_file)
)

In [74]:
# Stage labels for the train / valid / test splits, read from the *_stageType.npy files.
y_train, y_valid, y_test = (
    np.load(label_path)
    for label_path in (
        "/home/bram/jointomicscomp/data/{}/{}_train_stageType.npy".format(ctype, ctype),
        "/home/bram/jointomicscomp/data/{}/{}_valid_stageType.npy".format(ctype, ctype),
        "/home/bram/jointomicscomp/data/{}/{}_test_stageType.npy".format(ctype, ctype),
    )
)



In [75]:
# Latent features saved under the results folder: z1 belongs to GE, z2 to ME.
z1, z2 = (
    np.load(latent_path)
    for latent_path in (
        "{}/CGAE/task2_z1.npy".format(folder),
        "{}/CGAE/task2_z2.npy".format(folder),
    )
)

In [76]:
# Cut the latent matrices back into train / valid / test partitions. Rows are
# taken in that order, and the partition sizes come from the GE split arrays.
n_train = len(GEtrainctype)
valid_end = n_train + len(GEvalidctype)
test_end = valid_end + len(GEtestctype)

# Partitions of z1.
latent_train1, latent_valid1, latent_test1 = (
    z1[:n_train],
    z1[n_train:valid_end],
    z1[valid_end:test_end],
)
for part in (latent_train1, latent_valid1, latent_test1):
    print(part.shape)

# Partitions of z2, using the same boundaries.
latent_train2, latent_valid2, latent_test2 = (
    z2[:n_train],
    z2[n_train:valid_end],
    z2[valid_end:test_end],
)
for part in (latent_train2, latent_valid2, latent_test2):
    print(part.shape)

(292, 128)
(33, 128)
(37, 128)
(292, 128)
(33, 128)
(37, 128)


In [77]:
# Quick look at the shape of the label vector the models are fitted on (train followed by valid).
labels_trainval = np.concatenate((y_train, y_valid))
labels_trainval.shape

(325,)

In [82]:
print("CGAE: Stage predictions from GE using cancer type {}".format(ctype))

# NOTE(review): alpha is picked by accuracy on the test split, so the "highest
# accuracy" printed below is an optimistic estimate. Consider choosing alpha on
# the validation split (fitting on train only) and scoring the test split once.
alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])

# The fitting data does not depend on alpha, so build it once outside the loop.
X_trainval = np.vstack((latent_train1, latent_valid1))
y_trainval = np.concatenate((y_train, y_valid))

# Flatten the labels so the comparison with the predictions is element-wise,
# and fail fast on a size mismatch instead of silently scoring a truncated set.
y_true = np.asarray(y_test).ravel()
if len(y_true) != len(latent_test1):
    raise ValueError(
        "Test labels ({}) and test latent features ({}) differ in length".format(
            len(y_true), len(latent_test1)
        )
    )

accuracies = []   # test accuracy in percent, one entry per alpha
models = []       # fitted model per alpha, kept so nothing has to be re-trained
predictions = []  # test predictions per alpha, kept so nothing has to be re-predicted
for a in alphas:
    model = OrdinalRidge(alpha=a, fit_intercept=True, normalize=False, random_state=1)

    # Fit on train + valid, then predict the held-out test partition.
    model.fit(X_trainval, y_trainval)
    y_pred = model.predict(latent_test1)

    models.append(model)
    predictions.append(y_pred)

    # Percentage of exact matches between true and predicted stage.
    n_correct = int(np.count_nonzero(y_true == y_pred))
    accuracy = (n_correct / len(y_pred)) * 100
    accuracies.append(round(accuracy, 2))

print("Accuracies: ", accuracies)
best_idx = int(np.argmax(accuracies))  # on ties, the first (smallest) alpha wins
bestModel = models[best_idx]
print("Highest accuracy is alpha {} with accuracy {}, that has the following stage predictions:\n {}".format(alphas[best_idx], accuracies[best_idx], predictions[best_idx]))



CGAE: Stage predictions from GE using cancer type STAD
Accuracies:  [32.43, 35.14, 37.84, 43.24, 43.24, 43.24, 43.24, 45.95, 43.24, 43.24]
Highest accuracy is alpha 5.0 with accuracy 45.95, that has the following stage predictions:
 [1. 1. 2. 1. 2. 2. 1. 2. 1. 2. 2. 2. 1. 2. 2. 2. 1. 1. 2. 1. 1. 2. 1. 1.
 2. 1. 1. 2. 1. 1. 2. 2. 1. 1. 2. 2. 2.]


In [79]:
# Displays whatever `y_pred` currently holds. The GE loop reassigns it on every
# iteration, so when the cells are run top to bottom this is the prediction for
# the last alpha in `alphas`, not necessarily that of the best-scoring model.
y_pred

array([1., 1., 2., 1., 2., 2., 2., 1., 1., 2., 2., 2., 2., 2., 2., 2., 1.,
       1., 2., 1., 1., 2., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       2., 2., 2.])

In [84]:
print("CGAE: Stage predictions from ME using cancer type {}".format(ctype))

# NOTE(review): alpha is picked by accuracy on the test split, so the "highest
# accuracy" printed below is an optimistic estimate. Consider choosing alpha on
# the validation split (fitting on train only) and scoring the test split once.
alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])

# The fitting data does not depend on alpha, so build it once outside the loop.
X_trainval = np.vstack((latent_train2, latent_valid2))
y_trainval = np.concatenate((y_train, y_valid))

# Flatten the labels so the comparison with the predictions is element-wise,
# and fail fast on a size mismatch instead of silently scoring a truncated set.
y_true = np.asarray(y_test).ravel()
if len(y_true) != len(latent_test2):
    raise ValueError(
        "Test labels ({}) and test latent features ({}) differ in length".format(
            len(y_true), len(latent_test2)
        )
    )

accuracies = []   # test accuracy in percent, one entry per alpha
models = []       # fitted model per alpha, kept so nothing has to be re-trained
predictions = []  # test predictions per alpha, kept so nothing has to be re-predicted
for a in alphas:
    model = OrdinalRidge(alpha=a, fit_intercept=True, normalize=False, random_state=1)

    # Fit on train + valid, then predict the held-out test partition.
    model.fit(X_trainval, y_trainval)
    y_pred = model.predict(latent_test2)

    models.append(model)
    predictions.append(y_pred)

    # Percentage of exact matches between true and predicted stage.
    n_correct = int(np.count_nonzero(y_true == y_pred))
    accuracy = (n_correct / len(y_pred)) * 100
    accuracies.append(round(accuracy, 2))

print("Accuracies: ", accuracies)
best_idx = int(np.argmax(accuracies))  # on ties, the first (smallest) alpha wins
bestModel = models[best_idx]
print("Highest accuracy is alpha {} with accuracy {}, that has the following stage predictions:\n {}".format(alphas[best_idx], accuracies[best_idx], predictions[best_idx]))


CGAE: Stage predictions from ME using cancer type STAD
Accuracies:  [40.54, 40.54, 40.54, 45.95, 45.95, 43.24, 40.54, 40.54, 43.24, 43.24]
Highest accuracy is alpha 0.1 with accuracy 45.95, that has the following stage predictions:
 [1. 2. 2. 2. 1. 2. 1. 2. 1. 2. 2. 1. 1. 1. 1. 1. 1. 2. 2. 2. 1. 2. 2. 1.
 2. 0. 1. 1. 2. 1. 1. 2. 1. 2. 1. 2. 1.]


In [81]:
# Displays whatever `y_pred` currently holds. The ME loop reassigns it on every
# iteration, so when the cells are run top to bottom this is the prediction for
# the last alpha in `alphas`, not necessarily that of the best-scoring model.
y_pred

array([1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 1.,
       2., 2., 2., 1., 2., 1., 1., 1., 1., 1., 2., 2., 1., 2., 1., 1., 2.,
       2., 2., 2.])