In [11]:
import os
import numpy as np
import pandas as pd
from mord import OrdinalRidge
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support, matthews_corrcoef, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from scipy.stats import stats

In [12]:
print("CGAE: Classification")
model_dir = "/home/bram/jointomicscomp/results/EXCEL CGAE128 RNAADT 21-12-2021 00:01:33/CGAE/"

z1 = np.load(model_dir + 'task1_z1_all.npy')
z2 = np.load(model_dir + 'task1_z2_all.npy')

# LOAD Split and y

trainInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
validInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
testInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")

cellTypesl1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellTypes.npy")
cellTypesl2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellTypes.npy")
cellTypesl3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellTypes.npy")
cellTypel1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellType.npy")
cellTypel2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellType.npy")
cellTypel3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellType.npy")

CGAE: Classification


In [13]:
def run_classifier(data, y, trainInd, validInd, testInd):
    alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])
    validationPerformance = np.zeros(alphas.shape[0])
    models = []

    for i, a in enumerate(alphas):
        model = LinearSVC(penalty='l2', loss='hinge', C=a, multi_class='ovr', fit_intercept=True, random_state=1)

        # train
        model.fit(data[trainInd], y[trainInd])

        # save so that we don't have to re-train
        models.append(model)

        # evaluate using user-specified criterion
        validationPerformance[i] = evaluate_classification(y[validInd], model.predict(data[validInd]), training=True)

    bestModel = models[np.argmax(validationPerformance)]

    predictions = bestModel.predict(data[testInd]).astype(int)

    classifications = evaluate_classification(y[testInd], predictions)

    print("Accuracy : ", classifications[0])
    print("Confusion matrix : \n", classifications[5])
    return classifications

In [14]:
def evaluate_classification(y_true, y_pred, training=False):
    if training:
        return accuracy_score(y_true, y_pred)
    
    # returns accuracy, precision, recall, f1, mcc, confusion_matrix
    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    spear = stats.spearmanr(y_true, y_pred)
    confMat = confusion_matrix(y_true, y_pred)

    return [acc, pr, rc, f1, mcc, spear, confMat]

In [15]:
print(z2.shape)

(161764, 128)


In [16]:
print("Celltype l1 from z1")
run_classifier(z1, cellTypel1, trainInd, validInd, testInd)

Celltype l1 from z1




Accuracy :  0.9771733250907967
Confusion matrix : 
 SpearmanrResult(correlation=0.9643509956192519, pvalue=0.0)


[0.9771733250907967,
 array([0.99907536, 0.9832274 , 0.95918269, 0.97271095, 0.98670893,
        0.98538327, 0.97165992, 0.8684739 ]),
 array([0.99944501, 0.99310468, 0.96733239, 0.95085995, 0.99690115,
        0.99381793, 0.80178174, 0.78822672]),
 array([0.99926015, 0.98814136, 0.9632403 , 0.96166134, 0.99177885,
        0.98958263, 0.8785845 , 0.8264068 ]),
 0.9712647887839004,
 SpearmanrResult(correlation=0.9643509956192519, pvalue=0.0),
 array([[10805,     0,     0,     2,     2,     1,     1,     0],
        [    0, 33414,    92,     0,     3,     3,     2,   132],
        [    0,   195, 19810,     0,     2,    26,     0,   446],
        [    0,     0,     0,  2709,   137,     0,     3,     0],
        [    2,     3,     0,    66, 38604,     0,    48,     1],
        [    2,    12,    28,     0,     1, 14629,     6,    42],
        [    6,    63,    21,     8,   374,    28,  2160,    34],
        [    0,   297,   702,     0,     1,   159,     3,  4325]])]

In [17]:
print("Celltype l2 from z1")
run_classifier(z1, cellTypel2, trainInd, validInd, testInd)

Celltype l2 from z1




Accuracy :  0.8951703886871185
Confusion matrix : 
 SpearmanrResult(correlation=0.9583300198810458, pvalue=0.0)


[0.8951703886871185,
 array([0.97297297, 0.84661118, 0.81429042, 0.90384326, 0.98166134,
        0.95279825, 0.90589888, 0.89789867, 0.61538462, 0.71128834,
        0.51737452, 0.94781367, 0.        , 0.74396593, 0.90586957,
        0.8630137 , 0.88571429, 0.98281787, 0.87209302, 0.82283763,
        0.90963652, 0.00671141, 0.64285714, 0.97712418, 0.98810232,
        0.18862275, 0.88461538, 0.91924227, 0.66071429, 0.83333333,
        0.98377581]),
 array([0.63157895, 0.38382749, 0.97592739, 0.98134512, 0.99376225,
        0.94582839, 0.92077088, 0.97290124, 0.09638554, 0.94853964,
        0.03850575, 0.9790059 , 0.        , 0.66301139, 0.90361054,
        0.39705882, 0.47692308, 0.95652174, 0.72815534, 0.9241585 ,
        0.99755953, 0.00222717, 0.02403204, 0.96141479, 0.9486008 ,
        0.0294255 , 0.19827586, 0.91786959, 0.12211221, 0.72083186,
        1.        ]),
 array([0.76595745, 0.52818991, 0.88781188, 0.9410011 , 0.98767473,
        0.94930053, 0.91327434, 0.93389649, 0.16666

In [18]:
print("Celltype l3 from z1")
run_classifier(z1, cellTypel3, trainInd, validInd, testInd)

Celltype l3 from z1




Accuracy :  0.7868789119851635
Confusion matrix : 
 SpearmanrResult(correlation=0.949866099171934, pvalue=0.0)


[0.7868789119851635,
 array([0.84210526, 1.        , 0.43426295, 0.47802198, 0.54443615,
        0.30357143, 0.54985591, 0.51351351, 0.97631548, 0.94500098,
        0.83533882, 0.89079106, 0.62162162, 0.63029701, 0.29166667,
        0.60494182, 0.51102941, 0.15403423, 0.46153846, 0.        ,
        0.92837705, 0.        , 0.0625    , 0.2173913 , 0.55042918,
        0.57114625, 0.61627347, 0.61111111, 0.18852459, 0.57839625,
        0.5548854 , 0.53630363, 0.89054726, 0.89473684, 0.97952218,
        0.73913043, 0.75067385, 0.01169591, 0.62691706, 0.67724059,
        0.3908046 , 0.32758621, 0.57170172, 0.86710963, 0.75      ,
        0.98235294, 0.20245399, 0.20731707, 0.9047619 , 0.73195876,
        0.83038438, 0.34375   , 0.77981651, 0.77004639, 0.6488764 ,
        0.76481149, 0.14851485, 0.97230321]),
 array([0.5       , 0.2       , 0.12914692, 0.25816024, 0.93222506,
        0.01752577, 0.98325174, 0.00852018, 0.99423751, 0.95116647,
        0.94147038, 0.97758067, 0.27710843, 0.748

In [19]:
print("Celltype l1 from z2")
run_classifier(z2, cellTypel1, trainInd, validInd, testInd)

Celltype l1 from z2




Accuracy :  0.9837261417201144
Confusion matrix : 
 SpearmanrResult(correlation=0.9757267624003877, pvalue=0.0)




[0.9837261417201144,
 array([0.9975986 , 0.99101203, 0.97674869, 0.96612022, 0.98481052,
        0.99360849, 0.94681319, 0.92542116]),
 array([0.99907502, 0.99622541, 0.98461839, 0.93085293, 0.99452536,
        0.99273098, 0.79955457, 0.89101513]),
 array([0.99833626, 0.99361188, 0.98066775, 0.94815874, 0.9896441 ,
        0.99316954, 0.86697525, 0.90789229]),
 0.9795185639372193,
 SpearmanrResult(correlation=0.9757267624003877, pvalue=0.0),
 array([[10801,     0,     2,     1,     3,     1,     2,     1],
        [    2, 33519,    24,     0,     7,     3,     7,    84],
        [    0,    24, 20164,     0,     1,    16,     1,   273],
        [    3,     3,     0,  2652,   184,     1,     6,     0],
        [    5,    14,     8,    87, 38512,     2,    93,     3],
        [    5,    26,    36,     0,     8, 14613,    10,    22],
        [   11,    71,    34,     5,   388,    20,  2154,    11],
        [    0,   166,   376,     0,     3,    51,     2,  4889]])]

In [20]:
print("Celltype l2 from z2")
run_classifier(z2, cellTypel2, trainInd, validInd, testInd)

Celltype l2 from z2




Accuracy :  0.8950699327718106
Confusion matrix : 
 SpearmanrResult(correlation=0.957270622975858, pvalue=0.0)


[0.8950699327718106,
 array([0.88571429, 0.84645669, 0.77261307, 0.88474527, 0.97809513,
        0.92931679, 0.90627107, 0.8873779 , 0.2       , 0.69919647,
        0.82539683, 0.95545213, 0.2       , 0.78113208, 0.91961971,
        0.87155963, 0.88888889, 0.96949153, 0.93333333, 0.84984277,
        0.91837013, 0.01657459, 0.66666667, 0.93831169, 0.96171694,
        0.76984127, 0.83333333, 0.90447461, 0.84761905, 0.89454806,
        0.97932053]),
 array([0.54385965, 0.23180593, 0.97079716, 0.98609066, 0.99076219,
        0.93317517, 0.95931478, 0.97073614, 0.02409639, 0.93258611,
        0.01494253, 0.97843849, 0.02739726, 0.69843948, 0.92291012,
        0.19957983, 0.12307692, 0.95652174, 0.81553398, 0.92117597,
        0.99674604, 0.00668151, 0.00267023, 0.92926045, 0.94688749,
        0.04530593, 0.21551724, 0.89547038, 0.58745875, 0.87909764,
        0.994003  ]),
 array([0.67391304, 0.36394414, 0.86044071, 0.93267296, 0.98438791,
        0.93124198, 0.93203883, 0.92718722, 0.04301

In [21]:
print("Celltype l3 from z2")
run_classifier(z2, cellTypel3, trainInd, validInd, testInd)

Celltype l3 from z2




Accuracy :  0.7856348041109652
Confusion matrix : 
 SpearmanrResult(correlation=0.9495538252798222, pvalue=0.0)


[0.7856348041109652,
 array([0.84210526, 0.85714286, 0.53623188, 0.44240401, 0.52554745,
        0.25423729, 0.55336141, 0.63380282, 0.97285094, 0.92419825,
        0.87280986, 0.88001516, 0.28125   , 0.57796591, 0.30434783,
        0.61146195, 0.503125  , 0.40983607, 0.78431373, 0.1       ,
        0.92992238, 0.12087912, 0.30769231, 0.38888889, 0.66393443,
        0.54314003, 0.5443415 , 0.68536585, 0.22477064, 0.61899002,
        0.57583292, 0.67108168, 0.87850467, 0.76923077, 0.96283784,
        0.77586207, 0.78142655, 0.003003  , 0.63317337, 0.66807353,
        0.20087336, 0.26993865, 0.66216216, 0.83443709, 0.5       ,
        0.95564516, 0.59036145, 0.47133758, 0.89473684, 0.79661017,
        0.80625383, 0.75      , 0.75968992, 0.85918513, 0.64678179,
        0.75480059, 0.63333333, 0.97364568]),
 array([0.5       , 0.24      , 0.04383886, 0.26211672, 0.92071611,
        0.01546392, 0.98196341, 0.04035874, 0.99200974, 0.9400949 ,
        0.96002855, 0.97276156, 0.10843373, 0.746