In [2]:
import os
import numpy as np
import pandas as pd
from mord import OrdinalRidge
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support, matthews_corrcoef, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from scipy.stats import stats

In [None]:
print("MOFA+: Classification")
model_dir = "/home/bram/jointomicscomp/results/EXCEL MOFA+128 RNAADT 16-12-2021 23:35:11/MOFA+/"

z = np.load(model_dir + 'task1_z.npy').T
z_from_pseudo_w1 = np.load(model_dir + 'task1_z_from_pseudoinv_w1.npy')
z_from_pseudo_w2 = np.load(model_dir + 'task1_z_from_pseudoinv_w2.npy')

model_dir = '/home/bram/jointomicscomp/results/EXCEL MOFA+64 RNAADT 17-12-2021 07:35:33/MOFA+/'

z64 = np.load(model_dir + 'task1_z.npy').T
z64_from_pseudo_w1 = np.load(model_dir + 'task1_z_from_pseudoinv_w1.npy')
z64_from_pseudo_w2 = np.load(model_dir + 'task1_z_from_pseudoinv_w2.npy')

# LOAD Split and y

trainInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
validInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
testInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")

cellTypesl1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellTypes.npy")
cellTypesl2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellTypes.npy")
cellTypesl3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellTypes.npy")
cellTypel1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellType.npy")
cellTypel2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellType.npy")
cellTypel3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellType.npy")

In [4]:
def run_classifier(data, y, trainInd, validInd, testInd):
    alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])
    validationPerformance = np.zeros(alphas.shape[0])
    models = []

    for i, a in enumerate(alphas):
        model = LinearSVC(penalty='l2', loss='hinge', C=a, multi_class='ovr', fit_intercept=True, random_state=1)

        # train
        model.fit(data[trainInd], y[trainInd])

        # save so that we don't have to re-train
        models.append(model)

        # evaluate using user-specified criterion
        validationPerformance[i] = evaluate_classification(y[validInd], model.predict(data[validInd]), training=True)

    bestModel = models[np.argmax(validationPerformance)]

    predictions = bestModel.predict(data[testInd]).astype(int)

    classifications = evaluate_classification(y[testInd], predictions)

    print("Accuracy : ", classifications[0])
    print("Confusion matrix : \n", classifications[5])
    return classifications

In [5]:
print(z.shape)

(145587, 128)


In [6]:
def evaluate_classification(y_true, y_pred, training=False):
    if training:
        return accuracy_score(y_true, y_pred)
    
    # returns accuracy, precision, recall, f1, mcc, confusion_matrix
    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    spear = stats.spearmanr(y_true, y_pred)
    confMat = confusion_matrix(y_true, y_pred)

    return [acc, pr, rc, f1, mcc, spear, confMat]

In [7]:
print("Celltype l1 from common Z")
run_classifier(z, cellTypel1, trainInd, validInd, testInd)

Celltype l1 from common Z




Accuracy :  0.21902480488370296
Confusion matrix : 
 SpearmanrResult(correlation=-0.0024153956320516912, pvalue=0.3849031030029981)


[0.21902480488370296,
 array([0.08498548, 0.25911291, 0.15856893, 0.02228164, 0.30076617,
        0.10967914, 0.02080805, 0.04122832]),
 array([0.06225141, 0.25141176, 0.13721373, 0.00877501, 0.38420618,
        0.08383152, 0.04454343, 0.0264261 ]),
 array([0.07186332, 0.25520425, 0.14712042, 0.01259129, 0.33740404,
        0.09502907, 0.02836544, 0.03220791]),
 0.00014338926287660482,
 SpearmanrResult(correlation=-0.0024153956320516912, pvalue=0.3849031030029981),
 array([[  673,  2734,  1480,    90,  4112,   950,   480,   292],
        [ 2041,  8459,  4602,   297, 12846,  2939,  1527,   935],
        [ 1239,  5195,  2810,   191,  7765,  1795,   933,   551],
        [  168,   752,   384,    25,  1070,   244,   135,    71],
        [ 2366,  9659,  5368,   324, 14878,  3361,  1691,  1077],
        [  941,  3756,  1966,   133,  5670,  1234,   637,   383],
        [  163,   733,   370,    21,   974,   250,   120,    63],
        [  328,  1358,   741,    41,  2152,   478,   244,   145]])]

In [8]:
print("Celltype l2 from common Z")
run_classifier(z, cellTypel2, trainInd, validInd, testInd)

Celltype l2 from common Z




Accuracy :  0.1415810215593849
Confusion matrix : 
 SpearmanrResult(correlation=-0.0014684930961184521, pvalue=0.5973153957544459)


  _warn_prf(average, modifier, msg_start, len(result))


[0.1415810215593849,
 array([0.        , 0.        , 0.02102141, 0.04778761, 0.2587432 ,
        0.03676768, 0.        , 0.10775776, 0.        , 0.094013  ,
        0.02720577, 0.06923751, 0.        , 0.02771363, 0.06948102,
        0.        , 0.        , 0.        , 0.        , 0.01497696,
        0.10642378, 0.03030303, 0.        , 0.        , 0.01111328,
        0.01960784, 0.        , 0.03821656, 0.        , 0.01397206,
        0.        ]),
 array([0.        , 0.        , 0.06432518, 0.00441826, 0.38721559,
        0.0179913 , 0.        , 0.11321414, 0.        , 0.11241103,
        0.05201149, 0.05379029, 0.        , 0.00506116, 0.09725686,
        0.        , 0.        , 0.        , 0.        , 0.01661696,
        0.02462653, 0.00445434, 0.        , 0.        , 0.03255283,
        0.00093414, 0.        , 0.00298656, 0.        , 0.0024674 ,
        0.        ]),
 array([0.        , 0.        , 0.0316874 , 0.00808868, 0.31020369,
        0.02416036, 0.        , 0.11041858, 0.     

In [None]:
print("Celltype l3 from common Z")
run_classifier(z, cellTypel3, trainInd, validInd, testInd)

Celltype l3 from common Z




In [None]:
print("Celltype l1 from z1")
run_classifier(z_from_pseudo_w1, cellTypel1, trainInd, validInd, testInd)

In [None]:
print("Celltype l2 from z1")
run_classifier(z_from_pseudo_w1, cellTypel2, trainInd, validInd, testInd)

In [None]:
print("Celltype l3 from z1")
run_classifier(z_from_pseudo_w1, cellTypel3, trainInd, validInd, testInd)

In [None]:
print("Celltype l1 from z2")
run_classifier(z_from_pseudo_w2, cellTypel1, trainInd, validInd, testInd)

In [None]:
print("Celltype l2 from z2")
run_classifier(z_from_pseudo_w2, cellTypel2, trainInd, validInd, testInd)

In [None]:
print("Celltype l3 from z2")
run_classifier(z_from_pseudo_w2, cellTypel3, trainInd, validInd, testInd)

In [None]:
print("Celltype l1 from common Z_64")
run_classifier(z64, cellTypel1, trainInd, validInd, testInd)

In [None]:
print("Celltype l2 from common Z_64")
run_classifier(z64, cellTypel2, trainInd, validInd, testInd)

In [None]:
print("Celltype l3 from common Z_64")
run_classifier(z64, cellTypel3, trainInd, validInd, testInd)

In [None]:
print("Celltype l1 from z1_64")
run_classifier(z64_from_pseudo_w1, cellTypel1, trainInd, validInd, testInd)

In [None]:
print("Celltype l2 from z1_64")
run_classifier(z64_from_pseudo_w1, cellTypel2, trainInd, validInd, testInd)

In [None]:
print("Celltype l3 from z1_64")
run_classifier(z64_from_pseudo_w1, cellTypel3, trainInd, validInd, testInd)

In [None]:
print("Celltype l1 from z2_64")
run_classifier(z64_from_pseudo_w2, cellTypel1, trainInd, validInd, testInd)

In [None]:
print("Celltype l2 from z2_64")
run_classifier(z64_from_pseudo_w2, cellTypel2, trainInd, validInd, testInd)

In [None]:
print("Celltype l3 from z2_64")
run_classifier(z64_from_pseudo_w2, cellTypel3, trainInd, validInd, testInd)