In [1]:
import os
import numpy as np
import pandas as pd
from mord import OrdinalRidge
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support, matthews_corrcoef, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from scipy.stats import stats

In [2]:
print("PoE: Classification")
model_dir = "/home/bram/jointomicscomp/results/EXCEL POE128 RNAADT 18-12-2021 10:20:52/PoE/"

z128 = np.load(model_dir + 'task1_z_all.npy')

model_dir = '/home/bram/jointomicscomp/results/EXCEL PoE64 RNAADT 21-12-2021 07:33:27/PoE/'

z64 = np.load(model_dir + 'task1_z_all.npy')

# LOAD Split and y

trainInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
validInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")
testInd = np.load("/home/bram/jointomicscomp/data/CELL/trainInd.npy")

cellTypesl1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellTypes.npy")
cellTypesl2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellTypes.npy")
cellTypesl3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellTypes.npy")
cellTypel1 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l1cellType.npy")
cellTypel2 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l2cellType.npy")
cellTypel3 = np.load("/home/bram/jointomicscomp/data/CELL/celltype.l3cellType.npy")

PoE: Classification


In [3]:
def run_classifier(data, y, trainInd, validInd, testInd):
    alphas = np.array([1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1.0, 2.0, 5.0, 10., 20.])
    validationPerformance = np.zeros(alphas.shape[0])
    models = []

    for i, a in enumerate(alphas):
        model = LinearSVC(penalty='l2', loss='hinge', C=a, multi_class='ovr', fit_intercept=True, random_state=1)

        # train
        model.fit(data[trainInd], y[trainInd])

        # save so that we don't have to re-train
        models.append(model)

        # evaluate using user-specified criterion
        validationPerformance[i] = evaluate_classification(y[validInd], model.predict(data[validInd]), training=True)

    bestModel = models[np.argmax(validationPerformance)]

    predictions = bestModel.predict(data[testInd]).astype(int)

    classifications = evaluate_classification(y[testInd], predictions)

    print("Accuracy : ", classifications[0])
#     print("Confusion matrix : \n", classifications[5])
    return classifications

In [4]:
def evaluate_classification(y_true, y_pred, training=False):
    if training:
        return accuracy_score(y_true, y_pred)
    
    # returns accuracy, precision, recall, f1, mcc, confusion_matrix
    acc = accuracy_score(y_true, y_pred)
    pr, rc, f1, _ = precision_recall_fscore_support(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    spear = stats.spearmanr(y_true, y_pred)
    confMat = confusion_matrix(y_true, y_pred)

    return [acc, pr, rc, f1, mcc, spear, confMat]

In [8]:
print(z128.shape)

(161764, 128)


In [9]:
print("Celltype l1 from common Z128")
run_classifier(z128, cellTypel1, trainInd, validInd, testInd)

Celltype l1 from common Z128
Accuracy :  0.2054632563171316
Confusion matrix : 
 SpearmanrResult(correlation=-0.002190898140205519, pvalue=0.4306158158883826)


[0.2054632563171316,
 array([0.09243028, 0.26044716, 0.16336634, 0.02798635, 0.30390585,
        0.11739794, 0.02484838, 0.04780064]),
 array([0.06437887, 0.12429412, 0.00322281, 0.01439101, 0.47479599,
        0.16195652, 0.1095026 , 0.09823219]),
 array([0.07589553, 0.16827958, 0.00632093, 0.01900788, 0.37059956,
        0.13612356, 0.04050529, 0.0643083 ]),
 0.005614398172271021,
 SpearmanrResult(correlation=-0.002190898140205519, pvalue=0.4306158158883826),
 array([[  696,  1342,    38,   103,  4956,  1675,   998,  1003],
        [ 1928,  4182,    95,   387, 15691,  5329,  3070,  2964],
        [ 1180,  2472,    66,   224,  9552,  3311,  1909,  1765],
        [  155,   357,     5,    41,  1335,   429,   278,   249],
        [ 2223,  4879,   129,   463, 18386,  5907,  3462,  3275],
        [  879,  1792,    42,   163,  6811,  2384,  1394,  1255],
        [  151,   318,    11,    26,  1217,   450,   295,   226],
        [  318,   715,    18,    58,  2551,   822,   466,   539]])]

In [10]:
print("Celltype l2 from common Z128")
run_classifier(z128, cellTypel2, trainInd, validInd, testInd)

Celltype l2 from common Z128




Accuracy :  0.09884089328490843
Confusion matrix : 
 SpearmanrResult(correlation=0.0068470336647507716, pvalue=0.013773009151686413)


  _warn_prf(average, modifier, msg_start, len(result))


[0.09884089328490843,
 array([0.        , 0.03225806, 0.02272727, 0.05111896, 0.27144462,
        0.04701398, 0.        , 0.11592219, 0.00746269, 0.09951986,
        0.        , 0.06623377, 0.        , 0.02435312, 0.10315534,
        0.        , 0.        , 0.        , 0.        , 0.01810976,
        0.11030584, 0.        , 0.        , 0.        , 0.01419624,
        0.01591916, 0.        , 0.        , 0.        , 0.02320186,
        0.        ]),
 array([0.        , 0.00053908, 0.00039463, 0.07101947, 0.19372661,
        0.01463029, 0.        , 0.14191926, 0.01204819, 0.09326679,
        0.        , 0.01157512, 0.        , 0.00674821, 0.00921609,
        0.        , 0.        , 0.        , 0.        , 0.2803579 ,
        0.11255731, 0.        , 0.        , 0.        , 0.03883495,
        0.05371322, 0.        , 0.        , 0.        , 0.0070497 ,
        0.        ]),
 array([0.        , 0.00106045, 0.0007758 , 0.05944798, 0.22609329,
        0.02231604, 0.        , 0.12761014, 0.0092

In [5]:
print("Celltype l1 from common z64")
run_classifier(z64, cellTypel1, trainInd, validInd, testInd)

Celltype l1 from common z64
Accuracy :  0.19588130747237462


[0.19588130747237462,
 array([0.08739272, 0.26743363, 0.16041998, 0.0294599 , 0.30275593,
        0.11682662, 0.01606426, 0.0212766 ]),
 array([1.33752659e-01, 8.98175117e-02, 3.69305142e-01, 6.31800632e-03,
        2.96172916e-01, 1.24048913e-01, 1.48478099e-03, 1.82248952e-04]),
 array([0.10571335, 0.13447248, 0.22367798, 0.01040462, 0.29942824,
        0.12032949, 0.00271831, 0.0003614 ]),
 0.004363340497504681,
 SpearmanrResult(correlation=0.004410719102848404, pvalue=0.11258357449380924),
 array([[ 1446,   962,  3966,    46,  3077,  1289,    24,     1],
        [ 4305,  3022, 12059,   165,  9876,  4143,    61,    15],
        [ 2675,  1761,  7563,   105,  5960,  2375,    36,     4],
        [  352,   254,  1037,    18,   840,   339,     9,     0],
        [ 4840,  3303, 14158,   187, 11469,  4672,    75,    20],
        [ 1846,  1292,  5379,    53,  4289,  1826,    29,     6],
        [  359,   230,   959,    18,   812,   312,     4,     0],
        [  723,   476,  2024,    19,  1

In [6]:
print("Celltype l2 from common z64")
run_classifier(z64, cellTypel2, trainInd, validInd, testInd)

Celltype l2 from common z64




Accuracy :  0.13673595549030215


  _warn_prf(average, modifier, msg_start, len(result))


[0.13673595549030215,
 array([0.        , 0.        , 0.        , 0.        , 0.2622363 ,
        0.03933805, 0.        , 0.        , 0.00060396, 0.5       ,
        0.        , 0.        , 0.        , 0.        , 0.12903226,
        0.        , 0.        , 0.00209018, 0.        , 0.        ,
        0.10897436, 0.0033557 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ]),
 array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        4.64860690e-01, 3.90075129e-01, 0.00000000e+00, 0.00000000e+00,
        9.63855422e-02, 8.18129755e-05, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 3.46958690e-03, 0.00000000e+00,
        0.00000000e+00, 2.34113712e-02, 0.00000000e+00, 0.00000000e+00,
        1.25721047e-03, 1.55902004e-02, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00

In [None]:
print("Celltype l3 from common z64")
run_classifier(z64, cellTypel3, trainInd, validInd, testInd)