## Set path

In [1]:
import os
# Switch the working directory to the `code/` sub-directory (presumably so that
# local modules such as `evaluation_utils` can be imported -- confirm).
# The guard keeps this cell idempotent: on a re-run the kernel is already inside
# `code/`, and an unconditional chdir would try to enter `code/code`.
code_dir = os.path.join(os.getcwd(), 'code/')
if os.path.isdir(code_dir) or os.path.basename(os.getcwd()) != 'code':
    # Either `code/` exists below the current directory (first run), or we are
    # somewhere unexpected and want the original FileNotFoundError to surface.
    os.chdir(code_dir)
# Absolute path of the sibling `outputs` directory, resolved relative to `code/`.
outputs_dir = os.path.abspath(os.path.join(os.getcwd(), '../outputs'))

## Load necessary libraries

In [2]:
from evaluation_utils import *
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import anndata

## Load prediction and ground truth

In [3]:
# Directory holding the moETM artifacts read by this notebook.
moetm_dir = os.path.join(outputs_dir, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/moETM")

# Predicted (reconstructed) and ground-truth matrices stored as plain NumPy arrays.
predicted_protein_expression = np.load(os.path.join(moetm_dir, "recon_mod_epoch499.npy"))
true_protein_expression = np.load(os.path.join(moetm_dir, "gt_mod_epoch499.npy"))
# `anndata.read` is a deprecated alias; `read_h5ad` is the supported reader for .h5ad files.
test_adata_mod2 = anndata.read_h5ad(os.path.join(moetm_dir, "test_protein_expression_data.h5ad"))

# Attach labels to the raw arrays: rows take the AnnData `obs` index and columns
# take the `var` index. pandas raises a ValueError if the array shapes do not
# match the AnnData dimensions.
predicted_protein_expression = pd.DataFrame(
    predicted_protein_expression,
    columns=test_adata_mod2.var.index,
    index=test_adata_mod2.obs.index,
)
true_protein_expression = pd.DataFrame(
    true_protein_expression,
    columns=test_adata_mod2.var.index,
    index=test_adata_mod2.obs.index,
)

predicted_protein_expression, true_protein_expression

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2        35.327980         2.814528         3.351876   
 AAACCCACACCGGAAA-2        55.776520         6.354218         2.641703   
 AAACCCACACTACTTT-2        30.585234         2.919079         7.795564   
 AAACCCAGTAGGCAAC-2        58.139244         8.036505         2.722221   
 AAACCCAGTCTCAGGC-2        91.337967         9.390227         9.199080   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2        45.156380         7.582898         2.876624   
 TTTGTTGGTCAGACTT-2        33.443874         6.775133         7.979715   
 TTTGTTGGTGTTTACG-2        73.572449         6.843086         0.803989   
 TTTGTTGTCAGAATAG-2        67.022041        10.015942         2.294858   
 TTTGTTGTCGAGTGGA-2        80.500481         9.365094         5.632768   
 
                     ADT_CD115(CSF-1

## Get protein names and cell names

In [4]:
# Protein labels come from the columns of the ground-truth frame; cell labels
# come from the `obs` index of the AnnData object. Both are copied into NumPy arrays.
protein_names, cell_names = (
    np.array(true_protein_expression.columns),
    np.array(test_adata_mod2.obs.index),
)

protein_names, cell_names

(array(['ADT_CD102_A0104', 'ADT_CD103_A0201', 'ADT_CD106_A0226',
        'ADT_CD115(CSF-1R)_A0105', 'ADT_CD117(c-Kit)_A0012',
        'ADT_CD11a_A0595', 'ADT_CD11c_A0106', 'ADT_CD122(IL-2Rb)_A0227',
        'ADT_CD127(IL-7Ra)_A0198', 'ADT_CD134(OX-40)_A0195',
        'ADT_CD135_A0098', 'ADT_CD137_A0194', 'ADT_CD140a_A0573',
        'ADT_CD14_A0424', 'ADT_CD15(SSEA-1)_A0076',
        'ADT_CD150(SLAM)_A0203', 'ADT_CD16-32_A0109',
        'ADT_CD169(Siglec-1)_A0440', 'ADT_CD172a(SIRPa)_A0422',
        'ADT_CD183(CXCR3)_A0228', 'ADT_CD184(CXCR4)_A0444',
        'ADT_CD192(CCR2)_A0426', 'ADT_CD195(CCR5)_A0376',
        'ADT_CD196(CCR6)_A0225', 'ADT_CD197(CCR7)_A0377', 'ADT_CD19_A0093',
        'ADT_CD200(OX2)_A0079', 'ADT_CD201(EPCR)_A0439',
        'ADT_CD204(Msr1)_A0448', 'ADT_CD206(MMR)_A0173', 'ADT_CD20_A0192',
        'ADT_CD21-CD35(CR2-CR1)_A0107', 'ADT_CD223(LAG-3)_A0378',
        'ADT_CD23_A0108', 'ADT_CD24_A0212', 'ADT_CD25_A0097',
        'ADT_CD274(B7-H1_PD-L1)_A0190', 'ADT_CD278

## Calculate PCC at the protein level

In [5]:
# Protein-level PCC (presumably the Pearson correlation coefficient -- the helper
# comes from `evaluation_utils` and is not shown here), computed on the
# untransformed predicted and ground-truth frames.
p_corrs_protein_level = calculate_PCC_array_protein_level(
    protein_names,
    predicted_protein_expression,
    true_protein_expression,
)
p_corrs_protein_level

Unnamed: 0,PCC
ADT_CD102_A0104,0.564742
ADT_CD103_A0201,0.673869
ADT_CD106_A0226,0.631205
ADT_CD115(CSF-1R)_A0105,0.638990
ADT_CD117(c-Kit)_A0012,0.631020
...,...
ADT_TER-119-ErythroidCells_A0122,0.048980
ADT_Tim-4_A0567,0.595001
ADT_XCR1_A0568,0.392272
ADT_anti-P2RY12_A0415,0.050022


## Perform Z-score transformation at the protein level

In [6]:
def _zscore_columns(expression):
    """Return a copy of `expression` with every column standardized by StandardScaler.

    A fresh scaler is fitted on the frame passed in, so ground truth and
    prediction are each scaled with their own column statistics. The original
    index and column labels are kept.
    """
    scaled_values = StandardScaler().fit_transform(expression.values)
    return pd.DataFrame(scaled_values, index=expression.index, columns=expression.columns)


true_protein_expression_zscore = _zscore_columns(true_protein_expression)
predicted_protein_expression_zscore = _zscore_columns(predicted_protein_expression)
true_protein_expression_zscore, predicted_protein_expression_zscore

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2         0.205191        -0.313454        -0.235714   
 AAACCCACACCGGAAA-2         0.418915        -0.382112        -0.172739   
 AAACCCACACTACTTT-2        -0.329120        -0.244796        -0.235714   
 AAACCCAGTAGGCAAC-2         0.552493        -0.038822         0.268084   
 AAACCCAGTCTCAGGC-2         0.178476        -0.450770        -0.109765   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2        -0.142111        -0.519428        -0.109765   
 TTTGTTGGTCAGACTT-2        -0.329120        -0.107480        -0.424639   
 TTTGTTGGTGTTTACG-2         1.781407        -0.519428        -0.046790   
 TTTGTTGTCAGAATAG-2        -0.729852        -0.038822        -0.046790   
 TTTGTTGTCGAGTGGA-2         0.418915        -0.519428        -0.109765   
 
                     ADT_CD115(CSF-1

## Calculate RMSE at the protein level

In [7]:
# Protein-level RMSE, evaluated on the z-scored frames rather than the raw ones.
# The helper comes from `evaluation_utils`; its implementation is not shown here.
rmses_protein_level = calculate_RMSE_array_protein_level(
    protein_names,
    predicted_protein_expression_zscore,
    true_protein_expression_zscore,
)
rmses_protein_level

Unnamed: 0,RMSE
ADT_CD102_A0104,0.933014
ADT_CD103_A0201,0.807628
ADT_CD106_A0226,0.858831
ADT_CD115(CSF-1R)_A0105,0.849717
ADT_CD117(c-Kit)_A0012,0.859047
...,...
ADT_TER-119-ErythroidCells_A0122,1.379144
ADT_Tim-4_A0567,0.899998
ADT_XCR1_A0568,1.102477
ADT_anti-P2RY12_A0415,1.378389


## Calculate PCC at the cell level

In [8]:
# Cell-level PCC (presumably the Pearson correlation coefficient -- the helper
# comes from `evaluation_utils` and is not shown here), computed on the
# untransformed predicted and ground-truth frames.
p_corrs_cell_level = calculate_PCC_array_cell_level(
    cell_names,
    predicted_protein_expression,
    true_protein_expression,
)
p_corrs_cell_level

Unnamed: 0,PCC
AAACCCAAGAATCTAG-2,0.879676
AAACCCACACCGGAAA-2,0.934952
AAACCCACACTACTTT-2,0.846155
AAACCCAGTAGGCAAC-2,0.824856
AAACCCAGTCTCAGGC-2,0.922616
...,...
TTTGTTGGTAAGATCA-2,0.913657
TTTGTTGGTCAGACTT-2,0.765713
TTTGTTGGTGTTTACG-2,0.765825
TTTGTTGTCAGAATAG-2,0.724093


## Perform $\ell_2$ normalization at the cell level

In [9]:
# Small constant added to every row norm so that an all-zero row divides by
# 1e-08 instead of 0 (which would produce NaN).
l2_norm_eps = 1e-08

# Row-wise (per-cell) Euclidean norm across all proteins. The same epsilon is
# applied to ground truth and prediction so both are normalized consistently;
# previously only the predicted norm was guarded.
l2norm_true_protein_expression = np.sqrt((true_protein_expression ** 2).sum(axis=1)) + l2_norm_eps
l2norm_predicted_protein_expression = np.sqrt((predicted_protein_expression ** 2).sum(axis=1)) + l2_norm_eps

# Divide each row by its own norm; reshaping to a column vector broadcasts the
# norm across the protein columns positionally (no index alignment involved).
true_protein_expression_l2norm = true_protein_expression / l2norm_true_protein_expression.values.reshape(-1, 1)
predicted_protein_expression_l2norm = predicted_protein_expression / l2norm_predicted_protein_expression.values.reshape(-1, 1)
true_protein_expression_l2norm, predicted_protein_expression_l2norm

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2         0.078820         0.006063         0.003638   
 AAACCCACACCGGAAA-2         0.094102         0.005156         0.005156   
 AAACCCACACTACTTT-2         0.037785         0.005038         0.002519   
 AAACCCAGTAGGCAAC-2         0.091423         0.010549         0.012893   
 AAACCCAGTCTCAGGC-2         0.061779         0.002896         0.004827   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2         0.055467         0.002133         0.005333   
 TTTGTTGGTCAGACTT-2         0.039294         0.006986         0.000000   
 TTTGTTGGTGTTTACG-2         0.244979         0.003951         0.011854   
 TTTGTTGTCAGAATAG-2         0.027464         0.008239         0.005493   
 TTTGTTGTCGAGTGGA-2         0.043529         0.001193         0.002981   
 
                     ADT_CD115(CSF-1

## Calculate RMSE at the cell level

In [10]:
# Cell-level RMSE, evaluated on the L2-normalized frames rather than the raw ones.
# The helper comes from `evaluation_utils`; its implementation is not shown here.
rmses_cell_level = calculate_RMSE_array_cell_level(
    cell_names,
    predicted_protein_expression_l2norm,
    true_protein_expression_l2norm,
)
rmses_cell_level

Unnamed: 0,RMSE
AAACCCAAGAATCTAG-2,0.044317
AAACCCACACCGGAAA-2,0.033222
AAACCCACACTACTTT-2,0.050692
AAACCCAGTAGGCAAC-2,0.053270
AAACCCAGTCTCAGGC-2,0.035733
...,...
TTTGTTGGTAAGATCA-2,0.037704
TTTGTTGGTCAGACTT-2,0.062286
TTTGTTGGTGTTTACG-2,0.061257
TTTGTTGTCAGAATAG-2,0.068255


## Save evaluation results

In [11]:
# Each metric table is written to its own Excel workbook below `outputs_dir`.
# The pairs are processed in the same order as the original four calls.
evaluation_exports = (
    ("different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/moETM/PCC_protein_level.xlsx", p_corrs_protein_level),
    ("different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/moETM/RMSE_protein_level.xlsx", rmses_protein_level),
    ("different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/moETM/PCC_cell_level.xlsx", p_corrs_cell_level),
    ("different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/moETM/RMSE_cell_level.xlsx", rmses_cell_level),
)
for relative_path, metric_table in evaluation_exports:
    metric_table.to_excel(os.path.join(outputs_dir, relative_path))