## Set path

In [1]:
import os
# Make `code/` the working directory and derive the sibling `outputs/` folder.
# The chdir is guarded so this cell can be re-run in the same kernel: once the
# working directory is already `code/`, there is no nested `code/` to enter and
# an unconditional chdir would raise.
_code_dir = os.path.join(os.getcwd(), 'code')
if os.path.isdir(_code_dir):
    os.chdir(_code_dir)
elif os.path.basename(os.getcwd()) != 'code':
    # Neither at the project root nor already inside `code/`: fail loudly
    # instead of silently resolving `outputs_dir` against the wrong folder.
    raise FileNotFoundError(f"Expected a 'code' directory inside {os.getcwd()!r}")
# Absolute path of `../outputs` relative to the (now current) `code/` directory.
outputs_dir = os.path.abspath(os.path.join(os.getcwd(), '../outputs'))

## Load necessary libraries

In [2]:
from evaluation_utils import *
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

## Load prediction and ground truth

In [3]:
# Locations of the ground-truth and predicted protein tables under `outputs_dir`.
ground_truth_path = os.path.join(outputs_dir, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/test_protein_clr.txt")
prediction_path = os.path.join(outputs_dir, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/test_protein_prediction.txt")

# Both files are tab-separated; the first column is used as the row index.
true_protein_expression = pd.read_table(ground_truth_path, sep="\t", index_col=0)
predicted_protein_expression = pd.read_table(prediction_path, sep="\t", index_col=0)

# Display both tables as the cell output.
true_protein_expression, predicted_protein_expression

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2         2.249319        -0.148576        -0.554041   
 AAACCCACACCGGAAA-2         2.477534        -0.217093        -0.217093   
 AAACCCACACTACTTT-2         1.893240         0.010509        -0.549107   
 AAACCCAGTAGGCAAC-2         2.226561         0.159698         0.342020   
 AAACCCAGTCTCAGGC-2         2.356191        -0.431902        -0.026437   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2         2.038612        -0.833067        -0.139920   
 TTTGTTGGTCAGACTT-2         2.000934         0.369517        -1.827708   
 TTTGTTGGTGTTTACG-2         3.162175        -0.567527         0.279771   
 TTTGTTGTCAGAATAG-2         1.633254         0.501851         0.145177   
 TTTGTTGTCGAGTGGA-2         2.151776        -1.053676        -0.360529   
 
                     ADT_CD115(CSF-1

## Get protein names and cell names

In [4]:
# Column labels of the ground-truth table are the protein names and its row
# labels are the cell names; copy each set of labels into a NumPy array.
protein_names, cell_names = (
    np.array(labels)
    for labels in (true_protein_expression.columns, true_protein_expression.index)
)
protein_names, cell_names

(array(['ADT_CD102_A0104', 'ADT_CD103_A0201', 'ADT_CD106_A0226',
        'ADT_CD115(CSF-1R)_A0105', 'ADT_CD117(c-Kit)_A0012',
        'ADT_CD11a_A0595', 'ADT_CD11c_A0106', 'ADT_CD122(IL-2Rb)_A0227',
        'ADT_CD127(IL-7Ra)_A0198', 'ADT_CD134(OX-40)_A0195',
        'ADT_CD135_A0098', 'ADT_CD137_A0194', 'ADT_CD140a_A0573',
        'ADT_CD14_A0424', 'ADT_CD15(SSEA-1)_A0076',
        'ADT_CD150(SLAM)_A0203', 'ADT_CD16-32_A0109',
        'ADT_CD169(Siglec-1)_A0440', 'ADT_CD172a(SIRPa)_A0422',
        'ADT_CD183(CXCR3)_A0228', 'ADT_CD184(CXCR4)_A0444',
        'ADT_CD192(CCR2)_A0426', 'ADT_CD195(CCR5)_A0376',
        'ADT_CD196(CCR6)_A0225', 'ADT_CD197(CCR7)_A0377', 'ADT_CD19_A0093',
        'ADT_CD200(OX2)_A0079', 'ADT_CD201(EPCR)_A0439',
        'ADT_CD204(Msr1)_A0448', 'ADT_CD206(MMR)_A0173', 'ADT_CD20_A0192',
        'ADT_CD21-CD35(CR2-CR1)_A0107', 'ADT_CD223(LAG-3)_A0378',
        'ADT_CD23_A0108', 'ADT_CD24_A0212', 'ADT_CD25_A0097',
        'ADT_CD274(B7-H1_PD-L1)_A0190', 'ADT_CD278

## Calculate PCC at the protein level

In [5]:
# PCC per protein between prediction and ground truth, on the un-normalised
# tables. The helper comes from `evaluation_utils`; the displayed result has
# one PCC value per protein name.
p_corrs_protein_level = calculate_PCC_array_protein_level(
    protein_names,
    predicted_protein_expression,
    true_protein_expression,
)
p_corrs_protein_level

Unnamed: 0,PCC
ADT_CD102_A0104,0.690924
ADT_CD103_A0201,0.677884
ADT_CD106_A0226,0.503498
ADT_CD115(CSF-1R)_A0105,0.470114
ADT_CD117(c-Kit)_A0012,0.572805
...,...
ADT_TER-119-ErythroidCells_A0122,0.137182
ADT_Tim-4_A0567,0.532624
ADT_XCR1_A0568,0.175483
ADT_anti-P2RY12_A0415,0.091249


## Perform Z-score transformation at the protein level

In [6]:
def _zscore_columns(expression):
    """Return `expression` standardised column-wise with StandardScaler, keeping its index and columns."""
    scaled_values = StandardScaler().fit_transform(expression.values)
    return pd.DataFrame(scaled_values, index=expression.index, columns=expression.columns)


# Each table is standardised independently with its own freshly fitted scaler.
true_protein_expression_zscore = _zscore_columns(true_protein_expression)
predicted_protein_expression_zscore = _zscore_columns(predicted_protein_expression)
true_protein_expression_zscore, predicted_protein_expression_zscore

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2         0.542267        -0.190559        -0.434861   
 AAACCCACACCGGAAA-2         0.923818        -0.280504         0.094249   
 AAACCCACACTACTTT-2        -0.053060         0.018280        -0.427113   
 AAACCCAGTAGGCAAC-2         0.504218         0.214128         0.972227   
 AAACCCAGTCTCAGGC-2         0.720945        -0.562495         0.393637   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2         0.189987        -1.089123         0.215434   
 TTTGTTGGTCAGACTT-2         0.126993         0.489567        -2.434904   
 TTTGTTGGTGTTTACG-2         2.068465        -0.740535         0.874477   
 TTTGTTGTCAGAATAG-2        -0.487730         0.663289         0.663123   
 TTTGTTGTCGAGTGGA-2         0.379186        -1.378727        -0.130989   
 
                     ADT_CD115(CSF-1

## Calculate RMSE at the protein level

In [7]:
# RMSE per protein, computed on the z-scored tables rather than the raw ones.
# The helper comes from `evaluation_utils`; the displayed result has one RMSE
# value per protein name.
rmses_protein_level = calculate_RMSE_array_protein_level(
    protein_names,
    predicted_protein_expression_zscore,
    true_protein_expression_zscore,
)
rmses_protein_level

Unnamed: 0,RMSE
ADT_CD102_A0104,0.786227
ADT_CD103_A0201,0.802640
ADT_CD106_A0226,0.996496
ADT_CD115(CSF-1R)_A0105,1.029452
ADT_CD117(c-Kit)_A0012,0.924332
...,...
ADT_TER-119-ErythroidCells_A0122,1.313635
ADT_Tim-4_A0567,0.966826
ADT_XCR1_A0568,1.284147
ADT_anti-P2RY12_A0415,1.348148


## Calculate PCC at the cell level

In [8]:
# PCC per cell between prediction and ground truth, on the un-normalised
# tables. The helper comes from `evaluation_utils`; the displayed result has
# one PCC value per cell name.
p_corrs_cell_level = calculate_PCC_array_cell_level(
    cell_names,
    predicted_protein_expression,
    true_protein_expression,
)
p_corrs_cell_level

Unnamed: 0,PCC
AAACCCAAGAATCTAG-2,0.931154
AAACCCACACCGGAAA-2,0.931756
AAACCCACACTACTTT-2,0.929548
AAACCCAGTAGGCAAC-2,0.939862
AAACCCAGTCTCAGGC-2,0.930120
...,...
TTTGTTGGTAAGATCA-2,0.933648
TTTGTTGGTCAGACTT-2,0.905767
TTTGTTGGTGTTTACG-2,0.897732
TTTGTTGTCAGAATAG-2,0.921398


## Perform $\ell_2$ normalization at the cell level

In [9]:
# Row-wise (per-cell) Euclidean norm: square every entry, sum across the
# proteins of each row, then take the square root.
squared_true_expression = true_protein_expression * true_protein_expression
squared_predicted_expression = predicted_protein_expression * predicted_protein_expression
l2norm_true_protein_expression = np.sqrt(squared_true_expression.sum(axis=1))
# NOTE(review): only the predicted norm gets the 1e-08 offset (presumably to
# avoid dividing by zero for an all-zero prediction); confirm whether the true
# norm is meant to stay without it.
l2norm_predicted_protein_expression = np.sqrt(squared_predicted_expression.sum(axis=1)) + 1e-08

# Reshape the norms into a column vector so each row is divided by its own norm.
true_norm_column = l2norm_true_protein_expression.values.reshape(-1, 1)
predicted_norm_column = l2norm_predicted_protein_expression.values.reshape(-1, 1)
true_protein_expression_l2norm = true_protein_expression / true_norm_column
predicted_protein_expression_l2norm = predicted_protein_expression / predicted_norm_column
true_protein_expression_l2norm, predicted_protein_expression_l2norm

(                    ADT_CD102_A0104  ADT_CD103_A0201  ADT_CD106_A0226  \
 index                                                                   
 AAACCCAAGAATCTAG-2         0.149797        -0.009895        -0.036897   
 AAACCCACACCGGAAA-2         0.186246        -0.016320        -0.016320   
 AAACCCACACTACTTT-2         0.124559         0.000691        -0.036126   
 AAACCCAGTAGGCAAC-2         0.146017         0.010473         0.022429   
 AAACCCAGTCTCAGGC-2         0.146378        -0.026832        -0.001642   
 ...                             ...              ...              ...   
 TTTGTTGGTAAGATCA-2         0.137932        -0.056365        -0.009467   
 TTTGTTGGTCAGACTT-2         0.133902         0.024728        -0.122310   
 TTTGTTGGTGTTTACG-2         0.230331        -0.041338         0.020378   
 TTTGTTGTCAGAATAG-2         0.112578         0.034592         0.010007   
 TTTGTTGTCGAGTGGA-2         0.122663        -0.060065        -0.020552   
 
                     ADT_CD115(CSF-1

## Calculate RMSE at the cell level

In [10]:
# RMSE per cell, computed on the L2-normalised tables rather than the raw ones.
# The helper comes from `evaluation_utils`; the displayed result has one RMSE
# value per cell name.
rmses_cell_level = calculate_RMSE_array_cell_level(
    cell_names,
    predicted_protein_expression_l2norm,
    true_protein_expression_l2norm,
)
rmses_cell_level

Unnamed: 0,RMSE
AAACCCAAGAATCTAG-2,0.035407
AAACCCACACCGGAAA-2,0.035262
AAACCCACACTACTTT-2,0.035886
AAACCCAGTAGGCAAC-2,0.033152
AAACCCAGTCTCAGGC-2,0.035708
...,...
TTTGTTGGTAAGATCA-2,0.034840
TTTGTTGGTCAGACTT-2,0.041424
TTTGTTGGTGTTTACG-2,0.043144
TTTGTTGTCAGAATAG-2,0.037898


## Save evaluation results

In [11]:
# Each metric table paired with its destination workbook, relative to `outputs_dir`.
results_to_save = [
    (p_corrs_protein_level, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/PCC_protein_level.xlsx"),
    (rmses_protein_level, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/RMSE_protein_level.xlsx"),
    (p_corrs_cell_level, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/PCC_cell_level.xlsx"),
    (rmses_cell_level, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/cTP-net/RMSE_cell_level.xlsx"),
]

# Write the tables one by one, in the order listed above.
for result_table, relative_path in results_to_save:
    result_table.to_excel(os.path.join(outputs_dir, relative_path))