# Libraries

In [1]:
import pandas as pd
import numpy as np 
import os

# Data from Onset of Labor Study

In [35]:
# Read every raw input table of the Onset of Labor (OOL) model from ./Data.
# The names in the target list pair positionally with the paths below, and the
# files are read in the listed order.
(
    Cytof_data,
    features,
    Id,
    coefs,
    SG_coefs,
    time_points,
    EGA,
    DOS,
) = map(
    pd.read_csv,
    (
        './Data/CYTOF_data_model_OOL.csv',
        './Data/features_model_OOL.csv',
        './Data/Id_data_model_OOL.csv',
        './Data/LeaveOneOut_coefs_model_OOL.csv',
        './Data/coefs_SG_model_OOL.csv',
        './Data/Timepoints_data_model_OOL.csv',
        './Data/EGA_data_model_OOL.csv',
        './Data/DOS.csv',
    ),
)

In [48]:
# Name the first CyTOF column 'SampleID'; all other CyTOF column names are kept.
Cytof_data.columns = ['SampleID', *Cytof_data.columns[1:]]

# Each annotation table has exactly two columns; relabel them as ('index', <value name>).
# NOTE: this renames the loaded frames in place, so their CSV column names
# are no longer available after this cell has run.
DOS.columns = ['index', 'DOS']
EGA.columns = ['index', 'EGA']
time_points.columns = ['index', 'Timepoint']
Id.columns = ['index', 'ID']

# Put the annotation values next to the CyTOF measurements (column-wise concat on the
# row index), then use the sample identifier as the row index instead of a data column.
CyTOF = pd.concat(
    [Cytof_data, Id['ID'], time_points['Timepoint'], EGA['EGA'], DOS['DOS']],
    axis=1,
).set_index('SampleID')

CyTOF.to_csv('./Data/Onset of Labor CyTOF.csv')

# P-values of the immunome features with Spearman correlation

In [36]:
from scipy.stats import spearmanr
from stabl.visualization import scatterplot_features

# Univariate screen: Spearman rank correlation of every feature column with the outcome.
X = features

# The outcome is the second column of the two-column DOS table. Selecting it by position
# keeps this cell working whether or not the DOS columns have already been renamed from
# their CSV names; the Series keeps the name 'x' that `DOS['x']` would have had.
y = DOS.iloc[:, 1].rename('x')

# Both output folders must exist before the CSV and the figures are written.
os.makedirs('../Onset of Labor/Univariate', exist_ok=True)
os.makedirs('../Onset of Labor/Data', exist_ok=True)

# The first column of X is skipped. The names live in their own variable so that
# `features` keeps pointing at the DataFrame (a later cell calls `features.columns`).
feature_names = X.columns[1:]

# feature name -> [Spearman correlation, p-value]
Spearmancorr = {}
for feature in feature_names:
    corr, pval = spearmanr(X[feature], y)
    Spearmancorr[feature] = [corr, pval]

# One row per feature, most significant (smallest p-value) first.
SpearmanPvalue = pd.DataFrame(Spearmancorr).T
SpearmanPvalue.columns = ['Spearman corr', 'pvalue']
SpearmanPvalue = SpearmanPvalue.sort_values('pvalue')
SpearmanPvalue.to_csv("../Onset of Labor/Data/SpearmanCorrelationsPval.csv", index=True)

# Export scatter plots for the ten features with the smallest p-values.
scatterplot_features(
    SpearmanPvalue.index[:10],
    X,
    y,
    show_fig=False,
    export_file=True,
    path='../Onset of Labor/Univariate')

# Coefficients of the OOL model for immunome features

In [80]:
# `coefs['x']` is a flat vector of model coefficients. It is reshaped to one row per
# leave-one-out iteration and one column per feature. The number of features is taken
# from the feature table and the number of iterations is inferred from the vector
# length, instead of hard-coding 53 x 1423.
# `features` must still be the DataFrame loaded from features_model_OOL.csv at this point;
# its first column is skipped.
coef_feature_names = features.columns[1:]
loo_coefs = np.asarray(coefs['x']).reshape((-1, len(coef_feature_names)))
iteration_labels = [f"iteration{i}" for i in range(1, loo_coefs.shape[0] + 1)]

all_coefs = pd.DataFrame(index=iteration_labels,
                         columns=coef_feature_names,
                         data=loo_coefs)

# Same layout for the second coefficient file: one row per iteration, one column per
# listed name. Reusing `iteration_labels` makes pandas raise if the two files do not
# describe the same number of iterations.
sg_columns = ['metabolomic', 'proteomics', 'immuno']
all_SG_coefs = pd.DataFrame(index=iteration_labels,
                            columns=sg_columns,
                            data=np.asarray(SG_coefs['x']).reshape((-1, len(sg_columns))))

# Take the median of each feature's coefficient over all the models built during
# LeaveOneOut. Scaling by the 'immuno' median of all_SG_coefs is intentionally disabled:
final_coefs = all_coefs.median() #* all_SG_coefs.median()['immuno']

#final_coefs.to_csv('../Drug Study/Onset of Labor csv/OOL_median_coefs.csv')

# Display only the features whose median coefficient is non-zero.
final_coefs[final_coefs != 0]

CCR5posCCR2posCD4Tcm_IkB_unstim      -1.182613
CD4Tem_MAPKAPK2_unstim               -0.816871
CD69posCD8Tmem_MAPKAPK2_unstim       -8.978811
ncMCs_CREB_GMCSF                     -7.507944
Bcells_STAT1_IFNa                     4.403599
DCs_STAT6_IFNa                       -2.749978
mDCs_STAT1_IFNa                       1.426287
pDCs_STAT1_IFNa                       2.218389
CD69negCD56loCD16negNK_STAT1_IFNa    33.982482
CD4Tcells_MAPKAPK2_IFNa               0.566993
CD62LposCD4Tnaive_MAPKAPK2_IFNa       4.377609
CCR5posCCR2posCD4Tem_NFkB_IL246      -1.427272
Bcells_MAPKAPK2_LPS                   2.167044
NK_CREB_LPS                           0.110689
CD69posCD56loCD16negNK_CREB_LPS       0.779535
Bcells                               -1.030805
Granulocytes                         -6.197337
dtype: float64

# Index Score from OOL

In [81]:
# Wrap the median coefficients in a one-column table ('coef') so they can be
# joined with other per-feature tables on the feature-name index.
final_coefs = pd.DataFrame(final_coefs, columns=['coef'])

In [82]:
# Join coefficients and Spearman statistics on the feature name (inner join on the index).
score_tabl = final_coefs.merge(SpearmanPvalue, left_index=True, right_index=True)

# IndexScore = |coef| * -log10(p-value): large for features that have both a large
# coefficient and a small univariate p-value; zero when the coefficient is zero.
score_tabl['IndexScore'] = -score_tabl['coef'].abs() * np.log10(score_tabl['pvalue'])

# Highest scores first; display only the rows with a non-zero score.
score_tabl = score_tabl.sort_values(by='IndexScore', ascending=False)
score_tabl.loc[score_tabl['IndexScore'] != 0]

Unnamed: 0,coef,Spearman corr,pvalue,IndexScore
CD69negCD56loCD16negNK_STAT1_IFNa,33.982482,0.609242,1.311126e-16,539.721866
Bcells_STAT1_IFNa,4.403599,0.56241,6.914545e-14,57.952399
Granulocytes,-6.197337,-0.342882,1.744975e-05,29.488234
CD69posCD8Tmem_MAPKAPK2_unstim,-8.978811,-0.272075,0.000756832,28.022873
ncMCs_CREB_GMCSF,-7.507944,-0.277071,0.0005980945,24.199833
pDCs_STAT1_IFNa,2.218389,0.506051,3.94852e-11,23.07916
CD62LposCD4Tnaive_MAPKAPK2_IFNa,4.377609,0.346689,1.385747e-05,21.267806
mDCs_STAT1_IFNa,1.426287,0.520401,8.753178e-12,15.771644
Bcells_MAPKAPK2_LPS,2.167044,0.30633,0.0001376011,8.367776
DCs_STAT6_IFNa,-2.749978,-0.183878,0.02429429,4.439828


coef    1423
dtype: int64