In [1]:
import pandas as pd
import numpy as np
import sys
import re
import os

import server
from server import Server 
import client
from client import Client

In [2]:
# pyximport compiles Cython sources at import time. install() has to run before
# the import below so the import hook is registered first; "--verbose" is
# forwarded to the build via script_args.
import pyximport
pyximport.install(setup_args={"script_args" : ["--verbose"]})
# NOTE(review): presumably linbinR is a local .pyx module — confirm. fast_linbin is
# not referenced again in the visible cells of this notebook.
from linbinR import fast_linbin

In [2]:
# Input and output directories for this run.
# Each location can be overridden with an environment variable so the notebook is
# not tied to a single machine; when the variable is unset the original path is
# used unchanged.
#   FED_EWAS_SPLIT_DIR  -> directory holding the per-split CSV inputs
#   FED_EWAS_OUTPUT_DIR -> directory that receives every file written below
# NOTE(review): the input directory is named "mild_splits" while the files written
# later in this notebook are prefixed "strong_split(s)" — confirm which is intended.
split_dir = os.environ.get(
    "FED_EWAS_SPLIT_DIR",
    "/home/silke/Documents/FED_EWAS/Data/GSE66351_mild_splits",
)
output = os.environ.get(
    "FED_EWAS_OUTPUT_DIR",
    "/home/silke/Documents/FED_EWAS/Data/GSE66351_Fed",
)

## Initialising the clients

In [3]:
# Build one Client per lab from that lab's split files
def _client_for_split(cohort_name, split_number):
    """Return a Client wired to the CSV files of split `split_number` in `split_dir`.

    The design, methylated and unmethylated files are split-specific; the probe
    annotation file is shared by every split.
    """
    prefix = f"Split_{split_number}_"
    return Client(
        cohort_name,
        os.path.join(split_dir, prefix + "design.csv"),
        os.path.join(split_dir, prefix + "methylated.csv"),
        os.path.join(split_dir, prefix + "unmethylated.csv"),
        os.path.join(split_dir, "probe_annotation.csv"),
    )


lab_a = _client_for_split("Lab_A", 1)
lab_b = _client_for_split("Lab_B", 2)
lab_c = _client_for_split("Lab_C", 3)

## Initialising the server

In [6]:
# Labels handed to the server at construction time.
# NOTE(review): presumably the first list holds the diagnosis groups and the second
# the additional model terms — confirm against Server.__init__.
group_labels = ["AD", "CTRL"]
model_terms = ["Age", "Sex", "sentrix_id"]
serv = Server(group_labels, model_terms)

# Keep the server's condition list; the cells below use it for client validation
# and to decide whether Sentrix IDs need handling.
global_conditions = serv.return_global_conditions()

## Joining clients to the server

In [7]:
# Register every lab with the server: cohort name, probes and design-matrix index
for lab in (lab_a, lab_b, lab_c):
    serv.get_clients(lab.cohort_name, lab.probes, lab.designmatrix.index)

In [8]:
# Ask the server for the probe set and cohort effects of the joined clients.
# global_probes is passed to each client's input validation in the next cell.
global_probes = serv.find_global_probes()
# NOTE(review): `cohort_effect` is not read again in the visible cells — the clients
# are given serv.cohort_effects instead; presumably both hold the same value, confirm.
cohort_effect = serv.find_cohort_effects()

In [9]:
# Validate each lab's local input against the server-wide conditions and probes
for lab in (lab_a, lab_b, lab_c):
    lab.input_validation(global_conditions, global_probes)

In [10]:
# Hand the server's cohort effects to every lab
for lab in (lab_a, lab_b, lab_c):
    lab.cohort_effects(serv.cohort_effects)

In [11]:
# Sentrix IDs are only handled when "sentrix_id" is among the global conditions
if "sentrix_id" in global_conditions:
    sentrix_labs = (lab_a, lab_b, lab_c)

    # 1) each lab determines its own unique Sentrix IDs
    for lab in sentrix_labs:
        lab.find_unique_SentrixIDS()

    # 2) the server combines the three local ID collections (passed positionally,
    #    in lab order a, b, c)
    global_sentrix = serv.return_global_SentrixID(
        *(lab.unique_SentrixIDS for lab in sentrix_labs)
    )

    # 3) every lab receives the combined result
    for lab in sentrix_labs:
        lab.SentrixID_effects(global_sentrix)
    



## Dasen normalisation

Client side

In [12]:
# Compute the intensity distributions locally at each lab
for lab in (lab_a, lab_b):
    lab.intensity_distributions()
# Kept as the bare last expression so the notebook still renders lab_c's result
lab_c.intensity_distributions()

(                GSM2808875_8918692108_R01C02  GSM2808876_8918692108_R01C01  \
 cg00000029                            2508.0                        2499.0   
 cg00000108                            5354.0                        5679.0   
 cg00000109                            1779.0                        1760.0   
 cg00000165                             966.0                         855.0   
 cg00000236                            3171.0                        2710.0   
 ...                                      ...                           ...   
 ch.X.97129969R                         340.0                         704.0   
 ch.X.97133160R                        1030.0                        2407.0   
 ch.X.97651759F                        1275.0                        3973.0   
 ch.X.97737721F                        2143.0                        3893.0   
 ch.X.98007042R                         544.0                        1514.0   
 
                 GSM2808878_8918692108_R02C01  GSM

In [13]:
# Collect each lab's local normalisation parameters (evaluated in order a, b, c)
local_dasen_paramA, local_dasen_paramB, local_dasen_paramC = (
    lab.local_normalisation_parameters() for lab in (lab_a, lab_b, lab_c)
)

Server side

In [14]:
# The server combines the three labs' local normalisation parameters; the result is
# sent back to every lab in the final normalisation step below.
probe_type_means = serv.aggregate_QN_means(local_dasen_paramA, local_dasen_paramB, local_dasen_paramC)

Client side

In [15]:
# Finish the normalisation at each lab using the server-aggregated means
for lab in (lab_a, lab_b):
    lab.final_normalisation(probe_type_means)
# Kept as the bare last expression so the notebook still renders lab_c's result
lab_c.final_normalisation(probe_type_means)

Unnamed: 0,GSM2808875_8918692108_R01C02,GSM2808876_8918692108_R01C01,GSM2808878_8918692108_R02C01,GSM2808879_8918692108_R03C02,GSM2808881_8918692108_R04C02,GSM2808883_8918692108_R05C02,GSM2808884_8918692108_R05C01,GSM2808887_8918692120_R04C02,GSM2808889_8918692120_R05C02,GSM2808890_8918692120_R05C01,...,GSM2809050_5854945005_R06C01,GSM2809051_5854945005_R03C02,GSM2809053_5854945005_R05C02,GSM2809056_5854945011_R02C01,GSM2809058_5854945011_R04C01,GSM2809059_5854945011_R05C01,GSM2809061_5854945011_R01C02,GSM2809062_5854945011_R03C02,GSM2809063_5854945011_R04C02,GSM2809064_5854945011_R05C02
cg00000029,0.651772,0.612893,0.594738,0.676544,0.685034,0.599437,0.631644,0.675504,0.678419,0.602605,...,0.607883,0.552536,0.610707,0.653951,0.593586,0.544863,0.595921,0.627015,0.595071,0.604785
cg00000108,0.886961,0.894513,0.895567,0.895378,0.892796,0.910837,0.905866,0.899353,0.894724,0.903664,...,0.908866,0.872514,0.880245,0.864715,0.887306,0.890384,0.894619,0.885518,0.881948,0.873194
cg00000109,0.683035,0.765342,0.767077,0.662039,0.666553,0.701507,0.784446,0.652433,0.634554,0.754802,...,0.772755,0.735088,0.768949,0.801284,0.719772,0.756501,0.780224,0.785353,0.742880,0.749155
cg00000165,0.363254,0.305253,0.283040,0.368762,0.378875,0.388608,0.268668,0.333466,0.353539,0.332663,...,0.373131,0.351837,0.351976,0.330318,0.372187,0.346962,0.325860,0.323726,0.348148,0.304639
cg00000236,0.838525,0.789144,0.788310,0.818738,0.815253,0.837046,0.822737,0.812829,0.788290,0.772109,...,0.812166,0.797961,0.820847,0.823407,0.819893,0.803959,0.823150,0.791650,0.806551,0.794142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ch.X.97129969R,0.226861,0.332673,0.450387,0.216691,0.283016,0.230539,0.399877,0.210922,0.257442,0.364019,...,0.293111,0.351466,0.291119,0.338459,0.336106,0.347352,0.387089,0.237799,0.302188,0.286885
ch.X.97133160R,0.322142,0.597740,0.696986,0.350835,0.399061,0.401637,0.725559,0.370861,0.391686,0.591472,...,0.309546,0.556734,0.330280,0.527641,0.361825,0.565140,0.494044,0.334567,0.395927,0.370618
ch.X.97651759F,0.226885,0.582474,0.755088,0.224582,0.366986,0.283942,0.783406,0.223443,0.268584,0.605756,...,0.266676,0.526765,0.280557,0.346343,0.343252,0.495237,0.393035,0.281672,0.370430,0.320815
ch.X.97737721F,0.236354,0.410953,0.423455,0.232693,0.299413,0.195063,0.422111,0.223105,0.276326,0.437649,...,0.263860,0.315495,0.284369,0.295588,0.290463,0.335341,0.305950,0.279807,0.326334,0.275812


In [16]:
# Write each lab's betas to the output directory for testing
for split_number, lab in enumerate((lab_a, lab_b, lab_c), start=1):
    betas_csv = os.path.join(output, f"strong_split{split_number}_betas.csv")
    lab.betas.to_csv(betas_csv)

## EWAS - Linear regression model

Client side

In [17]:
# Each lab computes its local regression summaries (evaluated in order a, b, c)
local_rega, local_regb, local_regc = (
    lab.local_xtx_xty() for lab in (lab_a, lab_b, lab_c)
)

Server side

In [18]:
# The server aggregates the three labs' local_xtx_xty() results. The cells below
# then read serv.beta and serv.global_xty.
# NOTE(review): presumably this solves the pooled normal equations — confirm in Server.
serv.global_regression_parameter(local_rega, local_regb, local_regc)

#### Client side

Calculate the local SSE and the covariance of the regression coefficients.

In [19]:
# Every lab evaluates its SSE and coefficient covariance against the server's beta;
# each call returns an (SSE, cov_coef) pair.
sse_cov_pairs = [
    lab.compute_SSE_and_cov_coef(serv.beta) for lab in (lab_a, lab_b, lab_c)
]
(SSE_a, cov_coef_a), (SSE_b, cov_coef_b), (SSE_c, cov_coef_c) = sse_cov_pairs

# Per-lab results gathered in lab order (a, b, c) for the server-side aggregation
SSE_list = [SSE_a, SSE_b, SSE_c]
cov_coef_list = [cov_coef_a, cov_coef_b, cov_coef_c]

#### Server side

Calculate the global SSE and the covariance of the regression coefficients.

In [20]:
# The server combines the per-lab SSE values and coefficient covariances
# (both lists are in lab order a, b, c).
serv.aggregate_SSE_and_cov_coef(SSE_list,cov_coef_list)

In [21]:
# Dump the server's beta and global X'y as comma-separated text files.
# NOTE(review): the first file is named "model_matrix" but receives serv.beta — confirm
# the file name reflects what downstream checks expect.
beta_csv = os.path.join(output, "strong_splits_model_matrix.csv")
np.savetxt(beta_csv, serv.beta, delimiter=",")

xty_csv = os.path.join(output, "strong_splits_model_matrix_xty.csv")
np.savetxt(xty_csv, serv.global_xty, delimiter=",")

Make and fit the contrasts to the linear model

In [22]:
# Single contrast: AD versus CTRL
ad_vs_ctrl = [(["AD"], ["CTRL"])]
contrasts_mat = serv.make_contrasts(contrasts=ad_vs_ctrl)

# The server is fitted with the raw values of the contrast matrix
contrast_values = contrasts_mat.values
serv.fit_contasts(contrast_values)

In [23]:
# Save the contrast matrix, then the server's beta and global X'y after the contrast fit
contrast_csv = os.path.join(output, "strong_splits_contrastmat.csv")
contrasts_mat.to_csv(contrast_csv)

beta_fit_csv = os.path.join(output, "strong_splits_model_matrix_contractfit.csv")
np.savetxt(beta_fit_csv, serv.beta, delimiter=",")

xty_fit_csv = os.path.join(output, "strong_splits_model_matrix_xty_contractfit.csv")
np.savetxt(xty_fit_csv, serv.global_xty, delimiter=",")

Calculate the P-values

In [24]:
# Server-side step that produces the statistics shown in serv.table below.
# NOTE(review): presumably a limma-style empirical Bayes moderation — confirm in Server.
serv.eBayes()

Get the results table

In [25]:
# Bare expression: the notebook renders the server's results table as this cell's output
serv.table 

Unnamed: 0,t,lods,logFC,CI.L,CI.R,adj.P.Val,P.Value
cg11758688,-6.176943,9.331129,-2.664268e-01,-0.351587,-0.181266,0.002203,4.887204e-09
cg26516741,5.730027,7.130199,1.355506e-02,0.008884,0.018226,0.010446,4.633855e-08
cg27353962,5.595971,6.491377,3.453242e-02,0.022349,0.046716,0.013400,8.916686e-08
cg23289794,-5.469382,5.897633,-2.185569e-02,-0.029745,-0.013966,0.018480,1.639614e-07
cg09324326,5.412534,5.634057,7.487043e-02,0.047559,0.102182,0.019379,2.149250e-07
...,...,...,...,...,...,...,...
cg17413194,-0.000012,-7.915247,-9.141305e-08,-0.015253,0.015253,0.999998,9.999906e-01
cg22231902,0.000010,-7.915247,4.816494e-08,-0.009228,0.009228,0.999998,9.999918e-01
cg14481208,-0.000008,-7.915247,-9.116552e-08,-0.022621,0.022621,0.999998,9.999937e-01
cg03428314,0.000003,-7.915247,3.854627e-08,-0.024498,0.024498,0.999999,9.999975e-01


In [26]:
# Write the results table to the output directory
serv.table.to_csv(os.path.join(output, "strong_splits_EWAS_results.csv"))