In [1]:
import pandas as pd
import numpy as np
import sys
import re
import os

import server
from server import Server 
import client
from client import Client

In [2]:
import pyximport
pyximport.install(setup_args={"script_args" : ["--verbose"]})
from linbinR import fast_linbin

In [3]:
split_dir = "E:\\Msc Systems Biology\\MSB5000_Master_Thesis\\Practical work\\Data\\GSE66351_splits"
output = "E:\\Msc Systems Biology\\MSB5000_Master_Thesis\\Practical work\\Data\\GSE66351_Fed"

## Initialising the clients

In [4]:
# create client
lab_a = Client("Lab_A", os.path.join(split_dir, "Split_1_design.csv"), os.path.join(split_dir, "Split_1_methylated.csv"), os.path.join(split_dir, "Split_1_unmethylated.csv"), os.path.join(split_dir, "probe_annotation.csv"))
lab_b = Client("Lab_B", os.path.join(split_dir, "Split_2_design.csv"), os.path.join(split_dir, "Split_2_methylated.csv"), os.path.join(split_dir, "Split_2_unmethylated.csv"), os.path.join(split_dir, "probe_annotation.csv"))
lab_c = Client("Lab_C", os.path.join(split_dir, "Split_3_design.csv"), os.path.join(split_dir, "Split_3_methylated.csv"), os.path.join(split_dir, "Split_3_unmethylated.csv"), os.path.join(split_dir, "probe_annotation.csv"))

## Initialising the server

In [5]:
serv = Server(["AD", "CTRL"], ["Age", "Sex", "sentrix_id"])
global_conditions = serv.return_global_conditions()

## Joining clients to the server

In [6]:
# join the clients
serv.get_clients(lab_a.cohort_name, lab_a.probes, lab_a.designmatrix.index)
serv.get_clients(lab_b.cohort_name, lab_b.probes, lab_b.designmatrix.index)
serv.get_clients(lab_c.cohort_name, lab_c.probes, lab_c.designmatrix.index)

In [7]:
global_probes = serv.find_global_probes()
cohort_effect = serv.find_cohort_effects()

In [8]:
#check client input
lab_a.input_validation(global_conditions, global_probes)
lab_b.input_validation(global_conditions, global_probes)
lab_c.input_validation(global_conditions, global_probes)

In [9]:
lab_a.cohort_effects(serv.cohort_effects)
lab_b.cohort_effects(serv.cohort_effects)
lab_c.cohort_effects(serv.cohort_effects)

In [10]:
if "sentrix_id" in global_conditions:
    lab_a.find_unique_SentrixIDS()
    lab_b.find_unique_SentrixIDS()
    lab_c.find_unique_SentrixIDS()
    global_sentrix = serv.return_global_SentrixID(lab_a.unique_SentrixIDS,
                                lab_b.unique_SentrixIDS,
                                lab_c.unique_SentrixIDS)
    lab_a.SentrixID_effects(global_sentrix)
    lab_b.SentrixID_effects(global_sentrix)
    lab_c.SentrixID_effects(global_sentrix)
    



## Dasen normalisation

Client side

In [12]:
lab_a.intensity_distributions()
lab_b.intensity_distributions()
lab_c.intensity_distributions()

(                GSM2808876_8918692108_R01C01  GSM2808877_8918692108_R02C02  \
 cg00000029                            2499.0                        3087.0   
 cg00000108                            5679.0                        6121.0   
 cg00000109                            1760.0                        1922.0   
 cg00000165                             855.0                         983.0   
 cg00000236                            2710.0                        3356.0   
 ...                                      ...                           ...   
 ch.X.97129969R                         704.0                         395.0   
 ch.X.97133160R                        2407.0                        1359.0   
 ch.X.97651759F                        3973.0                        1856.0   
 ch.X.97737721F                        3893.0                        2355.0   
 ch.X.98007042R                        1514.0                         718.0   
 
                 GSM2808881_8918692108_R04C02  GSM

In [13]:
local_dasen_paramA = lab_a.local_normalisation_parameters()
local_dasen_paramB = lab_b.local_normalisation_parameters()
local_dasen_paramC = lab_c.local_normalisation_parameters()

Server side

In [14]:
probe_type_means = serv.aggregate_QN_means(local_dasen_paramA, local_dasen_paramB, local_dasen_paramC)

Client side

In [15]:
lab_a.final_normalisation(probe_type_means)
lab_b.final_normalisation(probe_type_means)
lab_c.final_normalisation(probe_type_means)

Unnamed: 0,GSM2808876_8918692108_R01C01,GSM2808877_8918692108_R02C02,GSM2808881_8918692108_R04C02,GSM2808888_8918692120_R04C01,GSM2808891_8918692120_R06C02,GSM2808896_8221932039_R05C01,GSM2808900_8221932039_R03C02,GSM2808901_8221932039_R06C02,GSM2808902_8221932039_R05C02,GSM2808908_9247377057_R06C01,...,GSM2809034_5900438023_R05C02,GSM2809039_5900438003_R04C01,GSM2809046_5854945005_R02C01,GSM2809047_5854945005_R03C01,GSM2809051_5854945005_R03C02,GSM2809053_5854945005_R05C02,GSM2809055_5854945011_R01C01,GSM2809057_5854945011_R03C01,GSM2809058_5854945011_R04C01,GSM2809059_5854945011_R05C01
cg00000029,0.612893,0.645753,0.685034,0.555095,0.659866,0.553193,0.531689,0.630695,0.539317,0.563119,...,0.587502,0.660745,0.576664,0.619541,0.552536,0.610707,0.589649,0.591322,0.593586,0.544863
cg00000108,0.894513,0.885150,0.892796,0.905099,0.890299,0.896547,0.908008,0.895261,0.903957,0.915938,...,0.884324,0.890395,0.875127,0.885995,0.872514,0.880245,0.886876,0.874415,0.887306,0.890384
cg00000109,0.765342,0.686668,0.666553,0.788453,0.666381,0.790743,0.819840,0.689122,0.784788,0.810438,...,0.719424,0.731211,0.750838,0.742246,0.735088,0.768949,0.765047,0.731248,0.719772,0.756501
cg00000165,0.305253,0.351852,0.378875,0.295425,0.379474,0.287283,0.295979,0.367261,0.293877,0.307368,...,0.309472,0.375517,0.310355,0.276427,0.351837,0.351976,0.352004,0.316026,0.372187,0.346962
cg00000236,0.789144,0.812750,0.815253,0.777129,0.813197,0.719558,0.799761,0.827695,0.770673,0.773271,...,0.810676,0.799848,0.795311,0.798062,0.797961,0.820847,0.800473,0.788946,0.819893,0.803959
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ch.X.97129969R,0.332673,0.299612,0.283016,0.309394,0.228929,0.309988,0.324906,0.285316,0.403810,0.396523,...,0.356196,0.300564,0.249201,0.324424,0.351466,0.291119,0.374168,0.315134,0.336106,0.347352
ch.X.97133160R,0.597740,0.491542,0.399061,0.542554,0.344285,0.583005,0.577463,0.460617,0.728425,0.705552,...,0.440021,0.422256,0.347223,0.412692,0.556734,0.330280,0.467309,0.487551,0.361825,0.565140
ch.X.97651759F,0.582474,0.355892,0.366986,0.580222,0.233003,0.564220,0.622953,0.312512,0.786665,0.778670,...,0.336278,0.298142,0.350278,0.394727,0.526765,0.280557,0.447091,0.442271,0.343252,0.495237
ch.X.97737721F,0.410953,0.241596,0.299413,0.361287,0.233290,0.420036,0.393942,0.268637,0.433500,0.357335,...,0.313246,0.296452,0.310107,0.322968,0.315495,0.284369,0.300656,0.350168,0.290463,0.335341


In [16]:
# save the betas for testing
lab_a.betas.to_csv(os.path.join(output, "even_split1_betas.csv"))
lab_b.betas.to_csv(os.path.join(output, "even_split2_betas.csv"))
lab_c.betas.to_csv(os.path.join(output, "even_split3_betas.csv"))

## EWAS - Linear regression model

Client side

In [17]:
local_rega = lab_a.local_xtx_xty()
local_regb = lab_b.local_xtx_xty()
local_regc = lab_c.local_xtx_xty()

Server side

In [18]:
serv.global_regression_parameter(local_rega, local_regb, local_regc)

#### Client side

calculate the local sse and covariance of the regression coefficients

In [19]:
SSE_a,cov_coef_a = lab_a.compute_SSE_and_cov_coef(serv.beta)
SSE_b,cov_coef_b = lab_b.compute_SSE_and_cov_coef(serv.beta)
SSE_c,cov_coef_c = lab_c.compute_SSE_and_cov_coef(serv.beta)
SSE_list = [SSE_a, SSE_b, SSE_c]
cov_coef_list = [cov_coef_a, cov_coef_b, cov_coef_c]

#### Server side

calculate the global SSE and covariance of the regression coefficients

In [20]:
serv.aggregate_SSE_and_cov_coef(SSE_list,cov_coef_list)

Make and fit the contrasts to the linear model

In [21]:
contrasts_mat = serv.make_contrasts(contrasts=[(["AD"],["CTRL"])])
serv.fit_contasts(contrasts_mat.values)

Calculate the P-values

In [22]:
serv.eBayes()

Get the results table

In [23]:
serv.table 

Unnamed: 0,t,lods,logFC,CI.L,CI.R,adj.P.Val,P.Value
cg23289794,-6.655820,11.682015,-2.352359e-02,-0.030502,-0.016546,0.000178,3.943927e-10
cg11758688,-6.506860,10.901237,-2.439186e-01,-0.317931,-0.169906,0.000197,8.725692e-10
cg11992265,6.008300,8.366595,6.170333e-02,0.041427,0.081980,0.001737,1.155541e-08
cg15858894,5.663543,6.689992,4.594001e-02,0.029925,0.061955,0.007234,6.418218e-08
cg01832266,-5.382926,5.374965,-2.002987e-02,-0.027377,-0.012683,0.022296,2.472700e-07
...,...,...,...,...,...,...,...
cg21243919,-0.000013,-8.042141,-2.271746e-08,-0.003348,0.003348,0.999998,9.999893e-01
cg23882945,0.000009,-8.042141,1.075698e-07,-0.024428,0.024428,0.999998,9.999931e-01
cg16424440,-0.000004,-8.042141,-8.795325e-09,-0.004029,0.004029,0.999998,9.999966e-01
cg27579805,-0.000004,-8.042141,-2.985883e-08,-0.015879,0.015879,0.999998,9.999970e-01


In [24]:
serv.table.to_csv(os.path.join(output, "even_splits_EWAS_results.csv"))