# To keep the sampler identical to the R analysis, we borrow sampling functions from R (via rpy2).

In [60]:
# Cross-validation layout: `repeats` repetitions of `nfold`-fold CV over `nsample` samples.
repeats = 10
nfold = 5
nsample = 240
# cf = number of samples in one held-out fold, lf = number left for training.
cf = nsample // nfold
lf = nsample - cf
import rpy2.robjects as robjects
import numpy as np
# Handles to R functions looked up by name through rpy2, so that seeding and
# shuffling go through R rather than through numpy.
setseed = robjects.r["set.seed"]
sample = robjects.r["sample"]
sd = robjects.r["sd"]  # not referenced by the cells visible in this notebook section

# Platform information

In [2]:
import platform
# Record the operating system and hardware the notebook was executed on.
platform.uname()

uname_result(system='Windows', node='LAPTOP-9BDORFP1', release='10', version='10.0.17134', machine='AMD64', processor='Intel64 Family 6 Model 158 Stepping 9, GenuineIntel')

In [6]:
from tensorflow.python.client import device_lib
# List the compute devices (CPU / GPU) that TensorFlow can see on this machine.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 13544685235067680755, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 78616985
 locality {
   bus_id: 1
 }
 incarnation: 6268415023482120726
 physical_device_desc: "device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1"]

In [7]:
import sys
# Python interpreter version used for this run.
print(sys.version)

3.5.4 | packaged by conda-forge | (default, Dec 18 2017, 06:53:03) [MSC v.1900 64 bit (AMD64)]


# Loading data

In [24]:
import pandas as pd

# Phenotype table; its `bcw` and `length` columns are the two traits modelled below.
pheno = pd.read_csv('./2017heteroPheno.csv')

# Whitespace-delimited marker table, shifted down by one.
# NOTE(review): presumably 0/1/2 allele counts recoded to -1/0/1 - confirm against the file.
# The separator is a raw string because "\s" is not a valid escape in a normal
# string literal, and `.values` replaces `DataFrame.as_matrix()`, which is
# deprecated and absent from newer pandas releases.
I = pd.read_table('./Hetero_realigned_cov10_filtered3.raw', sep=r"\s+").values - 1
bcw = pheno["bcw"].values
length = pheno["length"].values

# Accuracy tables with one slot per (fold, repeat):
#   AccSum1 / AccSum2 - single-task network, test / train correlation
#   AccSum3 / AccSum4 - multi-task network,  test / train correlation
AccSum1 = np.zeros((nfold, repeats))
AccSum2 = np.zeros((nfold, repeats))
AccSum3 = np.zeros((nfold, repeats))
AccSum4 = np.zeros((nfold, repeats))

# Import packages for deep learning models

In [46]:
# pearsonr is imported from the public scipy.stats namespace; scipy.stats.stats
# is a private module path that newer SciPy releases deprecate.
from scipy.stats import pearsonr
import keras 
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation
import time
# Record the library versions this notebook was run with.
print("version of tensorflow_gpu:",tf.__version__)
print("version of tensorflow_bakend_keras:",keras.__version__)

version of tensorflow_gpu: 1.4.0
version of tensorflow_bakend_keras: 2.1.4


# Neural network

In [None]:
start = time.time()
# 10-repeat 5-fold cross-validation; fold assignment is drawn with R's RNG so the
# splits are the same as in the R analysis.
n_features = I.shape[1]  # network input width follows the marker matrix instead of a literal
for j in range(repeats):
    # One R seed per repeat (104, 107, ...), so every repeat gets its own reproducible shuffle.
    setseed(100+3*(j+1)+1)
    # Fold labels 0..nfold-1, obtained by shuffling (1..nsample) mod nfold with R's sample().
    # Named fold_id rather than `id` so the Python builtin is not shadowed.
    fold_id = np.array(sample(robjects.IntVector(np.arange(1, nsample + 1) % nfold)))
    for i in range(nfold):
        # Row positions of the held-out fold and of the remaining training rows.
        # flatnonzero yields 1-D index arrays without assuming each fold has exactly cf members.
        test = np.flatnonzero(fold_id == i)
        train = np.flatnonzero(fold_id != i)
        # Model: two hidden ReLU layers (200 -> 20) and a single linear output.
        model = Sequential()
        model.add(Dense(200, activation='relu', input_dim=n_features))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(1))
        # Optimizer
        model.compile(optimizer='Adam', loss='mse')
        # Fit on the training rows only.
        model.fit(I[train], bcw[train], epochs=35, batch_size=128)
        # Predictions for the held-out rows (pred) and for the training rows (pred1).
        pred = model.predict(I[test], verbose=0).reshape(-1)
        pred1 = model.predict(I[train], verbose=0).reshape(-1)
        # Accuracy = Pearson correlation between predicted and observed bcw.
        AccSum1[i,j] = pearsonr(pred, bcw[test])[0]
        AccSum2[i,j] = pearsonr(pred1, bcw[train])[0]
end = time.time()

In [48]:
# Wall-clock seconds between the `start` and `end` timestamps of the cross-validation run.
print("eplased time", end - start)

eplased time 414.90176010131836


In [49]:
# Mean Pearson correlation over all folds and repeats: held-out folds first, training folds second.
print(AccSum1.mean(), AccSum2.mean())

0.300227714854724 0.9741363391166052


In [8]:
from keras.utils import plot_model
# Render whatever `model` currently refers to into modelbasic.png, with layer names and shapes.
plot_model(
    model,
    to_file='modelbasic.png',
    show_layer_names=True,
    show_shapes=True,
)

# Multi-task deep learning model

In [23]:
import keras
from keras.layers import Input, Dense
from keras.models import Model
from keras import optimizers

In [30]:
# RMSprop optimizer with learning rate 0.003, used when compiling the multi-task model.
rmsprop = optimizers.RMSprop(lr=0.003)

In [None]:
start = time.time()
# 10-repeat 5-fold cross-validation; the seeds and R-driven shuffle are the same
# as for the single-task network, so both models are scored on the same folds.
n_features = I.shape[1]  # network input width follows the marker matrix instead of a literal
for j in range(repeats):
    # One R seed per repeat (104, 107, ...).
    setseed(100+3*(j+1)+1)
    # Fold labels 0..nfold-1, obtained by shuffling (1..nsample) mod nfold with R's sample().
    # Named fold_id rather than `id` so the Python builtin is not shadowed.
    fold_id = np.array(sample(robjects.IntVector(np.arange(1, nsample + 1) % nfold)))
    for i in range(nfold):
        # Row positions of the held-out fold and of the remaining training rows.
        # flatnonzero yields 1-D index arrays without assuming each fold has exactly cf members.
        test = np.flatnonzero(fold_id == i)
        train = np.flatnonzero(fold_id != i)
        # Model: one shared hidden layer feeding two task-specific heads.
        main_input = Input(shape=(n_features,), name='main_input')
        x = Dense(200, activation='relu')(main_input)

        # Head 1 -> y1, trained against bcw.
        x1 = Dense(20, activation='relu')(x)
        y1 = Dense(1, name='y1')(x1)

        # Head 2 -> y2, trained against length.
        x2 = Dense(100, activation='relu')(x)
        y2 = Dense(1, name='y2')(x2)

        model = Model(inputs=[main_input], outputs=[y1, y2])
        # Compile with a fresh RMSprop per fold, so every independently built model
        # gets its own optimizer instance instead of sharing one object created in
        # another cell. y2's loss is down-weighted to 0.2 relative to y1.
        model.compile(optimizer=optimizers.RMSprop(lr=0.003),
                      loss={'y1': 'mse', 'y2': 'mse'},
                      loss_weights={'y1': 1., 'y2': 0.2})
        # Train on the training rows only.
        model.fit({'main_input': I[train]},
                  {'y1': bcw[train], 'y2': length[train]},
                  epochs=30, batch_size=128)
        # Accuracy: only the first output (y1) is scored, against observed bcw.
        pred_test = model.predict(I[test], verbose=0)[0].reshape(-1)
        pred_train = model.predict(I[train], verbose=0)[0].reshape(-1)
        AccSum3[i,j] = pearsonr(pred_test, bcw[test])[0]
        AccSum4[i,j] = pearsonr(pred_train, bcw[train])[0]
end = time.time()

In [32]:
# Wall-clock seconds between the `start` and `end` timestamps of the cross-validation run.
print("eplased time", end - start)

eplased time 312.6359119415283


In [33]:
# Mean Pearson correlation over all folds and repeats: held-out folds first, training folds second.
print(AccSum3.mean(), AccSum4.mean())

0.30666759049545633 0.9376196462345063


In [34]:
from keras.utils import plot_model
# Render whatever `model` currently refers to into model.png, with shapes but without layer names.
plot_model(
    model,
    to_file='model.png',
    show_layer_names=False,
    show_shapes=True,
)

In [153]:
# Save result: append the test-set correlations of both networks to the table of
# accuracies already stored in All_models_Acc.xlsx.
import pandas as pd
seven_models_r = pd.read_excel("All_models_Acc.xlsx")
# Each (nfold, repeats) table is flattened row-major, i.e. all repeats of fold 0
# first, then fold 1, and so on.
# NOTE(review): confirm this ordering matches the rows already in the workbook.
# The new columns keep the default 0-based index so they line up row-for-row with
# the table returned by read_excel. concat(axis=1) aligns on index labels, so
# labelling them 1..50 would shift them down by one row and add a NaN-padded row.
v1 = pd.DataFrame({"NN": AccSum1.flatten()})
v2 = pd.DataFrame({"MNN": AccSum3.flatten()})
# NOTE(review): this overwrites the workbook it was read from, so re-running the
# cell appends the columns again; write to a new file if that is not intended.
pd.concat([seven_models_r.reset_index(drop=True), v1, v2], axis=1).to_excel('All_models_Acc.xlsx')