In [29]:
import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore',category=RRuntimeWarning)
import sys
import os

In [30]:
import pandas as pd
import numpy as np

In [31]:
from rpy2.robjects import pandas2ri
pandas2ri.activate()

In [32]:
# Enable R magic
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [57]:
HERE = os.path.dirname(os.path.realpath('__file__'))
PROJECT = os.path.abspath(os.path.join(HERE, '..'))
DATA = os.path.join(PROJECT, 'data')

GENOTYPE_DATA = os.path.join(DATA, 'Asif_Genotype_Disease_Only_ROSMAP.csv')
SUGBRAPH_SNPS = os.path.join(DATA, 'subgraphs15_snps_mod1.csv')
SUBGRAPH_15_RDATA = os.path.join(DATA, 'subgraph15_snpset148.RData')

INPUT_FOR_MODEL = os.path.join(DATA, "rosmap148.snp.mat.RData")

AUTOENCODER_TRAINED_MATRIX = os.path.join(DATA, "autoencoder_trained_matrix.RData")
TRAINED_PATIENT_CLUSTERS = os.path.join(DATA, "trained_patient_clusters.RData")

CLUSTER_CLASSIFIER = os.path.join(DATA, "GLM_model_R")

USER_AUTOENCODER_MATRIX = os.path.join(DATA, "user_autoencoder_matrix.RData")

In [6]:
%%R

library(h2o)
library(randomForest)
library(glmnet)
library(gbm)
library(bnlearn)
library(ggplot2)
library(CORElearn)
library(dplyr)
library(data.table)


R[write to console]: 
----------------------------------------------------------------------

Your next step is to start H2O:
    > h2o.init()

For H2O package documentation, ask for help:
    > ??h2o

After starting H2O, you can use the Web UI at http://localhost:54321
For more information visit http://docs.h2o.ai

----------------------------------------------------------------------


R[write to console]: 
Attache Paket: ‘h2o’


R[write to console]: The following objects are masked from ‘package:stats’:

    cor, sd, var


R[write to console]: The following objects are masked from ‘package:base’:

    %*%, %in%, &&, apply, as.factor, as.numeric, colnames, colnames<-,
    ifelse, is.character, is.factor, is.numeric, log, log10, log1p,
    log2, round, signif, trunc, ||


R[write to console]: randomForest 4.6-14

R[write to console]: Type rfNews() to see new features/changes/bug fixes.

R[write to console]: Lade nötiges Paket: Matrix

R[write to console]: Loaded glmnet 3.0-2


R[write

In [7]:
%%R
#### main function ####

ann_fun = function(input_data){
  if(dim(input_data)[2] > 1)
  {
    h2o.init(nthreads = -1, max_mem_size = "12g")
    
    #sc = scale(input_data)
    myx = as.h2o(scale(input_data)) 
    
    n = round(dim(input_data)[2])
    m = round(dim(input_data)[2]/2)
    
    hyper_params <- list(hidden = list(1, c(n, 1), c(n,m,1), c(m,1)),
                         input_dropout_ratio = c(0,0.05, 0.2,0.5),
                         l2=10^c(-4:4))
    
    
    r = sample(20:60000, 1)
 
  grid = h2o.grid("deeplearning", 
                    grid_id = paste("mygrid", r, sep="_"), 
                    autoencoder= TRUE,
                  x = colnames(myx),
                    training_frame = myx, 
                    seed=1234567, 
                    stopping_metric="MSE", 
                    stopping_rounds = 5,
                    #activation= "TanhWithDropout",
                    activation= "Tanh",
                    standardize=TRUE,
                    epochs=500,
                    hyper_params = hyper_params)
    
    gbm_sorted_grid <- h2o.getGrid(grid_id = paste("mygrid", r, sep="_"), sort_by = "mse")
    fit <- h2o.getModel(gbm_sorted_grid@model_ids[[1]])
    #nlayers = length(strsplit(gbm_sorted_grid@summary_table[1,1], ",")[[1]])
    #browser()
    nlayers = length(strsplit(substr(gbm_sorted_grid@summary_table[1,1], 2, nchar(gbm_sorted_grid@summary_table[1,1])-1), ",")[[1]])
    newvar = as.data.frame(h2o.deepfeatures(fit, myx, nlayers))
 
  
    #Rename column
 
    newvar = as.data.frame(newvar[,1])
    colnames(newvar) = paste( sub("_.*", '', colnames(input_data)[1]) , sub(".*_", '',colnames(input_data)[1]) , sep= "_")
    
   
    #save meta-features in variable 
    subgraph_feature = data.frame(matrix(NA, nrow = 844, ncol = 1)) # nrow = Patient number
    colnames(subgraph_feature) = "dummy"
 
  
    subgraph_feature = cbind(subgraph_feature,newvar) 
    subgraph_feature$dummy = NULL
    
  }else if(dim(input_data)[2] == 1 ){
    subgraph_feature = as.data.frame(input_data)
    colnames(newvar) = paste( sub("_.*", '', colnames(input_data)[1]) , sub(".*_", '',colnames(input_data)[1]) , sep= "_")
  }else{
    subgraph_feature = data.frame(matrix(NA, nrow = 844, ncol = 1)) # nrow = Patient number
    colnames(subgraph_feature) = "dummy"
  }
  outcome = list(subgraph_feature, fit) # scales=attributes(sc)
  return(outcome)
}

In [9]:
%%R -i=SUBGRAPH_15_RDATA,GENOTYPE_DATA,INPUT_FOR_MODEL

load(SUBGRAPH_15_RDATA)
userSNPs <- read.csv(GENOTYPE_DATA, row.names = 1, stringsAsFactors=FALSE)
userSNPs <- as.data.frame(t(userSNPs), stringsAsFactors = FALSE)

snp_mat = list()
k = 1
for (i in subgraph15.snps){
  snp_mat[[k]] = select(userSNPs, i)
  k = k + 1
}

names(snp_mat) <- names(subgraph15.snps)
save(snp_mat,file = INPUT_FOR_MODEL)


Dictionary of subgraphs to dataframe slices for the SNPs in the subgraph

In [55]:
%%R -i=USER_AUTOENCODER_MATRIX
# see: https://ipython.org/ipython-doc/2/config/extensions/rmagic.html

# apply main function on each mechanisms matrices
#model.store <- lapply(snp_mat, function(x) ann_fun(x))
                      
# Stores the model
# save(model.store, file = "model.store.rosmapIdibaps-148_full.RData")


user_autoencoder_matrix=matrix(nrow = 194, ncol = 15) # 358 PD patients and 15 subgraphs

j=1
for (i in model.store){
  autoen[,j]=i[[1]][[1]]
  j=j+1
}
rownames(autoen) <- rownames(snp_mat[[1]])
colnames(autoen) <- names(model.store)
save(user_autoencoder_matrix, file = USER_AUTOENCODER_MATRIX)


In [44]:
%%R
#' save models in h2o format

#an_model <- lapply(model.store, '[[',2)  # This returns a list with only the 2nd element (h2o models)

# for (i in an_model){
#   h2o.saveModel(i, path = "autoen_rosmapIdibaps-148_h2omodel_full/")
# }

#h2o.shutdown(prompt = FALSE)
#gc()


R[write to console]: Fehler in dimnames(x) <- dn : 
  la longitud de 'dimnames' [1] no es igual a la extensión del arreglo
Ruft auf: <Anonymous> -> <Anonymous> -> withVisible -> rownames<-

R[write to console]: Además: 
R[write to console]: Warnmeldungen:

R[write to console]: 1: 
R[write to console]: In h2o.clusterInfo() :
R[write to console]:  
Your H2O cluster version is too old (5 months and 24 days)!
Please download and install the latest version from http://h2o.ai/download/

R[write to console]: 2: 
R[write to console]: In h2o.clusterInfo() :
R[write to console]:  
Your H2O cluster version is too old (5 months and 24 days)!
Please download and install the latest version from http://h2o.ai/download/




Fehler in dimnames(x) <- dn : 
  la longitud de 'dimnames' [1] no es igual a la extensión del arreglo
Ruft auf: <Anonymous> -> <Anonymous> -> withVisible -> rownames<-


In [43]:
%%R -i=AUTOENCODER_TRAINED_MATRIX,TRAINED_PATIENT_CLUSTERS,CLUSTER_CLASSIFIER

### Start up a 1-node H2O server on the local machine, and allow it to use all CPU cores and up to 6GB of memory:
h2o.init(nthreads=-1, min_mem_size="6G")

### Import autoencoder data set #subgraph15
load(AUTOENCODER_TRAINED_MATRIX)
main_data <- data.frame(autoen)

#### read the cluster assignments of each patients
load(TRAINED_PATIENT_CLUSTERS)
rownames(clusters) <- clusters[,1]
clusters[,1] <- NULL


##### merge cluster assignment to the dataset
fin_data <- merge(clusters,main_data,by="row.names")
fin_data$clusters <- as.factor(fin_data$clusters)
rownames(fin_data) = fin_data$Row.names
fin_data <- fin_data[,-1]
fullD <- as.h2o(fin_data) # get complete data set into h2o frame for cross validation approach

##### Training Classifier

y = "clusters" # response variable
#x = names(trainData)
x = names(fullD)
x = x[-which(x==y)] # predictor variables

### Train Model
snpModel = h2o.glm(training_frame = fullD, 
                   #training_frame = trainD, # keep it commented while using cross validation
                   #validation_frame = validD, # keep it commented while using cross validation
                   x = x, 
                   y = y,
                   nfolds = 10, 
                   family='multinomial',
                   solver='L_BFGS',
                   lambda_search=TRUE)

print(snpModel)
h2o.performance(snpModel, xval = TRUE)
h2o.saveModel(snpModel, path=CLUSTER_CLASSIFIER)

#### Now predict the Test Data with the trained model #####
#snpModel = h2o.loadModel("/home/memon/genetic_analyses/ann/output/subgraph15/classifier_adpd_ann30/GLM_model_R_1552061363922_1")

 Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         18 minutes 33 seconds 
    H2O cluster timezone:       Europe/Berlin 
    H2O data parsing timezone:  UTC 
    H2O cluster version:        3.26.0.2 
    H2O cluster version age:    5 months and 24 days !!! 
    H2O cluster name:           H2O_started_from_R_danieldomingo_gok171 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   5.67 GB 
    H2O cluster total cores:    4 
    H2O cluster allowed cores:  4 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Connection proxy:       NA 
    H2O Internal Security:      FALSE 
    H2O API Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4 
    R Version:                  R version 3.6.2 (2019-12-12) 

Model Details:

H2OMultinomialModel: glm
Model ID:  GLM_model_R_1579515758272_2 
GLM Model: summary
       family        link             

In [58]:
%%R -i=CLUSTER_CLASSIFIER

#### Now predict the Test Data with the trained model #####
snpModel = h2o.loadModel(CLUSTER_CLASSIFIER)

#### for autoencoder predcited AET_PD data based predcition ####
load(USER_AUTOENCODER_MATRIX) # rosmap dataset predcited (h2o.deepfeature) with autoen30 model
testh2o <- as.h2o(user_autoencoder_matrix)

##########################################

### prediction on test data set
prediction = h2o.predict(snpModel, newdata=testh2o)

predicted.cl = as.data.frame(prediction$predict)
names(predicted.cl) <- "clusters"
predicted.cl$clusters <- as.integer(as.character(gsub("Cluster_", "", predicted.cl$clusters)))

# merge predicted clusters to test data
# so all patients in test data set get a cluster assignment based on their mechanism profile (predictro variables)
predicted_cl.testdata <- cbind(predicted.cl,testdata)
#save(predicted_cl.testdata,file="/home/memon/genetic_analyses/ann/output/subgraph15/ann30_cluster_prediction_adni148_nonad.RData")


  |                                                                      |   0%

java.lang.IllegalArgumentException: Test/Validation dataset has no columns in common with the training set

java.lang.IllegalArgumentException: Test/Validation dataset has no columns in common with the training set
	at hex.Model.adaptTestForTrain(Model.java:1325)
	at hex.Model.adaptTestForTrain(Model.java:1164)
	at hex.Model.score(Model.java:1442)
	at water.api.ModelMetricsHandler$1.compute2(ModelMetricsHandler.java:381)
	at water.H2O$H2OCountedCompleter.compute(H2O.java:1417)
	at jsr166y.CountedCompleter.exec(CountedCompleter.java:468)
	at jsr166y.ForkJoinTask.doExec(ForkJoinTask.java:263)
	at jsr166y.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:974)
	at jsr166y.ForkJoinPool.runWorker(ForkJoinPool.java:1477)
	at jsr166y.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)



R[write to console]: Error: java.lang.IllegalArgumentException: Test/Validation dataset has no columns in common with the training set




Error: java.lang.IllegalArgumentException: Test/Validation dataset has no columns in common with the training set


In [49]:
%%R 

autoen

               [,1]       [,2]         [,3]         [,4]         [,5]
  [1,]  0.260069725  0.6237764  0.627048575  0.534933808 -0.110982040
  [2,] -0.325248797 -0.2485446 -0.375581488  0.689664841 -0.105521715
  [3,] -0.107234732  0.4832413 -0.356542345 -0.352223573 -0.586388614
  [4,]  0.350254868  0.5759488  0.237063109 -0.558518908 -0.510031298
  [5,] -0.086288326  0.5961460  0.052685213  0.370495990  0.250162979
  [6,] -0.324163644  0.6151452  0.723294621 -0.340390554 -0.327286626
  [7,] -0.037013073 -0.4206191 -0.079353615  0.546247319 -0.596192299
  [8,]  0.100440960 -0.3703159  0.577046097  0.160689241 -0.580963419
  [9,]  0.123154934 -0.2919445 -0.697402390 -0.296011073  0.008835838
 [10,] -0.196356554 -0.4258876 -0.473785002  0.286426837  0.426450959
 [11,] -0.411307818  0.7018062 -0.837806985  0.677843241 -0.089350239
 [12,] -0.143840664 -0.4669601  0.191222994  0.506073198 -0.084010801
 [13,] -0.422494289 -0.4475225 -0.623981697 -0.377238799 -0.191134091
 [14,] -0.380776047 

 [37,] -0.36472171  0.4741841500  0.6347277893 -0.1217634064 -0.64914512
 [38,] -0.36472171  0.3739628928  0.6063130685  0.4156114604 -0.16840711
 [39,] -0.54466899 -0.4416850268  0.6409585860  0.4895816485  0.53062306
 [40,] -0.71136749  0.0987314299  0.1770319856 -0.0253942762 -0.65920007
 [41,]  0.12067733  0.4096511103  0.5991521147  0.4183454737 -0.20308891
 [42,] -0.71136749 -0.6896987488  0.5951951298  0.5176963310  0.60552857
 [43,] -0.17657697 -0.2536470210  0.2418106971  0.3272463805 -0.71671450
 [44,] -0.36472171 -0.6642910953  0.5991521147 -0.0494761715 -0.14412477
 [45,] -0.71136749 -0.0736582523  0.1472721395  0.0831995594 -0.70500342
 [46,] -0.36472171  0.3739628928  0.5823232312 -0.1596688252 -0.71250383
 [47,] -0.71136749 -0.1128933123  0.4682313211 -0.5727227120 -0.14412477
 [48,] -0.36472171 -0.5186042496  0.5858861315  0.2826398581 -0.14385426
 [49,] -0.36472171 -0.1365786670  0.2363745944 -0.3996738645 -0.22031243
 [50,] -0.71136749  0.3644530608  0.4992305789 -0.3

From cffi callback <function _consolewrite_ex at 0x10bc38170>:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 131, in _consolewrite_ex
    consolewrite_print(s)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 114, in consolewrite_print
    print(s, end='', flush=True)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/iostream.py", line 351, in flush
    if not evt.wait(self.flush_timeout):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 552, in wait
    signaled = self._cond.wait(timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 300, in wait
    gotit = waiter.acquire(True, timeout)
KeyboardInterrupt


  0.12067733  0.3956425469  0.1770319856  0.0528042504 -0.22864273
[142,] -0.71136749  0.1086288146  0.5991521147  0.1079475549 -0.13464282
[143,] -0.71136749 -0.4572086294  0.3044233530  0.4502033031 -0.22033306
[144,] -0.71136749  0.4395336115  0.5858861315  0.2784997719 -0.22033306
[145,] -0.36472171 -0.4894251257  0.6028352729  0.3880864534 -0.71671450
[146,] -0.71136749  0.1965683416  0.6026075130  0.3041514315 -0.65920007
[147,] -0.07791074  0.1086288146  0.6292036143  0.5176963310 -0.23726699
[148,] -0.36472171  0.0064705743  0.2710131847 -0.3008651002  0.61957693
[149,] -0.54466899  0.2742419050  0.5991521147  0.0751825780 -0.14385426
[150,] -0.36472171  0.5420345005  0.6190366737  0.3951599102 -0.70956516
[151,] -0.71136749 -0.3325906928  0.5858861315  0.2549821636 -0.14412477
[152,] -0.36472171 -0.7806184306  0.6154677162  0.3364250625 -0.70500342
[153,] -0.54466899  0.2036155796  0.2363745944  0.3098495424 -0.65841661
[154,] -0.71136749  0.1346913190  0.6310409646  0.3805145

[174,] -0.0589465952  0.1958107 -0.31334611 -0.210670944 -0.3314589499
[175,]  0.1102513019  0.7279751 -0.21570051 -0.521241340 -0.4012873742
[176,]  0.4610980053  0.1958107 -0.26836326 -0.418492896  0.4851437190
[177,] -0.4283319946  0.1759815 -0.67426354  0.046446254  0.4851437190
[178,] -0.0438949949  0.7279751 -0.26836326  0.001198311  0.0826455634
[179,] -0.3282425055  0.7279751 -0.36288841 -0.521241340  0.4088592471
[180,] -0.3327565817  0.1759815 -0.26203735 -0.533988880  0.3746915541
[181,] -0.1299451908  0.7279751 -0.67426354 -0.176366313  0.4341530083
[182,]  0.0794824062  0.7279751 -0.67426354 -0.477113733  0.5267512805
[183,]  0.0965542332  0.7181752 -0.61281124 -0.212107878 -0.0825751232
[184,] -0.1052295494  0.7279751 -0.61281124 -0.301052048  0.4054560812
[185,]  0.0906227983  0.1958107 -0.31334611  0.129517455 -0.1860713286
[186,] -0.1299451908  0.7279751 -0.67426354 -0.099902627  0.3831650324
[187,] -0.4283319946  0.7279751  0.31337033 -0.082383889  0.0215049979
[188,]