In [1]:
import warnings
from rpy2.rinterface import RRuntimeWarning
# Ignore FutureWarning and rpy2's RRuntimeWarning for the whole session so
# they do not clutter the cell outputs below.
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore',category=RRuntimeWarning)
import sys
import os

In [2]:
import pandas as pd
import numpy as np

In [3]:
from rpy2.robjects import pandas2ri
# Switch on rpy2's pandas conversion layer.
# NOTE(review): presumably needed so pandas objects convert automatically when
# crossing into the %%R cells -- confirm against the installed rpy2 version.
pandas2ri.activate()

In [4]:
# Enable R magic
%load_ext rpy2.ipython

In [5]:
# The name '__file__' is quoted on purpose: it is resolved as a relative path
# against the current working directory, so HERE ends up being the directory
# the kernel runs in and PROJECT its parent.
HERE = os.path.split(os.path.realpath('__file__'))[0]
PROJECT = os.path.abspath(os.path.join(HERE, os.pardir))
DATA = os.path.join(PROJECT, 'data')

# Input files, all located in the project's data directory.
GENOTYPE_DATA = os.path.join(DATA, 'Asif_Genotype_Disease_Only_ROSMAP.csv')
# Spelling of the constant name is kept unchanged so existing references keep working.
SUGBRAPH_SNPS = os.path.join(DATA, 'subgraphs15_snps_mod1.csv')
SUBGRAPH_15_RDATA = os.path.join(DATA, 'subgraph15_snpset148.RData')

# Destination of the per-subgraph SNP matrices that are written from R.
INPUT_FOR_MODEL = os.path.join(DATA, "rosmap148.snp.mat.RData")

In [6]:
%%R

library(h2o)
library(randomForest)
library(glmnet)
library(gbm)
library(bnlearn)
library(ggplot2)
library(CORElearn)
library(dplyr)

R[write to console]: 
----------------------------------------------------------------------

Your next step is to start H2O:
    > h2o.init()

For H2O package documentation, ask for help:
    > ??h2o

After starting H2O, you can use the Web UI at http://localhost:54321
For more information visit http://docs.h2o.ai

----------------------------------------------------------------------


R[write to console]: 
Attache Paket: ‘h2o’


R[write to console]: The following objects are masked from ‘package:stats’:

    cor, sd, var


R[write to console]: The following objects are masked from ‘package:base’:

    %*%, %in%, &&, apply, as.factor, as.numeric, colnames, colnames<-,
    ifelse, is.character, is.factor, is.numeric, log, log10, log1p,
    log2, round, signif, trunc, ||


R[write to console]: randomForest 4.6-14

R[write to console]: Type rfNews() to see new features/changes/bug fixes.

R[write to console]: Lade nötiges Paket: Matrix

R[write to console]: Loaded glmnet 3.0-2


R[write

In [7]:
%%R
#### main function ####

# Reduce the SNP columns of one subgraph to a single meta-feature.
#
# Args:
#   input_data: table with one row per sample and one column per SNP. The
#               name of its first column is used to label the meta-feature.
#
# Returns:
#   A list of two elements:
#     [[1]] one-column data frame with one row per input row:
#             - more than one input column: the first column of the deep
#               features h2o extracts from the best autoencoder of the grid;
#             - exactly one input column: the input itself, relabelled;
#             - no input column: a column of NA named "dummy".
#     [[2]] the best h2o model of the grid search, or NULL when no model was
#           trained (zero or one input column).
ann_fun = function(input_data){
  n_features = dim(input_data)[2]

  # Label of the meta-feature: text before the first "_" and text after the
  # last "_" of the first column name, joined by "_".
  feature_label = function(){
    first_name = colnames(input_data)[1]
    paste(sub("_.*", '', first_name), sub(".*_", '', first_name), sep = "_")
  }

  # Only the multi-column branch trains a model; the others report NULL.
  # (Previously `fit` was undefined there and building the result failed.)
  fit = NULL

  if(n_features > 1)
  {
    h2o.init(nthreads = -1, max_mem_size = "12g")

    myx = as.h2o(scale(input_data))

    n = round(n_features)
    m = round(n_features/2)

    # Every candidate topology ends in a single unit, so the last hidden
    # layer yields exactly one value per sample.
    hyper_params <- list(hidden = list(1, c(n, 1), c(n,m,1), c(m,1)),
                         input_dropout_ratio = c(0,0.05, 0.2,0.5),
                         l2=10^c(-4:4))

    # Random suffix so that repeated calls use different grid ids.
    r = sample(20:60000, 1)
    grid_name = paste("mygrid", r, sep="_")

    grid = h2o.grid("deeplearning",
                    grid_id = grid_name,
                    autoencoder= TRUE,
                    x = colnames(myx),
                    training_frame = myx,
                    seed=1234567,
                    stopping_metric="MSE",
                    stopping_rounds = 5,
                    activation= "Tanh",
                    standardize=TRUE,
                    epochs=500,
                    hyper_params = hyper_params)

    # Best model = first entry of the grid sorted by MSE.
    sorted_grid <- h2o.getGrid(grid_id = grid_name, sort_by = "mse")
    fit <- h2o.getModel(sorted_grid@model_ids[[1]])

    # The first summary cell holds the winning topology as text, e.g.
    # "[n, 1]"; strip the enclosing characters and count the entries to get
    # the index of the last hidden layer.
    best_hidden = sorted_grid@summary_table[1,1]
    nlayers = length(strsplit(substr(best_hidden, 2, nchar(best_hidden)-1), ",")[[1]])
    newvar = as.data.frame(h2o.deepfeatures(fit, myx, nlayers))

    # Keep the first deep-feature column and give it the subgraph label. The
    # row count now follows the data instead of a hard-coded 194.
    subgraph_feature = as.data.frame(newvar[,1])
    colnames(subgraph_feature) = feature_label()

  }else if(n_features == 1 ){
    # A single SNP needs no compression: pass it through under the same
    # naming scheme. (Previously this renamed the undefined `newvar`.)
    subgraph_feature = as.data.frame(input_data)
    colnames(subgraph_feature) = feature_label()
  }else{
    # No SNPs at all: placeholder column of NA with one row per sample.
    subgraph_feature = data.frame(matrix(NA, nrow = nrow(input_data), ncol = 1))
    colnames(subgraph_feature) = "dummy"
  }
  outcome = list(subgraph_feature, fit)
  return(outcome)
}

In [8]:
# snps_data = pd.read_csv(GENOTYPE_DATA, index_col=0)
# snps_data = snps_data.transpose()

In [9]:
%%R -i=SUBGRAPH_15_RDATA,GENOTYPE_DATA,INPUT_FOR_MODEL

# Restore the objects stored in the RData file; the code below relies on
# `subgraph15.snps` being among them.
load(SUBGRAPH_15_RDATA)

# Read the genotype table (first CSV column becomes the row names) and
# transpose it, so that the SNP identifiers listed per subgraph can be
# selected as columns.
userSNPs <- as.data.frame(
  t(read.csv(GENOTYPE_DATA, row.names = 1, stringsAsFactors=FALSE)),
  stringsAsFactors = FALSE
)

# One data frame per subgraph, holding only the columns named by that
# subgraph's entry in `subgraph15.snps`.
snp_mat <- lapply(subgraph15.snps, function(i) select(userSNPs, i))
names(snp_mat) <- names(subgraph15.snps)

# Persist the list for later reuse.
save(snp_mat,file = INPUT_FOR_MODEL)



In [10]:
# snps_data.head()

In [12]:
# snps_data.shape

In [13]:
# snps_list = pd.read_csv(
#     SUGBRAPH_SNPS,header=None, index_col=0
# )

# subgraph2snps = {
#     index: snps.dropna().tolist()
#     for index, snps in snps_list.iterrows()
# }

Dictionary of subgraphs to dataframe slices for the SNPs in the subgraph

In [14]:
# snp_mat = [
#     snps_data[snps]
#     for _, snps in subgraph2snps.items()
# ]
# subgraph_4 = pandas2ri.py2rpy_pandasdataframe(snp_mat['Subgraph_4'])

In [None]:
%%R
# see: https://ipython.org/ipython-doc/2/config/extensions/rmagic.html

# Run ann_fun on every subgraph SNP matrix. Each entry of model.store is the
# list returned by ann_fun: [[1]] the meta-feature data frame, [[2]] the model.
model.store <- lapply(snp_mat, function(x) ann_fun(x))

####### This does not work to apply for each subgraph #######
#model.store <- lapply(subgraph_4, function(x) ann_fun(x))
save(model.store, file = "model.store.rosmapIdibaps-148_full.RData")


# Meta-feature matrix: one row per sample, one column per subgraph. The
# dimensions are taken from the data instead of the former hard-coded 194 x 15.
autoen=matrix(nrow = nrow(snp_mat[[1]]), ncol = length(model.store))

for (j in seq_along(model.store)){
  # First column of the meta-feature data frame of subgraph j.
  autoen[,j]=model.store[[j]][[1]][[1]]
}

# Row names come from the first subgraph's SNP matrix. The original read them
# from `snp.mat`, which is never defined in this notebook; the list built
# above is `snp_mat`.
rownames(autoen) <- rownames(snp_mat[[1]])
colnames(autoen) <- names(model.store)
# NOTE(review): absolute, user-specific output path -- consider deriving it
# from the project's data directory so the notebook runs on other machines.
save(autoen, file = "/home/memon/genetic_analyses/ann/output/subgraph15/autoen_matrix_rosmapIdibaps-148_full.RData")



H2O is not running yet, starting it now...

Note:  In case of errors look at the following log files:
    /var/folders/yx/ppwbtpyn61l2796mnr1t62f00000gn/T//RtmpqcZMqn/h2o_danieldomingo_started_from_r.out
    /var/folders/yx/ppwbtpyn61l2796mnr1t62f00000gn/T//RtmpqcZMqn/h2o_danieldomingo_started_from_r.err


Starting H2O JVM and connecting: . Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         2 seconds 286 milliseconds 
    H2O cluster timezone:       Europe/Berlin 
    H2O data parsing timezone:  UTC 
    H2O cluster version:        3.26.0.2 
    H2O cluster version age:    5 months and 24 days !!! 
    H2O cluster name:           H2O_started_from_R_danieldomingo_ekm683 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   10.67 GB 
    H2O cluster total cores:    4 
    H2O cluster allowed cores:  4 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Conn

In [None]:
%%R
#' save models in h2o format

# Second element of every model.store entry (the h2o model).
an_model <- lapply(model.store, '[[',2)

# Entries without a model (NULL) cannot be written by h2o.saveModel; skip
# them instead of aborting the whole export.
an_model <- Filter(Negate(is.null), an_model)

# NOTE(review): absolute, user-specific output directory -- consider making it
# configurable so the notebook runs on other machines.
for (i in an_model){
  h2o.saveModel(i, path = "/home/memon/genetic_analyses/ann/output/subgraph15/autoen_rosmapIdibaps-148_h2omodel_full/")
}

# Stop the H2O cluster without asking for confirmation, then let R release
# memory it no longer needs.
h2o.shutdown(prompt = FALSE)
gc()


In [None]:
%%R 


In [None]:
%%R