In [1]:
import warnings
from rpy2.rinterface import RRuntimeWarning
# Keep notebook output readable: suppress FutureWarning notices from Python
# libraries and the RRuntimeWarning category imported from rpy2 above.
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=RRuntimeWarning)
import sys

In [2]:
import pandas as pd
import numpy as np
from sklearn import datasets

In [3]:
from rpy2.robjects import pandas2ri
# Activate rpy2's pandas conversion for the rest of the session.
# NOTE(review): newer rpy2 releases appear to deprecate the global activate()
# in favour of a local conversion context -- confirm against the installed version.
pandas2ri.activate()

In [6]:
# Enable R magic
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [None]:
%%R

library(h2o)
library(randomForest)
library(glmnet)
library(gbm)
library(bnlearn)
library(ggplot2)
library(CORElearn)

In [None]:
%%R

#### main function ####

#' Collapse one subgraph's SNP matrix into a single meta-feature column.
#'
#' The behaviour depends on how many columns `input_data` has:
#'   * more than one column: an H2O deep-learning autoencoder grid is trained on
#'     the scaled data, the first model of the grid sorted by MSE is selected,
#'     and the activations of its last hidden layer (always a single unit in
#'     every `hidden` configuration of the grid) become the feature.
#'   * exactly one column: the column itself is returned as the feature.
#'   * zero columns: an all-NA placeholder column named "dummy" is returned.
#'
#' The feature column is named from the first column of `input_data`: the text
#' before its first underscore and the text after its last underscore, joined
#' by "_".
#'
#' @param input_data Matrix or data frame; rows are taken to be samples
#'   (patients) and columns SNPs.
#' @return A list of two elements: `[[1]]` a one-column data frame with
#'   `nrow(input_data)` rows, and `[[2]]` the selected H2O model, or `NULL`
#'   when no model was trained (zero or one input column).
ann_fun = function(input_data){
  n_samples = nrow(input_data)
  n_snps = ncol(input_data)

  # Only the multi-column branch trains a model. Initialising here keeps the
  # return statement valid for every branch and prevents an unrelated global
  # `fit` from being picked up by accident.
  fit = NULL

  if(n_snps > 1)
  {
    h2o.init(nthreads = -1, max_mem_size = "12g")

    myx = as.h2o(scale(input_data))

    n = round(n_snps)
    m = round(n_snps/2)

    # Every candidate architecture ends in a single hidden unit, so the last
    # hidden layer is the one-dimensional encoding extracted below.
    hyper_params <- list(hidden = list(1, c(n, 1), c(n,m,1), c(m,1)),
                         input_dropout_ratio = c(0,0.05, 0.2,0.5),
                         l2=10^c(-4:4))

    # Random suffix so that repeated calls do not reuse the same grid id.
    r = sample(20:60000, 1)
    grid_id = paste("mygrid", r, sep="_")
    grid = h2o.grid("deeplearning",
                    grid_id = grid_id,
                    autoencoder = TRUE,
                    x = colnames(myx),
                    training_frame = myx,
                    seed=1234567,
                    stopping_metric="MSE",
                    stopping_rounds = 5,
                    activation= "Tanh",
                    standardize=TRUE,
                    epochs=500,
                    hyper_params = hyper_params)

    sorted_grid <- h2o.getGrid(grid_id = grid_id, sort_by = "mse")
    fit <- h2o.getModel(sorted_grid@model_ids[[1]])

    # The first summary-table cell of the selected model is read as the hidden
    # layout, e.g. "[8, 4, 1]": strip the surrounding brackets and count the
    # comma-separated entries to get the index of the last hidden layer.
    # NOTE(review): assumes `hidden` is the first summary-table column -- confirm.
    hidden_spec = sorted_grid@summary_table[1,1]
    nlayers = length(strsplit(substr(hidden_spec, 2, nchar(hidden_spec)-1), ",")[[1]])
    newvar = as.data.frame(h2o.deepfeatures(fit, myx, nlayers))

    # Keep the single encoded column and give it the subgraph-derived name.
    subgraph_feature = as.data.frame(newvar[,1])
    colnames(subgraph_feature) = paste( sub("_.*", '', colnames(input_data)[1]) , sub(".*_", '',colnames(input_data)[1]) , sep= "_")

  }else if(n_snps == 1 ){
    # A single SNP needs no compression; pass it through under the same naming rule.
    subgraph_feature = as.data.frame(input_data)
    colnames(subgraph_feature) = paste( sub("_.*", '', colnames(input_data)[1]) , sub(".*_", '',colnames(input_data)[1]) , sep= "_")
  }else{
    # No SNPs at all: return an NA placeholder with one row per sample.
    subgraph_feature = data.frame(matrix(NA, nrow = n_samples, ncol = 1))
    colnames(subgraph_feature) = "dummy"
  }
  outcome = list(subgraph_feature, fit)
  return(outcome)
}


In [None]:
# Read the genotype table (first CSV column as index) and flip it so that the
# CSV's row labels become the column labels used for selection further down.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
snps_data = pd.read_csv(
    'C:/Users/Asif Khan/predictme/data/Asif_Genotype_Disease_Only_ROSMAP.CSV',
    index_col=0,
).transpose()

In [None]:
# Preview the first rows of the transposed genotype table.
snps_data.head()

In [None]:
# Header-less table: the first column is used as the row label, the remaining
# cells of each row hold that row's SNP identifiers (padded with NaN).
snps_list = pd.read_csv(
    'C:/Users/Asif Khan/predictme/data/subgraphs15_snps_mod1.csv',
    header=None,
    index_col=0,
)

# Map each row label to the list of its non-missing SNP identifiers.
subgraph2snps = dict(
    (label, row.dropna().tolist())
    for label, row in snps_list.iterrows()
)

In [None]:
# One genotype sub-table per subgraph, in the dictionary's iteration order;
# each selects that subgraph's SNP columns from the full table.
snp_mat = [snps_data[snp_ids] for snp_ids in subgraph2snps.values()]

In [None]:
# Show only the (rows, columns) shape of each per-subgraph table; echoing the
# whole list would dump every DataFrame into the notebook output.
[frame.shape for frame in snp_mat]

In [None]:
%%R
# The %%R cell magic has to be the very first line of the cell, so the
# description lives below it as an R comment.
# Load Mechanisms*Patients*SNPs Data (e.g., 15 mechanisms* 540 Patients * n SNPs)
# NOTE(review): the next cell reads `snp.mat`; presumably this file provides it -- confirm.

load("/home/memon/genetic_analyses/ann/output/subgraph15/rosmapDisIdibapsAetpd148.snp.mat.RData")

In [None]:
%%R
# The %%R cell magic has to be the very first line of the cell, so the
# description lives below it as an R comment.
# apply main function on each mechanisms matrices

model.store <- lapply(snp.mat, ann_fun)
save(model.store, file = "/home/memon/genetic_analyses/ann/output/subgraph15/model.store.rosmapIdibaps-148_4.RData")


# One row per sample and one column per subgraph. Dimensions are taken from the
# data instead of being hard-coded, so other cohorts / subgraph counts work too.
autoen <- matrix(nrow = nrow(snp.mat[[1]]), ncol = length(model.store))

for (j in seq_along(model.store)){
  # Element 1 of each result is the one-column feature data frame;
  # the second [[1]] extracts that column as a plain vector.
  autoen[, j] <- model.store[[j]][[1]][[1]]
}

rownames(autoen) <- rownames(snp.mat[[1]])
colnames(autoen) <- names(model.store)
save(autoen, file = "/home/memon/genetic_analyses/ann/output/subgraph15/autoen_matrix_rosmapIdibaps-148_4.RData")


In [None]:
%%R
# The %%R cell magic has to be the very first line of the cell, so the
# description lives below it as an R comment.
# save models in h2o format

an_model <- lapply(model.store, '[[',2)  # This returns a list with only the 2nd element (h2o models)

for (model in an_model){
  # Entries without a fitted model have nothing to write, so skip them.
  if (!is.null(model)){
    h2o.saveModel(model, path = "/home/memon/genetic_analyses/ann/output/subgraph15/autoen_rosmapIdibaps-148_h2omodel_4/")
  }
}

# Stop the H2O instance without an interactive prompt, then run R's garbage collector.
h2o.shutdown(prompt = FALSE)
gc()


In [None]:
%%R



In [None]:
%%R



In [6]:
%%R 

# Regress `cyl` on `mpg` from the built-in `mtcars` data. Naming the columns in
# the formula and passing `data =` (rather than `mtcars$...` terms) gives clean
# coefficient names and keeps predict(fit, newdata = ...) usable.
fit = lm(cyl ~ mpg, data = mtcars)
coefficients <- coef(fit)


In [7]:
%%R
# Display the model summary of `fit` as fitted in the preceding cell.
summary(fit)