# Compare model performance between real and permuted hetnets

In [1]:
library(magrittr)

In [2]:
# Feature table; its `feature_type` column distinguishes degree features
# from DWPC (metapath) features, and `feature` holds the column names.
auroc_df <- readr::read_tsv('data/auroc.tsv')
degree_rows <- dplyr::filter(auroc_df, feature_type == 'degree')
dwpc_rows <- dplyr::filter(auroc_df, feature_type == 'dwpc')
degrees <- degree_rows$feature
metapaths <- dwpc_rows$feature

# Explicit column types are not strictly needed, but guard against readr
# guessing wrong: metapath columns are parsed as numbers, degree columns as
# integers. Degrees are assigned last so they win on any name collision.
col_types <- list()
col_types[metapaths] <- rep(list(readr::col_number()), length(metapaths))
col_types[degrees] <- rep(list(readr::col_integer()), length(degrees))

feature_df <- readr::read_tsv(
  'data/matrix/features.tsv.bz2',
  col_types = col_types
)

In [3]:
# Transform the modeling columns of a feature data frame.
#
# df: data frame with `status`, `prior_prob`, and the columns named in the
#     global vectors `degrees` and `metapaths`.
#
# Returns a data frame holding, in order: `status`, `prior_logit`
# (logit of `prior_prob`), the asinh-transformed degree columns, and the
# metapath columns divided by their column mean and then asinh-transformed.
# Column means are taken over the rows of `df` as passed in.
transform_df <- function(df) {
  # Outcome and logit-transformed prior
  outcome_cols <- dplyr::transmute(
    df,
    status,
    prior_logit = boot::logit(prior_prob)
  )

  # Degree features: inverse hyperbolic sine
  degree_cols <- dplyr::mutate_each(
    dplyr::select(df, one_of(degrees)),
    dplyr::funs(asinh)
  )

  # Metapath features: scale by the column mean, then asinh
  dwpc_cols <- dplyr::mutate_each(
    dplyr::select(df, one_of(metapaths)),
    dplyr::funs(asinh(. / mean(.)))
  )

  dplyr::bind_cols(outcome_cols, degree_cols, dwpc_cols)
}

# Run transform_df on each hetnet's rows separately (so the metapath column
# means are computed per hetnet), then drop the grouping.
transformed_df <- dplyr::ungroup(
  dplyr::do(
    dplyr::group_by(feature_df, hetnet),
    transform_df(.)
  )
)

head(transformed_df, 2)

Unnamed: 0,hetnet,status,prior_logit,CbG,CcSE,CdG,CiPC,CpD,CrC,CtD,ellip.h,CuGuDpCpD,CuGuDpCtD,CuGuDpSpD,CuGuDrD,CuGuDrDrD,CuGuDtCpD,CuGuDtCtD,CuGuDuGaD,CuGuDuGdD,CuGuDuGuD
1,rephetio-v2.0,0,-4.615442,1.443635,6.210604,0,0.8813736,0,2.644121,1.443635,⋯,0,0,0.01067385,0,0,0,0,0,0.1365873,0.1364012
2,rephetio-v2.0,0,-4.615442,2.644121,4.094622,0,0.8813736,0,2.644121,1.443635,⋯,0,0,0.0,0,0,0,0,0,0.0,0.0


In [4]:
# Global accumulators written by get_performance() through `<<-`:
#   fit_list - every fitted model, in the order the fits were made
#   i        - index of the most recent entry in fit_list
fit_list <- list()
i <- 0

# Fit models on one hetnet's transformed features and append them to fit_list.
#
# df:           data frame with `status`, `hetnet`, and feature columns
#               (including `prior_logit`); all columns other than `status`
#               and `hetnet` are used as predictors.
# incl_degrees: flag recorded on each fit so later cells can tell whether
#               degree features were part of `df`.
#
# For every combination of seed in 1:5 and alpha in 0:1, one model is trained
# with hetior::glmnet_train (presumably a glmnet wrapper -- alpha would then be
# the elastic-net mixing parameter; confirm against hetior), tagged with the
# hetnet name and `incl_degrees`, and stored in the global `fit_list`.
#
# Returns a one-column data frame holding the global counter `i` after the
# last fit, so the function can be used inside dplyr::do().
get_performance <- function(df, incl_degrees) {
  # The response, design matrix, penalty factors and hetnet name do not
  # depend on seed or alpha, so build them once rather than on every fit.
  y <- df$status
  X <- as.matrix(dplyr::select(df, -status, -hetnet))
  # prior_logit gets penalty factor 0; every other column gets 1.
  penalty_factor <- ifelse(colnames(X) == 'prior_logit', 0, 1)
  hetnet_name <- df$hetnet[1]

  for (seed in 1:5) {
    for (alpha in 0:1) {
      # Advance the global counter before training, as fit_list is indexed by it.
      i <<- i + 1
      fit <- hetior::glmnet_train(
        X = X, y = y, alpha = alpha, cores = 5, seed = seed,
        penalty.factor = penalty_factor, lambda.min.ratio = 1e-6, nlambda = 200
      )
      fit$name <- hetnet_name
      fit$incl_degrees <- incl_degrees
      fit_list[[i]] <<- fit
    }
  }
  return(data.frame(i))
}

# Models using every feature, degrees included. The returned value is only a
# placeholder; the fits themselves are collected in fit_list.
temp <- dplyr::do(
  dplyr::group_by(transformed_df, hetnet),
  get_performance(., incl_degrees = 1)
)

# Same again with the degree features removed.
temp <- dplyr::do(
  dplyr::group_by(
    dplyr::select(transformed_df, -one_of(degrees)),
    hetnet
  ),
  get_performance(., incl_degrees = 0)
)

Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-5



In [5]:
# Turn each fitted model in fit_list into a data frame of its settings and
# performance metrics, then stack them into one table.
# `name` and `incl_degrees` are set by get_performance(); `alpha`, `seed` and
# the `vtm` metrics are presumably populated by hetior::glmnet_train -- confirm
# against hetior.
result_df <- fit_list %>%
  lapply(function(l) {
    dplyr::data_frame(
      name = l$name,
      alpha = l$alpha,
      incl_degrees = l$incl_degrees,
      seed = l$seed,
      auroc = l$vtm$auroc,
      auprc = l$vtm$auprc,
      tjur = l$vtm$tjur
    )
  }) %>%
  # bind_rows() replaces rbind_all(), which is deprecated and no longer
  # available in current dplyr releases.
  dplyr::bind_rows()

head(result_df, 2)

Unnamed: 0,name,alpha,incl_degrees,seed,auroc,auprc,tjur
1,rephetio-v2.0,0,1,1,0.991685,0.9738731,0.7414739
2,rephetio-v2.0,1,1,1,0.993174,0.9745725,0.7897991


In [6]:
# Save the per-fit performance table.
readr::write_tsv(result_df, 'data/model-performances.tsv')

In [7]:
# Summarize performance per (permuted, alpha, incl_degrees) combination:
# the mean and normal-theory confidence limits (ggplot2::mean_cl_normal) of
# tjur, auroc and auprc across the fits in each group, sorted by mean tjur.
summary_df <- result_df %>%
  # A hetnet counts as permuted when its name contains 'perm'. Use the piped
  # `name` column rather than reaching back to the global result_df.
  dplyr::mutate(permuted = as.integer(grepl('perm', name))) %>%
  # `alpha` was previously listed twice; each grouping variable appears once.
  dplyr::group_by(permuted, alpha, incl_degrees) %>%
  dplyr::do(
    dplyr::bind_cols(
      ggplot2::mean_cl_normal(.$tjur) %>%
        dplyr::rename(tjur = y, tjur_lower = ymin, tjur_upper = ymax),
      ggplot2::mean_cl_normal(.$auroc) %>%
        dplyr::rename(auroc = y, auroc_lower = ymin, auroc_upper = ymax),
      ggplot2::mean_cl_normal(.$auprc) %>%
        dplyr::rename(auprc = y, auprc_lower = ymin, auprc_upper = ymax)
    )
  ) %>%
  dplyr::ungroup() %>%
  # dplyr is never attached in this notebook, so qualify desc() explicitly.
  dplyr::arrange(dplyr::desc(tjur))

summary_df %>% readr::write_tsv('data/model-performances-summary.tsv')
summary_df

Unnamed: 0,permuted,alpha,incl_degrees,tjur,tjur_lower,tjur_upper,auroc,auroc_lower,auroc_upper,auprc,auprc_lower,auprc_upper
1,0,1,1,0.7842567,0.777051,0.7914623,0.9925687,0.9917722,0.9933651,0.9728341,0.9705656,0.9751026
2,0,0,1,0.7457139,0.7371756,0.7542523,0.9921086,0.9912643,0.9929529,0.9748692,0.9728735,0.9768648
3,0,1,0,0.7259513,0.7259513,0.7259513,0.9811947,0.9811947,0.9811947,0.9482232,0.9482232,0.9482232
4,0,0,0,0.7127112,0.7041414,0.7212811,0.9871499,0.9860367,0.9882632,0.9645855,0.9622771,0.9668939
5,1,1,1,0.696059,0.6861327,0.7059853,0.9834288,0.9820289,0.9848286,0.9453694,0.941303,0.9494357
6,1,0,1,0.670025,0.6594773,0.6805727,0.9850796,0.9837318,0.9864274,0.9607366,0.9579773,0.9634958
7,1,1,0,0.6337473,0.6255932,0.6419013,0.9717641,0.9701234,0.9734048,0.9233133,0.9190013,0.9276253
8,1,0,0,0.6066769,0.5966338,0.61672,0.9742294,0.9723438,0.976115,0.936097,0.9324339,0.9397602
