In [None]:
library(arrow)
library(dplyr)
library(mgcv)
library(tidyverse)
library(data.table)
library(metagam)
library(latex2exp)
library(eivtools)
library(plotly)
library(stats)
library(plotrix)
library(fishmethods)
library(metafor)
library(jsonlite)
library(stringr)
library(doParallel)
library(parallel)
library(MASS)
library(tidygam)

In [None]:
# Run configuration. These four labels are passed to gam_proc2d(), which
# embeds them in the name of each per-pair output file.
stg         <- "stg01"
fs          <- "nofs"
oversample  <- "raw"
model_type  <- "catd"

In [None]:
# Read the input table, drop the stray '__index_level_0__' column, remove
# every row whose site_d or site_m is 'MCRI', and turn both site columns
# into factors (site_d is later used as a random-effect term).
path <- '/home/hoyinchan/blue/Data/data2021/data2021/shapalltmp.parquet'
dfraw <- arrow::read_parquet(path) %>%
    dplyr::select(-'__index_level_0__') %>%
    dplyr::filter(site_d != 'MCRI', site_m != 'MCRI') %>%
    dplyr::mutate(
        site_d = as.factor(site_d),
        site_m = as.factor(site_m)
    )

In [None]:
# Every distinct value of the Feature column; one set of models is fitted per value.
targets <- unique(dfraw[["Feature"]])

In [None]:
# Features whose `Name` column is converted to a factor before fitting.
cattarget <- as.list(c("PX:CH:J1940", "PX:09:96.72"))

In [None]:
# Mean of `roc` over the distinct (site_d, site_m, roc) rows. Only the
# commented-out weighting further down refers to it.
dfmean <- dfraw %>%
    dplyr::distinct(site_d, site_m, roc) %>%
    dplyr::pull(roc) %>%
    mean()

# Number of rows per (site_d, site_m) pair, attached to every row as total_count.
dfcount <- dfraw %>%
    dplyr::count(site_d, site_m, name = 'total_count')

#dfraw$rocw <- dfraw$roc/dfmean
#dfraw$rocw <- 1/dfraw$roc2
# Observation weight: inverse of the pair's row count, rescaled so that the
# weights average to 1 over all rows.
dfraw <- dfraw %>%
    dplyr::left_join(dfcount, by = c('site_d', 'site_m')) %>%
    dplyr::mutate(rocw = (1 / total_count) / mean(1 / total_count))

In [None]:
# Fit one model with mgcv::bam() (REML) and collect its summary and plot data.
#
# Args:
#   eqn:    model formula handed to bam().
#   dfraw2: data frame the formula is evaluated in. It must contain a `rocw`
#           column when weight = TRUE.
#   target: label of the feature being fitted; printed and echoed in the result.
#   type:   label of the model form (e.g. 'linear'); printed and echoed in the
#           result.
#   weight: if TRUE, the fit uses the `rocw` column as observation weights.
#
# Returns:
#   An unnamed list: target, type, summary(fit), the value returned by
#   plot(fit) with the `raw` entry of every element removed, and the value
#   returned by termplot(fit, se = TRUE, plot = FALSE).
#
# Side effects: prints target, type and the model summary, and draws the
# model's plot on the active graphics device.
fit_proc <- function(eqn, dfraw2, target, type, weight=FALSE){
    if (weight){
        # Spell out `weights`: the previous `weight=` only reached bam()
        # through partial argument matching.
        xfit <- bam(eqn, data=dfraw2, method='REML', weights=rocw)
    }else{
        xfit <- bam(eqn, data=dfraw2, method='REML')
    }
    # Compute the summary once and reuse it for printing and for the result.
    sxfit <- summary(xfit)
    print(target)
    print(type)
    print(sxfit)
    flush.console()
    pxfit <- plot(xfit)
    pxfit2 <- termplot(xfit, data=dfraw2, se = TRUE, plot = FALSE)
    # Drop the `raw` entry from each plot element before the result is
    # returned. seq_along() keeps the loop a no-op if plot() returned an
    # empty list.
    for (i in seq_along(pxfit)){
        pxfit[[i]]$raw <- NULL
    }
    return(list(target, type, sxfit, pxfit, pxfit2))
}

In [None]:
# Fit four model forms ('linear', 'quadratic', 'spline', 'spline_interaction')
# for every feature in `targets` and either return the results or write them
# to a JSON file.
#
# Args:
#   outputname: path of the JSON file to write (used only when returnf = FALSE).
#   cattarget:  features whose `Name` column is treated as a factor. For these
#               all four labels are fitted with the same formula.
#   targets:    features to process; each one selects the rows of `dfraw`
#               whose `Feature` equals it.
#   dfraw:      input data frame; the formulas use its val, Name, site_d and
#               roc2 columns (and rocw when weight = TRUE).
#   returnf:    if TRUE, return the result list instead of writing the file.
#   weight:     forwarded to fit_proc() for every fit.
#   noAUC:      if TRUE, the `roc2` main effect is left out of the formulas.
#
# Returns:
#   With returnf = TRUE, a list with one element per target, each a list of
#   the four fit_proc() results in the order listed above. Otherwise the
#   function is called for its side effect of writing `outputname`.
gam_proc <- function(outputname, cattarget, targets, dfraw, returnf=FALSE, weight=FALSE, noAUC=FALSE) {
    # Preallocate instead of growing the list with append() on every pass.
    resultlist <- vector("list", length(targets))
    idx <- 0L
    for (target in targets){
        idx <- idx + 1L
        print(target)
        flush.console()
        dfraw2 <- dfraw %>% dplyr::filter(Feature==target)
        if (target %in% cattarget){
            dfraw2$Name <- as.factor(dfraw2$Name)
            # A factor predictor has no polynomial or spline form, so the
            # same formula is fitted under all four labels.
            if (noAUC){
                eqnl <- val ~ Name + s(site_d,bs="re")
            }else{
                eqnl <- val ~ Name + s(site_d,bs="re") + roc2
            }
            eqnq <- eqnl
            eqns <- eqnl
            eqnt <- eqnl
        }else if (noAUC){
            eqnl <- val ~ poly(Name,1,raw=TRUE) + s(site_d,bs="re")
            eqnq <- val ~ poly(Name,2,raw=TRUE) + s(site_d,bs="re")
            eqns <- val ~ s(Name,k=10,bs='cr') + s(site_d,bs="re")
            # NOTE(review): this formula still contains ti(Name, roc2) although
            # noAUC removes the roc2 main effect - confirm that is intended.
            eqnt <- val ~ s(Name,k=10,bs='cr') + s(site_d,bs="re") + ti(Name,roc2,bs='cr')
        }else{
            eqnl <- val ~ poly(Name,1,raw=TRUE) + s(site_d,bs="re") + roc2
            eqnq <- val ~ poly(Name,2,raw=TRUE) + s(site_d,bs="re") + roc2
            eqns <- val ~ s(Name,k=10,bs='cr') + s(site_d,bs="re") + roc2
            eqnt <- val ~ s(Name,k=10,bs='cr') + s(site_d,bs="re") + roc2 + ti(Name,roc2,bs='cr')
        }
        # One shared set of calls, so `weight` reaches every fit. Previously
        # the continuous-feature branch with noAUC = FALSE did not pass it on
        # and those models were always fitted unweighted.
        resultlist[[idx]] <- list(
            fit_proc(eqnl, dfraw2, target, 'linear', weight=weight),
            fit_proc(eqnq, dfraw2, target, 'quadratic', weight=weight),
            fit_proc(eqns, dfraw2, target, 'spline', weight=weight),
            fit_proc(eqnt, dfraw2, target, 'spline_interaction', weight=weight)
        )
    }
    if (returnf){
        return(resultlist)
    }
    output_to_python <- toJSON(resultlist, force = TRUE)
    write(output_to_python, paste0(outputname))
}

In [None]:
# outputname <- "gamalltmp_single_AUC_populationweight.json"
# gam_proc(outputname, cattarget, targets, dfraw, weight=TRUE, noAUC=FALSE)

In [None]:
# Unweighted single-feature fits without the roc2 main effect, written to JSON.
outputname <- "gamalltmp_single_noAUC.json"
gam_proc(
    outputname = outputname,
    cattarget = cattarget,
    targets = targets,
    dfraw = dfraw,
    weight = FALSE,
    noAUC = TRUE
)

In [None]:
# outputname <- "gamalltmp_single_weightAUC2.json"
# gam_proc(outputname, cattarget, targets, dfraw, weight=TRUE, noAUC=TRUE)

In [None]:
# Interactive check on the 'AGE' feature: define the four formulas and fit the
# quadratic and spline forms (eqnl and eqnt are defined but not fitted here).
dfraw2 <- dplyr::filter(dfraw, Feature == "AGE")
eqnl <- val ~ poly(Name, 1, raw = TRUE) + s(site_d, bs = "re") + roc2
eqnq <- val ~ poly(Name, 2, raw = TRUE) + s(site_d, bs = "re") + roc2
eqns <- val ~ s(Name, k = 10, bs = "cr") + s(site_d, bs = "re") + roc2
eqnt <- val ~ s(Name, k = 10, bs = "cr") + s(site_d, bs = "re") + roc2 +
    ti(Name, roc2, bs = "cr")
resultq <- fit_proc(eqn = eqnq, dfraw2 = dfraw2, target = "AGE", type = "quadratic")
results <- fit_proc(eqn = eqns, dfraw2 = dfraw2, target = "AGE", type = "spline")

In [None]:
# Summarise the object returned by fit_proc() for the quadratic AGE fit.
# NOTE(review): `resultq` is the plain list built by fit_proc(), so this
# summarises the list itself; the stored model summary is its third element -
# confirm which one was intended.
summary(resultq)

In [None]:
## 2D

In [None]:
# Spline fit for 'AGE' with a random effect for site_d and a linear roc2 term,
# followed by the model's default plot.
dfraw2 <- dplyr::filter(dfraw, Feature == "AGE")
eqnl <- val ~ s(Name, k = 10, bs = "cr") + s(site_d, bs = "re") + roc2
xfit <- bam(formula = eqnl, data = dfraw2, method = "REML")
plot(xfit)

In [None]:
# Same spline model for 'AGE' with an additional s(site_d, roc2, bs = "re")
# term, followed by the model's default plot.
dfraw2 <- dplyr::filter(dfraw, Feature == "AGE")
eqnl <- val ~ s(Name, k = 10, bs = "cr") + s(site_d, bs = "re") + roc2 +
    s(site_d, roc2, bs = "re")
xfit <- bam(formula = eqnl, data = dfraw2, method = "REML")
plot(xfit)

In [None]:
# Show the model summary of the most recently fitted `xfit`.
summary(xfit)

In [None]:
# Try two-feature interactions

In [None]:
# Every unordered pair of features, as a list of length-2 vectors.
target_combo <- combn(targets, m = 2, simplify = FALSE)
# NOTE(review): gam_proc2d() builds its own per-pair file name and takes no
# output-name argument, so this value looks unused by the pairwise run - confirm.
outputname <- "gamalltmp_double_interaction.json"

In [None]:
# Fit a two-feature interaction model for the pair (f1, f2) and either return
# the result or write it to a per-pair JSON file.
#
# Args:
#   cattarget:  features whose `Name` column is treated as a factor.
#   dfraw:      input data frame; rows are selected by `Feature` and the two
#               selections are inner-joined on ID, site_d, site_m, roc, roc2.
#   f1, f2:     the two feature labels. If only f1 is categorical the two are
#               swapped so that the categorical one is always f2.
#   stg, fs, oversample, model_type: labels embedded in the output file name.
#   returnf:    if TRUE, return the result and touch no file.
#
# Returns:
#   NULL when both features are categorical (no model is defined for that
#   case) or, with returnf = FALSE, when the output file already exists.
#   With returnf = TRUE, list(f1, f2, as.list(summary(fit)), plot data) with
#   f1/f2 in their post-swap order. Otherwise the function is called for its
#   side effect of writing the JSON file.
gam_proc2d <- function(cattarget, dfraw, f1, f2, stg, fs, oversample, model_type, returnf = FALSE) {

    # Make a feature label usable inside a file name.
    safe_name <- function(x) {
        x <- str_replace_all(x,'::','_')
        x <- str_replace_all(x,'/','per')
        x <- str_replace_all(x,'\\(','_')
        str_replace_all(x,'\\)','_')
    }

    f1_is_cat <- f1 %in% cattarget
    f2_is_cat <- f2 %in% cattarget

    # Two categorical features have no model here; reject the pair before any
    # filtering or joining is done.
    if (f1_is_cat && f2_is_cat){
        return(NULL)
    }

    if (!returnf){
        # The file name uses the labels in the order the caller passed them,
        # i.e. before any swap below. An existing file means the pair is done.
        filename <- paste0('/home/hoyinchan/blue/program_data/AKI_CDM_PY/MetaRegression/gam2d_tmp/','gam2d_tmp_noAUC','_',safe_name(f1),'_',safe_name(f2),'_',stg,'_',fs,'_',oversample,'_',model_type,'.json')
        if (file.exists(filename)){
            return(NULL)
        }
    }

    eqn_cc <- val ~ s(Name.x,k=10,bs='cr') + s(Name.y,k=10,bs='cr') + s(site_d,bs="re") + ti(Name.x,Name.y,k=10,bs='cr')
    eqn_cd <- val ~ s(Name.x,k=10,bs='cr') + s(Name.x,by=Name.y,k=10,bs='cr') + Name.y + s(site_d,bs="re")

    # Keep the categorical feature, if there is one, in the second slot so
    # that it ends up in the `.y` columns of the join.
    if (f1_is_cat){
        tmp <- f1
        f1 <- f2
        f2 <- tmp
        f2_is_cat <- TRUE
    }

    dfraw21 <- dfraw %>% dplyr::filter(Feature==f1)
    dfraw22 <- dfraw %>% dplyr::filter(Feature==f2)
    dfraw23 <- dfraw21 %>% dplyr::inner_join(dfraw22, by=c('ID','site_d', 'site_m', 'roc', 'roc2'))
    # The response is the sum of the two features' `val` columns.
    dfraw23 <- dfraw23 %>% dplyr::mutate(val=val.x+val.y)

    if (f2_is_cat){
        eqn <- eqn_cd
        dfraw23$Name.y <- as.factor(dfraw23$Name.y)
    } else {
        eqn <- eqn_cc
    }

    xfit <- bam(eqn, data=dfraw23, method='REML')
    sxfit <- summary(xfit)
    pxfit <- plot(xfit)
    # Drop the `raw` entry from each plot element before the result is
    # returned or serialised. seq_along() is safe on an empty list.
    for (i in seq_along(pxfit)){
        pxfit[[i]]$raw <- NULL
    }
    result <- list(f1, f2, as.list(sxfit), pxfit)
    if (returnf){
        return(result)
    }
    output_to_python <- toJSON(result, force = TRUE)
    write(output_to_python, filename)
}

In [None]:
#gam_proc2d(cattarget, dfraw, target_combo[[179]][1], target_combo[[179]][2], stg, fs, oversample, model_type)

In [None]:
# Run the pairwise model for every feature pair, announcing each pair first.
for (tar in target_combo) {
    progress_msg <- paste(tar[1], tar[2], 'running')
    print(progress_msg)
    flush.console()
    gam_proc2d(
        cattarget = cattarget,
        dfraw = dfraw,
        f1 = tar[1],
        f2 = tar[2],
        stg = stg,
        fs = fs,
        oversample = oversample,
        model_type = model_type
    )
}

In [None]:
# Marker printed once the preceding cells have finished.
print("done")

In [None]:
# Fit a single pair and keep the result in memory instead of writing a file.
xxx <- gam_proc2d(
    cattarget = cattarget,
    dfraw = dfraw,
    f1 = "LAB::4544-3(%)",
    f2 = "PX:CH:J1940",
    stg = stg,
    fs = fs,
    oversample = oversample,
    model_type = model_type,
    returnf = TRUE
)

In [None]:
# Display the JSON form of the single-pair result held in `xxx`.
# `output_to_python` is only ever assigned inside gam_proc()/gam_proc2d(), so
# it does not exist at top level; build it here before showing it.
# NOTE(review): assumes the intent was to inspect the serialisation of `xxx` - confirm.
output_to_python <- toJSON(xxx, force = TRUE)
output_to_python