In [None]:
# Remove all objects that ls() reports in the current environment so the
# notebook starts from a clean workspace. Attached packages and options are
# not affected by this call.
rm(list=ls())
#### Load packages
# dplyr supplies the mutate()/select() verbs and tidyverse supplies the %>%
# pipe used below.
library(dplyr)
library(tidyverse)

In [None]:
### Input table for the regression analysis.
### TODO: double check against notebook 0, section "New version- Nov 22 2023".
### The number of patient IDs does not match (reason not yet understood),
### but this is the table we need.
# The file is comma-separated with a header row.
dataForRegression <- read.csv(
  file = "demographic_icd_jha.txt",
  header = TRUE
)
# head(dataForRegression)


In [None]:
# Recode the covariates and drop the columns that are not used afterwards:
#   Hispanic : 1 when the value is "Y", 0 for any other non-missing value
#   Sex      : 1 when the value is "M", 0 for any other non-missing value
#   Age      : coerced to numeric
#   FPL      : coerced to character
dataForRegression <- dataForRegression %>%
  dplyr::mutate(
    Hispanic = ifelse(Hispanic == "Y", 1, 0),
    Sex = ifelse(Sex == "M", 1, 0),
    Age = as.numeric(Age),
    FPL = as.character(FPL)
  ) %>%
  dplyr::select(-c(X, Race, M_Status, State))

# head(dataForRegression)


In [None]:
# Rename columns 7 through 16 to the SUD outcome names. The assignment is
# purely positional, so it relies on the column order of dataForRegression
# at this point.
# NOTE(review): confirm that positions 7:16 really hold the ten SUD
# indicators in this order.
names(dataForRegression)[7:16] <- c(
  "Alcohol",
  "Opioid",
  "Cannabis",
  "Sedative_hypnoti_anxiolytic",
  "Cocaine",
  "OtherStimulant",
  "Hallucinogen",
  "NicotineDependence",
  "Inhalant",
  "Other_psychoactive_substance"
)

# head(dataForRegression)



In [None]:
# Outcome columns (SUDs) to model, one logistic regression each
sud_list <- c(
  "Alcohol",
  "Opioid",
  "Cannabis",
  "Sedative_hypnoti_anxiolytic",
  "Cocaine",
  "OtherStimulant",
  "Hallucinogen",
  "NicotineDependence",
  "Inhalant",
  "Other_psychoactive_substance"
)

# For every SUD: print its name, fit a binomial GLM of that outcome on
# Sex, Age, Hispanic, FPL and Lang, print the model summary, then print a
# separator line.
for (sud in sud_list) {
  print(sud)
  formula <- as.formula(
    sprintf("%s ~ Sex + Age + Hispanic + FPL + Lang", sud)
  )
  model <- glm(formula, data = dataForRegression, family = binomial)
  print(summary(model))
  print("####")
}


In [None]:
# Fit one binomial GLM per SUD and save the coefficient-level results in a
# single data frame (all_results_df), one row per SUD x predictor.
#
# Output columns:
#   SUD              outcome the model was fitted for
#   Predictor        model term, as named in the coefficient table
#   Coefficient      estimate on the log-odds scale
#   Std_Error        standard error of the estimate
#   Z_Value          Wald z statistic
#   Odds_Ratio       exp(estimate)
#   P_Value          unadjusted p-value
#   Adjusted_P_Value Benjamini-Hochberg adjusted p-value
# All numeric columns are rounded to 3 decimals for reporting only; every
# computation below uses the unrounded values.

# Create an empty list to store results (one data frame per SUD)
results_list <- list()

# Loop through each SUD
for (sud in sud_list) {
  formula <- as.formula(paste(sud, "~ Sex + Age + Hispanic + FPL + Lang"))
  model <- glm(formula, data = dataForRegression, family = binomial)
  results <- summary(model)

  # coef() of a glm summary is a matrix with the columns "Estimate",
  # "Std. Error", "z value" and "Pr(>|z|)". Pull the columns out one by one
  # so that only the estimate is exponentiated into an odds ratio.
  coef_table <- coef(results)
  coefficients <- coef_table[, "Estimate"]
  p_values <- coef_table[, "Pr(>|z|)"]

  # Apply multiple testing correction (Benjamini-Hochberg) on the unrounded
  # p-values; rounding first would turn small p-values into 0 and create ties.
  # NOTE(review): the correction is applied within each model, across that
  # model's coefficients (intercept included), not across the SUD models.
  # Confirm this is the intended family of tests.
  adjusted_p_values <- p.adjust(p_values, method = "BH")

  # Combine results into a data frame. The SUD column keeps the outcome name
  # with every row, so it is still available when the table is exported
  # without row names.
  result_df <- data.frame(
    SUD = sud,
    Predictor = rownames(coef_table),
    Coefficient = round(coefficients, 3),
    Std_Error = round(coef_table[, "Std. Error"], 3),
    Z_Value = round(coef_table[, "z value"], 3),
    Odds_Ratio = round(exp(coefficients), 3),
    P_Value = round(p_values, 3),
    Adjusted_P_Value = round(adjusted_p_values, 3),
    stringsAsFactors = FALSE
  )

  # Add result_df to the results_list
  results_list[[sud]] <- result_df
}

# Combine results for all SUDs into one data frame
all_results_df <- do.call(rbind, results_list)



In [None]:
# Show the combined regression results; a bare top-level expression is
# auto-printed as the cell output.
all_results_df

In [None]:
# Export the combined regression results as CSV. Row names are not written
# to the file.
write.csv(
  x = all_results_df,
  file = "./regression_analysis_results_test.csv",
  row.names = FALSE
)

In [None]:
#install.packages("table1")
library(table1)
library(IRdisplay)

In [None]:
# Build a descriptive table of Sex, Age, FPL and Lang with table1(), using
# Hispanic (the term after `|`) as the grouping variable. Sex, FPL, Lang and
# Hispanic are wrapped in as.factor() inside the formula; Age is passed as is.
tableOutput <- table1(~ as.factor(Sex) + Age + as.factor(FPL) + as.factor(Lang) | as.factor(Hispanic), data=dataForRegression )

In [None]:
# Render the table1 output as HTML in the notebook cell.
IRdisplay::display_html(tableOutput)