
Commit 436ab62

fix merge issues of ml
1 parent: 3bfd5f4

File tree

5 files changed: +67 -43 lines changed


.DS_Store (4 KB): binary file not shown.

api/.DS_Store (4 KB): binary file not shown.

api/code/batch_effect_correction.R (34 additions, 32 deletions)
@@ -1,84 +1,86 @@
-
 batch_effect_correction <- function(input_file, output_dir, user_id) {
   library(jsonlite)
   library(sva) # For batch effect correction

   tryCatch(
     {
       # Read and preprocess data - preserve exact feature names
       merged_df_data <- read.csv(input_file, header = TRUE, row.names = 1, check.names = FALSE)
       merged_df_data <- na.omit(merged_df_data)

       # Ensure unique column names
       colnames(merged_df_data) <- make.unique(colnames(merged_df_data))

       # Extract condition and expression matrix
       condition_info <- merged_df_data$condition
       data_t <- t(merged_df_data[, !(colnames(merged_df_data) %in% c("condition", "batch"))])

       # Save original feature names
       feature_names <- rownames(data_t)
       sample_names <- colnames(data_t)

       # Batch effect correction with ComBat
       batch_info <- merged_df_data$batch
       data_combat <- ComBat(dat = as.matrix(data_t), batch = batch_info, par.prior = TRUE, prior.plots = FALSE)

       # Restore original feature names
       rownames(data_combat) <- feature_names

       # Save corrected data
       output_file <- file.path(output_dir, paste0("batch_", basename(input_file)))
       data_corrected <- t(data_combat)
       data_corrected_with_condition <- cbind(condition = condition_info, data_corrected)

       # Write CSV with proper quoting to preserve commas/spaces in feature names
       write.csv(
         data_corrected_with_condition,
         output_file,
         row.names = TRUE,
         quote = TRUE,
         na = "",
-        fileEncoding = "UTF-8")
-
+        fileEncoding = "UTF-8"
+      )
+
       # Create boxplots in PDF and PNG formats only
       plot_formats <- c("pdf", "png")
       for (fmt in plot_formats) {
         file_name <- file.path(output_dir, paste0("batch_correction_boxplots.", fmt))

         # Set up the plotting device
         if (fmt == "png") {
-          png(file_name, width = 1200, height = 600, res = 300)
+          png(file_name, width = 2400, height = 1200, res = 300)
         } else {
           pdf(file_name, width = 12, height = 6)
         }

         # Create the plots
         par(mfrow = c(1, 2), mar = c(10, 5, 4, 2))

         # Pre-correction plot
         boxplot(data_t,
-                main = "Before Batch Correction",
-                las = 2,
-                col = "lightblue",
-                outline = FALSE,
-                ylab = "Expression Levels",
-                cex.axis = 0.7,
-                names = sample_names)
-
+           main = "Before Batch Correction",
+           las = 2,
+           col = "lightblue",
+           outline = FALSE,
+           ylab = "Expression Levels",
+           cex.axis = 0.7,
+           names = sample_names
+         )
+
         # Post-correction plot
         boxplot(data_combat,
-                main = "After Batch Correction",
-                las = 2,
-                col = "lightgreen",
-                outline = FALSE,
-                ylab = "Expression Levels",
-                cex.axis = 0.7,
-                names = sample_names)
-
+           main = "After Batch Correction",
+           las = 2,
+           col = "lightgreen",
+           outline = FALSE,
+           ylab = "Expression Levels",
+           cex.axis = 0.7,
+           names = sample_names
+         )
+
         dev.off()
       }

       # Output completion message
       cat("Batch effect correction completed. Corrected data saved to:", output_file, "\n")
       cat("Boxplots saved in PDF and PNG formats.\n")

api/code/code.py (18 additions, 6 deletions)
@@ -856,12 +856,16 @@ def set_perplexity(n_samples):
 )
 from sklearn.base import clone

-def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):
+def rank_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):
     """
     Rank top features based on single-feature model performance (AUPRC, AUROC, etc.).
     Saves CSV and plots ROC/PR curves for each.
     """
+    top10_df = pd.read_csv(top10_df_path)
+
+    print('top10_df:', top10_df.head())
+
     try:
         # --- Validate inputs ---
         if selected_model not in param_grids:
@@ -932,6 +936,9 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):
         csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')
         metrics_df.to_csv(csv_path, index=False)

+
+        print("okay till plotting")
+
         # --- Plotting ---
         fig, axes = plt.subplots(1, 2, figsize=(15, 6))

@@ -981,19 +988,20 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):

         # Return URLs
         base_url = f"{BASE_URL}/files/{user_info['user_id']}"
-        return json.dumps({
+        return {
             "message": "Feature ranking and plotting completed successfully.",
             "ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",
             "plot_png": f"{base_url}/single_feature_model_performance_landscape.png",
             "plot_pdf": f"{base_url}/single_feature_model_performance_landscape.pdf",
             "metrics": metrics_df.to_dict(orient="records")
-        })
+        }

     except Exception as e:
-        return json.dumps({
+        print(e)
+        return {
             "message": "Error during feature ranking and plotting.",
             "error": str(e)
-        })
+        }

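A likely motivation for returning a plain dict instead of json.dumps(...) (inferred from the router code below, not stated in the commit message): the callers test `if "error" in result`, which on a string is a substring search rather than a key lookup. A quick sketch of the difference:

    import json

    payload = {"message": "Feature ranking completed.", "metrics": []}
    as_text = json.dumps(payload)

    # On a string, `in` is a substring test; it would fire for any payload
    # whose serialized text happens to contain "error".
    print("error" in as_text)  # False here, but fragile

    # On a dict, `in` tests keys, which is what the router's check intends.
    print("error" in payload)  # False: no "error" key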
@@ -1009,7 +1017,7 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):
     matthews_corrcoef, log_loss
 )

-def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param_grids, classifiers, output_dir, user_info):
+def evaluate_model_with_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):
     """
     Evaluate the performance of models using top-N features (10 to 1), save plots and metrics, and select the best feature subset.
     """
@@ -1018,6 +1026,10 @@ def evaluate_model_with_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):
     outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
     inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

+    top10_df = pd.read_csv(top10_df_path)
+
+    top10_df_array = top10_df.drop(columns='condition').columns.to_numpy()
+
     # Storage
     roc_curves = []
     pr_curves = []
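The rewritten evaluate_model_with_features derives the feature-name array from the CSV rather than receiving it as a parameter. Assuming the top-10 CSV holds a `condition` label column plus one column per feature (which the drop call implies), the derivation behaves like this sketch:

    import pandas as pd

    # Assumed layout: a "condition" label column plus one column per feature.
    df = pd.DataFrame({
        "condition": ["case", "control"],
        "GeneA": [1.2, 0.4],
        "GeneB": [3.1, 2.7],
    })

    features = df.drop(columns="condition").columns.to_numpy()
    print(features)  # ['GeneA' 'GeneB'], the array the old top10_df_array parameter carried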

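For readers unfamiliar with the outer_cv/inner_cv pair above: it is a standard nested cross-validation setup, where the inner folds tune hyperparameters and the outer folds estimate generalization. A generic sketch of the pattern (the model and grid are placeholders, not this repo's actual configuration):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score

    X, y = make_classification(n_samples=120, n_features=10, random_state=0)

    outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    # Inner loop tunes C; outer loop scores the tuned estimator on held-out folds.
    search = GridSearchCV(LogisticRegression(max_iter=1000), {"C": [0.1, 1.0, 10.0]}, cv=inner_cv)
    scores = cross_val_score(search, X, y, cv=outer_cv)
    print(scores.mean())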
api/routers/operation_router.py (15 additions, 5 deletions)
@@ -455,6 +455,7 @@ async def benchmark_models_api(user_info: dict = Depends(verify_token)):
 from code.code import get_model_and_importance_with_top10, best_models
 from fastapi import Form
 global_model_name = "Extra Trees"
+global_basef_name = "top10_features_extra_trees.csv"

 @router.post('/top10-features')
 async def top10_features(model_name: str = Form(...), user_info: dict = Depends(verify_token)):
@@ -491,6 +492,8 @@ async def top10_features(model_name: str = Form(...), user_info: dict = Depends(
         user_info=user_info
     )

+    global_basef_name = result['top10_features_path']
+
     return {
         "message": "Top 10 features extracted successfully.",
         "top10_features": result["top10_features"],
@@ -516,7 +519,7 @@ async def visualize_dimensions_api(
     try:
         # Define file paths
         user_id = str(user_info['user_id'])
-        input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
+        input_file = os.path.join("code", user_id, "files", global_basef_name)
         output_dir = os.path.join("code", user_id, "files")

         # Ensure the input file exists
@@ -546,7 +549,7 @@
 from code.code import rank_features, param_grids, classifiers

 @router.get('/evaluate-single-features')
-async def rank_features_api(
+async def evaluate_single_features(
     user_info: dict = Depends(verify_token)
 ):
     """
@@ -555,7 +558,7 @@ async def rank_features_api(
     try:
         # Define file paths
         user_id = str(user_info['user_id'])
-        input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
+        input_file = os.path.join("code", user_id, "files", global_basef_name)
         output_dir = os.path.join("code", user_id, "files")

         # Ensure the input file exists
@@ -568,6 +571,8 @@
         # Call the feature ranking function
         result = rank_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)

+        print('result: ', result)
+
         # Check for errors in the result
         if "error" in result:
             return {"message": "Feature ranking failed.", "error": result["error"]}
@@ -609,10 +614,15 @@ async def evaluate_model_features_api(
         # Call the function
         result = evaluate_model_with_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)

+        print('result: ', result)
+
         # Handle errors
         if "error" in result:
             return {"message": "Evaluation failed.", "error": result["error"]}

+
+
+
         return {
             "message": result["message"],
             "metrics_file": result["metrics_file"],
@@ -637,7 +647,7 @@ async def visualize_dimensions_api(
     try:
         # Define file paths
         user_id = str(user_info['user_id'])
-        input_file = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")
+        input_file = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")
         output_dir = os.path.join("code", user_id, "files")

         # Ensure the input file exists
@@ -677,7 +687,7 @@ async def evaluate_final_model_api(
     try:
         # Define file paths
         user_id = str(user_info['user_id'])
-        final_df_path = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")
+        final_df_path = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")
         output_dir = os.path.join("code", user_id, "files")

         # Ensure the input file exists
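One caveat worth flagging on the global_basef_name hunks (my reading of the diff; it may be handled outside the lines shown): assigning to the name inside top10_features without a `global` declaration rebinds a function-local variable, so the module-level default would still be what the other endpoints read. A minimal reproduction:

    global_basef_name = "top10_features_extra_trees.csv"

    def update_without_global():
        # Rebinds a local name; the module-level variable is untouched.
        global_basef_name = "some_other_model.csv"  # hypothetical filename

    def update_with_global():
        global global_basef_name  # declare intent to rebind the module-level name
        global_basef_name = "some_other_model.csv"  # hypothetical filename

    update_without_global()
    print(global_basef_name)  # still "top10_features_extra_trees.csv"

    update_with_global()
    print(global_basef_name)  # now "some_other_model.csv"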
