In [None]:
#!conda create -n ag python=3.10
#!conda activate ag
#!conda install -c conda-forge mamba
#!mamba install -c conda-forge -c pytorch -c nvidia autogluon "pytorch=*=*cuda*"
#!mamba install -c conda-forge "ray-tune >=2.6.3,<2.7" "ray-default >=2.6.3,<2.7"  # install ray for faster training

In [None]:
#!pip install autogluon==1.0.0
#!pip install --upgrade numpy pandas scipy
#!pip install numpy==1.26.4
#!pip install pyJoules
#!pip install mxnet-cu110
#!pip install jedi
#!pip install setuptools
#!pip install scikit-learn==1.3.0
#!pip install pandas==2.0.0
#!pip install fsspec==2023.1.0
#!pip install torch==2.0.1+cu118 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#cls
# !pip install cudatoolkit


Collecting autogluon==1.0.0
  Using cached autogluon-1.0.0-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.core==1.0.0 (from autogluon.core[all]==1.0.0->autogluon==1.0.0)
  Using cached autogluon.core-1.0.0-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.features==1.0.0 (from autogluon==1.0.0)
  Using cached autogluon.features-1.0.0-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.tabular==1.0.0 (from autogluon.tabular[all]==1.0.0->autogluon==1.0.0)
  Using cached autogluon.tabular-1.0.0-py3-none-any.whl.metadata (14 kB)
Collecting autogluon.multimodal==1.0.0 (from autogluon==1.0.0)
  Using cached autogluon.multimodal-1.0.0-py3-none-any.whl.metadata (14 kB)
Collecting autogluon.timeseries==1.0.0 (from autogluon.timeseries[all]==1.0.0->autogluon==1.0.0)
  Using cached autogluon.timeseries-1.0.0-py3-none-any.whl.metadata (13 kB)
Collecting numpy<1.29,>=1.21 (from autogluon.core==1.0.0->autogluon.core[all]==1.0.0->autogluon==1.0.0)
  Downloading numpy-1.26.4-cp39-cp39-

In [1]:
import torch
print(torch.cuda.is_available())  # Should be True
print(torch.cuda.device_count())  # Should be > 0

True
1


In [None]:
import pandas as pd
import numpy as np
import logging
import os
import time
from sklearn.feature_selection import mutual_info_classif, VarianceThreshold
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor

# Deep Learning Part

In [None]:
FILENAME = "SAD"
DATA_PATH = "sad.csv"
TARGET = "ChanceofAdmit" 
SELECTED_METRICS = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
KFOLD=10    

# Load dataset
df = pd.read_csv(DATA_PATH)
df = df.drop(['Serial No.'], axis=1)
df.columns = df.columns.str.replace(' ', '')

# Transform class
df[TARGET] = df[TARGET].apply(lambda x: 1 if x >= 0.7 else 0)

# Separate features and target
X = df.drop(columns=[TARGET])
y = df[TARGET]

# Feature Selection: Variance Filtering
selector_variance = VarianceThreshold(threshold=(.8 * (1 - .8)))
X_variance = selector_variance.fit_transform(X)
variance_columns = X.columns[selector_variance.get_support()]

# Feature Selection: Mutual Information Filtering
mi_scores = mutual_info_classif(X_variance, y, discrete_features='auto')
mi_threshold = 0.01 
mi_mask = mi_scores > mi_threshold
X_mi = X_variance[:, mi_mask]
mi_columns = variance_columns[mi_mask]

# Create a DataFrame with the selected features
X_selected = pd.DataFrame(X_mi, columns=mi_columns)

# Create a new DataFrame with the selected features and selected class
df_selected = X_selected.copy()
df_selected[TARGET] = y
for gpu_available in [0, 1]:
    for validation_type in ['kfold']:
        start_time = time.time()
       
        train_data = df_selected
        test_data = df_selected
        
        path = f"GPU_{gpu_available}_{FILENAME}_DL_VALIDATION_{validation_type}"

        # Create the dir if not exist
        os.makedirs(path, exist_ok=True)
        predictor = TabularPredictor(label=TARGET,  path=path, problem_type="binary")
        if(gpu_available):
            predictor.fit(train_data, num_bag_folds=KFOLD, verbosity=2, num_gpus=1, excluded_model_types= ['RF', 'KNN', 'GBM','XGB','CAT','XT','LR'], presets="best_quality")
        else:
            predictor.fit(train_data, num_bag_folds=KFOLD, verbosity=2, excluded_model_types= ['RF', 'KNN', 'GBM','XGB','CAT','XT','LR'], presets="best_quality")
        
        # Test data evaluaton
        # performance = predictor.evaluate(test_data, extra_metrics=SELECTED_METRICS)
        # print(performance)

        # Laderboard
        dfl = predictor.leaderboard(test_data, silent=True, extra_metrics=SELECTED_METRICS)
        print(dfl)

        # Get best model metrics
        # best_model = predictor.get_model_best()
        # performance = predictor.evaluate(test_data, model=best_model)
        # print(f"Performance of the best model ({best_model}):")
        # print(performance)

        end_time = time.time()
        execution_time_minutes = (end_time - start_time) / 60

        filename = f"GPU_{gpu_available}_{FILENAME}_DL_VALIDATION_{validation_type}_TIME_{execution_time_minutes:.2f}.csv"

        print(filename)
        dfl.to_csv(filename, index=False)


[codecarbon INFO @ 16:59:19] [setup] RAM Tracking...
[codecarbon INFO @ 16:59:19] [setup] GPU Tracking...
[codecarbon INFO @ 16:59:19] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 16:59:19] [setup] CPU Tracking...
[36m(_dystack pid=31604)[0m 	0.9159	 = Validation score   (accuracy)
[36m(_dystack pid=31604)[0m 	3.83s	 = Training   runtime
[36m(_dystack pid=31604)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=31604)[0m Fitting model: NeuralNetTorch_r31_BAG_L2 ... Training model for up to 542.77s of the 542.71s of remaining time.
[36m(_dystack pid=31604)[0m 	Fitting 10 child models (S1F1 - S1F10) | Fitting with ParallelLocalFoldFittingStrategy (10 workers, per: cpus=3, gpus=0, memory=0.00%)
[codecarbon INFO @ 16:59:21] Energy consumed for RAM : 0.002397 kWh. RAM Power : 23.979151725769043 W
[codecarbon INFO @ 16:59:21] Energy consumed for all GPUs : 0.003025 kWh. Total GPU Power : 64.36328344135129 W
[codecarbon INFO @ 16:59:21] Energy consumed for all CPUs : 0.005253 k

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r36_BAG_L1       0.856     0.856   0.888889  0.864865   
1   NeuralNetFastAI_r111_BAG_L1       0.856     0.856   0.888889  0.864865   
2    NeuralNetTorch_r197_BAG_L1       0.848     0.848   0.887324  0.851351   
3     NeuralNetTorch_r41_BAG_L1       0.848     0.848   0.898551  0.837838   
4   NeuralNetFastAI_r191_BAG_L1       0.848     0.848   0.898551  0.837838   
..                          ...         ...       ...        ...       ...   
85   NeuralNetTorch_r121_BAG_L2       0.800     0.800   0.888889  0.756757   
86   NeuralNetTorch_r185_BAG_L1       0.792     0.792   0.887097  0.743243   
87    NeuralNetTorch_r71_BAG_L2       0.792     0.792   0.887097  0.743243   
88    NeuralNetTorch_r76_BAG_L2       0.784     0.784   0.885246  0.729730   
89    NeuralNetTorch_r19_BAG_L2       0.784     0.784   0.885246  0.729730   

          f1   roc_auc  score_val eval_metric  pred_time_test  

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   NeuralNetFastAI_r191_BAG_L1       0.964286   0.896396    accuracy        0.197692       0.032642    3.713194                 0.197692                0.032642           3.713194            1       True          4
1    NeuralNetTorch_r185_BAG_L1       0.946429   0.889640    accuracy        0.116266       0.025636    3.400104                 0.116266                0.025636           3.400104            1       True         28
2   NeuralNetFastAI_r194_BAG_L1       0.946429   0.873874    accuracy        0.191566       0.078196    2.852468                 0.191566                0.078196           2.852468            1       True         36
3    NeuralNetFastAI_r65_BAG_L1       0.946429   0.882883    accuracy        0.194152       0.132

                          model  score_test  accuracy  precision    recall  \
0    NeuralNetTorch_r185_BAG_L1       0.936     0.936   0.952703  0.940000   
1      NeuralNetTorch_r1_BAG_L1       0.928     0.928   0.942953  0.936667   
2    NeuralNetTorch_r185_BAG_L2       0.924     0.924   0.939597  0.933333   
3     NeuralNetTorch_r41_BAG_L2       0.920     0.920   0.930464  0.936667   
4     NeuralNetTorch_r41_BAG_L1       0.918     0.918   0.933110  0.930000   
..                          ...         ...       ...        ...       ...   
85  NeuralNetFastAI_r111_BAG_L1       0.872     0.872   0.893333  0.893333   
86   NeuralNetFastAI_r65_BAG_L1       0.870     0.870   0.909408  0.870000   
87  NeuralNetFastAI_r103_BAG_L1       0.868     0.868   0.906250  0.870000   
88  NeuralNetFastAI_r194_BAG_L1       0.868     0.868   0.903448  0.873333   
89  NeuralNetFastAI_r143_BAG_L1       0.866     0.866   0.897611  0.876667   

          f1   roc_auc  score_val eval_metric  pred_time_test  

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r31_BAG_L1       0.952381   0.894895    accuracy        0.100271       0.032250    2.799279                 0.100271                0.032250           2.799279            1       True         23
1   NeuralNetFastAI_r143_BAG_L1       0.952381   0.912913    accuracy        0.204247       0.064702    4.507758                 0.204247                0.064702           4.507758            1       True         13
2     NeuralNetTorch_r71_BAG_L1       0.928571   0.897898    accuracy        0.092311       0.024702    2.092741                 0.092311                0.024702           2.092741            1       True         27
3     NeuralNetTorch_r36_BAG_L1       0.928571   0.891892    accuracy        0.099151       0.029

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r41_BAG_L1       0.864     0.864   0.913043  0.851351   
1     NeuralNetTorch_r36_BAG_L1       0.856     0.856   0.888889  0.864865   
2    NeuralNetTorch_r197_BAG_L1       0.848     0.848   0.887324  0.851351   
3        NeuralNetFastAI_BAG_L1       0.848     0.848   0.898551  0.837838   
4     NeuralNetTorch_r86_BAG_L1       0.840     0.840   0.897059  0.824324   
..                          ...         ...       ...        ...       ...   
85   NeuralNetTorch_r143_BAG_L2       0.808     0.808   0.890625  0.770270   
86  NeuralNetFastAI_r172_BAG_L2       0.808     0.808   0.878788  0.783784   
87    NeuralNetTorch_r76_BAG_L1       0.800     0.800   0.876923  0.770270   
88    NeuralNetTorch_r19_BAG_L1       0.800     0.800   0.876923  0.770270   
89   NeuralNetTorch_r185_BAG_L1       0.800     0.800   0.901639  0.743243   

          f1   roc_auc  score_val eval_metric  pred_time_test  

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   NeuralNetFastAI_r191_BAG_L1       0.964286   0.896396    accuracy        0.189041       0.071285    3.607625                 0.189041                0.071285           3.607625            1       True          4
1    NeuralNetTorch_r185_BAG_L1       0.946429   0.889640    accuracy        0.110532       0.021662    3.334454                 0.110532                0.021662           3.334454            1       True         28
2   NeuralNetFastAI_r103_BAG_L1       0.946429   0.882883    accuracy        0.188658       0.036022    3.020126                 0.188658                0.036022           3.020126            1       True         11
3   NeuralNetFastAI_r194_BAG_L1       0.946429   0.873874    accuracy        0.189956       0.029

                          model  score_test  accuracy  precision    recall  \
0    NeuralNetTorch_r185_BAG_L1       0.940     0.940   0.953020  0.946667   
1     NeuralNetTorch_r31_BAG_L2       0.932     0.932   0.949324  0.936667   
2     NeuralNetTorch_r41_BAG_L1       0.928     0.928   0.948980  0.930000   
3      NeuralNetTorch_r1_BAG_L1       0.926     0.926   0.939799  0.936667   
4     NeuralNetTorch_r22_BAG_L2       0.926     0.926   0.942761  0.933333   
..                          ...         ...       ...        ...       ...   
85  NeuralNetFastAI_r160_BAG_L1       0.872     0.872   0.893333  0.893333   
86   NeuralNetFastAI_r88_BAG_L1       0.870     0.870   0.921147  0.856667   
87   NeuralNetFastAI_r65_BAG_L1       0.870     0.870   0.909408  0.870000   
88  NeuralNetFastAI_r103_BAG_L1       0.868     0.868   0.906250  0.870000   
89  NeuralNetFastAI_r194_BAG_L1       0.868     0.868   0.903448  0.873333   

          f1   roc_auc  score_val eval_metric  pred_time_test  

In [None]:
dfl