In [None]:
#!conda create -n ag python=3.10
#!conda activate ag
#!conda install -c conda-forge mamba
#!mamba install -c conda-forge -c pytorch -c nvidia autogluon "pytorch=*=*cuda*"
#!mamba install -c conda-forge "ray-tune >=2.6.3,<2.7" "ray-default >=2.6.3,<2.7"  # install ray for faster training

In [None]:
#!pip install autogluon==1.0.0
#!pip install --upgrade numpy pandas scipy
#!pip install numpy==1.26.4
#!pip install pyJoules
#!pip install mxnet-cu110
#!pip install jedi
#!pip install setuptools
#!pip install scikit-learn==1.3.0
#!pip install pandas==2.0.0
#!pip install fsspec==2023.1.0
#!pip install torch==2.0.1+cu118 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#cls
# !pip install cudatoolkit


In [1]:
import torch
print(torch.cuda.is_available())  # Should be True
print(torch.cuda.device_count())  # Should be > 0

True
1


In [None]:
import pandas as pd
import numpy as np
import logging
import os
import time
from sklearn.feature_selection import mutual_info_classif, VarianceThreshold
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor

# Deep Learning Part

In [None]:
FILENAME = "EPPD"
DATA_PATH = "eppd.csv"
TARGET = "PlacedOrNot" 
SELECTED_METRICS = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
KFOLD=10    

# Load dataset
df = pd.read_csv(DATA_PATH)
dummy_gender = pd.get_dummies(df['Gender'])
dummy_stream = pd.get_dummies(df['Stream'])
df.columns = df.columns.str.replace(' ', '')
df = pd.concat([df.drop(["Gender", "Stream"], axis = 1), dummy_gender, dummy_stream], axis = 1)
df = df[['Age', 'Male', 'Female',
            'Electronics And Communication',
            'Computer Science', 'Information Technology',
            'Mechanical', 'Electrical', "Civil",
            'Internships','CGPA','Hostel',
            'HistoryOfBacklogs', 'PlacedOrNot']]

# Separate features and target
X = df.drop(columns=[TARGET])
y = df[TARGET]

# Feature Selection: Variance Filtering
selector_variance = VarianceThreshold(threshold=(.8 * (1 - .8)))
X_variance = selector_variance.fit_transform(X)
variance_columns = X.columns[selector_variance.get_support()]

# Feature Selection: Mutual Information Filtering
mi_scores = mutual_info_classif(X_variance, y, discrete_features='auto')
mi_threshold = 0.01 
mi_mask = mi_scores > mi_threshold
X_mi = X_variance[:, mi_mask]
mi_columns = variance_columns[mi_mask]

# Create a DataFrame with the selected features
X_selected = pd.DataFrame(X_mi, columns=mi_columns)

# Create a new DataFrame with the selected features and selected class
df_selected = X_selected.copy()
df_selected[TARGET] = y
for gpu_available in [0, 1]:
    for validation_type in ['kfold']:
        start_time = time.time()        
        train_data = df_selected
        test_data = df_selected
        
        path = f"GPU_{gpu_available}_{FILENAME}_DL_VALIDATION_{validation_type}"

        # Create the dir if not exist
        os.makedirs(path, exist_ok=True)
        predictor = TabularPredictor(label=TARGET,  path=path, problem_type="binary")
        if(gpu_available):
            predictor.fit(train_data, num_bag_folds=KFOLD, verbosity=2, num_gpus=1, excluded_model_types= ['RF', 'KNN', 'GBM','XGB','CAT','XT','LR'], presets="best_quality")
        else:
            predictor.fit(train_data, num_bag_folds=KFOLD, verbosity=2, excluded_model_types= ['RF', 'KNN', 'GBM','XGB','CAT','XT','LR'], presets="best_quality")
        
        # Test data evaluaton
        # performance = predictor.evaluate(test_data, extra_metrics=SELECTED_METRICS)
        # print(performance)

        # Laderboard
        dfl = predictor.leaderboard(test_data, silent=True, extra_metrics=SELECTED_METRICS)
        print(dfl)

        # Get best model metrics
        # best_model = predictor.get_model_best()
        # performance = predictor.evaluate(test_data, model=best_model)
        # print(f"Performance of the best model ({best_model}):")
        # print(performance)
             
        end_time = time.time()
        execution_time_minutes = (end_time - start_time) / 60

        filename = f"GPU_{gpu_available}_{FILENAME}_DL_VALIDATION_{validation_type}_TIME_{execution_time_minutes:.2f}.csv"

        print(filename)
        dfl.to_csv(filename, index=False)


[codecarbon INFO @ 09:48:15] [setup] RAM Tracking...
[codecarbon INFO @ 09:48:15] [setup] GPU Tracking...
[codecarbon INFO @ 09:48:15] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 09:48:15] [setup] CPU Tracking...
[codecarbon INFO @ 09:48:16] CPU Model on constant consumption mode: AMD Ryzen 9 5950X 16-Core Processor
[codecarbon INFO @ 09:48:16] >>> Tracker's metadata:
[codecarbon INFO @ 09:48:16]   Platform system: Windows-10-10.0.22631-SP0
[codecarbon INFO @ 09:48:16]   Python version: 3.11.4
[codecarbon INFO @ 09:48:16]   CodeCarbon version: 2.5.0
[codecarbon INFO @ 09:48:16]   Available RAM : 63.944 GB
[codecarbon INFO @ 09:48:16]   CPU count: 32
[codecarbon INFO @ 09:48:16]   CPU model: AMD Ryzen 9 5950X 16-Core Processor
[codecarbon INFO @ 09:48:16]   GPU count: 1
[codecarbon INFO @ 09:48:16]   GPU model: 1 x NVIDIA GeForce RTX 3090
[codecarbon INFO @ 09:48:16] Saving emissions data to file c:\Users\JAL\Documents\GitHub\Art1\100\EPPD\emissions.csv
Verbosity: 2 (Standard Logg

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r19_BAG_L1    0.843666  0.843666   0.954839  0.743719   
1     NeuralNetTorch_r76_BAG_L1    0.843666  0.843666   0.954839  0.743719   
2     NeuralNetTorch_r14_BAG_L1    0.843666  0.843666   0.954839  0.743719   
3     NeuralNetTorch_r71_BAG_L1    0.843666  0.843666   0.957792  0.741206   
4     NeuralNetTorch_r41_BAG_L1    0.843666  0.843666   0.954839  0.743719   
5     NeuralNetTorch_r87_BAG_L1    0.843666  0.843666   0.954839  0.743719   
6     NeuralNetTorch_r36_BAG_L1    0.843666  0.843666   0.954839  0.743719   
7     NeuralNetTorch_r89_BAG_L1    0.843666  0.843666   0.954839  0.743719   
8         NeuralNetTorch_BAG_L1    0.843666  0.843666   0.954839  0.743719   
9     NeuralNetTorch_r86_BAG_L1    0.843666  0.843666   0.954839  0.743719   
10   NeuralNetTorch_r185_BAG_L1    0.843666  0.843666   0.954839  0.743719   
11    NeuralNetTorch_r31_BAG_L1    0.843666  0.843666   0.954839

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r71_BAG_L1       0.866667   0.845599    accuracy        0.069668       0.022572    3.029003                 0.069668                0.022572           3.029003            1       True         27
1     NeuralNetTorch_r14_BAG_L1       0.866667   0.843703    accuracy        0.071102       0.018502    2.869732                 0.071102                0.018502           2.869732            1       True         12
2           WeightedEnsemble_L2       0.866667   0.845599    accuracy        0.075566       0.022572    3.104474                 0.005898                0.000000           0.075471            2       True         45
3     NeuralNetTorch_r76_BAG_L1       0.866667   0.845220    accuracy        0.076079       0.020

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r71_BAG_L1    0.845920  0.845920   0.947050  0.763880   
1     NeuralNetTorch_r19_BAG_L1    0.845920  0.845920   0.947050  0.763880   
2     NeuralNetTorch_r14_BAG_L1    0.845920  0.845920   0.947050  0.763880   
3     NeuralNetTorch_r76_BAG_L1    0.845920  0.845920   0.947050  0.763880   
4     NeuralNetTorch_r41_BAG_L1    0.845920  0.845920   0.947050  0.763880   
5     NeuralNetTorch_r87_BAG_L1    0.845920  0.845920   0.947050  0.763880   
6    NeuralNetTorch_r197_BAG_L1    0.845920  0.845920   0.947050  0.763880   
7     NeuralNetTorch_r36_BAG_L1    0.845920  0.845920   0.947050  0.763880   
8         NeuralNetTorch_BAG_L1    0.845920  0.845920   0.947050  0.763880   
9     NeuralNetTorch_r86_BAG_L1    0.845920  0.845920   0.947050  0.763880   
10    NeuralNetTorch_r89_BAG_L1    0.845920  0.845920   0.947050  0.763880   
11   NeuralNetTorch_r185_BAG_L1    0.845920  0.845920   0.947050

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r14_BAG_L1       0.858871   0.845648    accuracy        0.066508       0.016499    2.495673                 0.066508                0.016499           2.495673            1       True         12
1     NeuralNetTorch_r76_BAG_L1       0.858871   0.844636    accuracy        0.067736       0.040319    2.710382                 0.067736                0.040319           2.710382            1       True         33
2    NeuralNetTorch_r197_BAG_L1       0.858871   0.846660    accuracy        0.078513       0.043324    3.352503                 0.078513                0.043324           3.352503            1       True         19
3     NeuralNetTorch_r87_BAG_L1       0.858871   0.850709    accuracy        0.080987       0.024

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r19_BAG_L1    0.843666  0.843666   0.954839  0.743719   
1     NeuralNetTorch_r71_BAG_L1    0.843666  0.843666   0.954839  0.743719   
2     NeuralNetTorch_r76_BAG_L1    0.843666  0.843666   0.954839  0.743719   
3     NeuralNetTorch_r14_BAG_L1    0.843666  0.843666   0.954839  0.743719   
4     NeuralNetTorch_r36_BAG_L1    0.843666  0.843666   0.954839  0.743719   
5     NeuralNetTorch_r87_BAG_L1    0.843666  0.843666   0.954839  0.743719   
6     NeuralNetTorch_r41_BAG_L1    0.843666  0.843666   0.954839  0.743719   
7     NeuralNetTorch_r31_BAG_L1    0.843666  0.843666   0.954839  0.743719   
8    NeuralNetTorch_r185_BAG_L1    0.843666  0.843666   0.954839  0.743719   
9         NeuralNetTorch_BAG_L1    0.843666  0.843666   0.954839  0.743719   
10     NeuralNetTorch_r1_BAG_L1    0.843666  0.843666   0.954839  0.743719   
11    NeuralNetTorch_r79_BAG_L1    0.843666  0.843666   0.954839

Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r76_BAG_L1       0.866667   0.845220    accuracy        0.062887       0.046883    3.275959                 0.062887                0.046883           3.275959            1       True         33
1     NeuralNetTorch_r71_BAG_L1       0.866667   0.845599    accuracy        0.062887       0.021845    3.009025                 0.062887                0.021845           3.009025            1       True         27
2     NeuralNetTorch_r14_BAG_L1       0.866667   0.843703    accuracy        0.062889       0.049265    2.961307                 0.062889                0.049265           2.961307            1       True         12
3     NeuralNetTorch_r19_BAG_L1       0.866667   0.844841    accuracy        0.068509       0.025

                          model  score_test  accuracy  precision    recall  \
0     NeuralNetTorch_r14_BAG_L1    0.845920  0.845920   0.947050  0.763880   
1     NeuralNetTorch_r76_BAG_L1    0.845920  0.845920   0.947050  0.763880   
2     NeuralNetTorch_r19_BAG_L1    0.845920  0.845920   0.947050  0.763880   
3     NeuralNetTorch_r71_BAG_L1    0.845920  0.845920   0.947050  0.763880   
4     NeuralNetTorch_r41_BAG_L1    0.845920  0.845920   0.947050  0.763880   
5     NeuralNetTorch_r87_BAG_L1    0.845920  0.845920   0.947050  0.763880   
6    NeuralNetTorch_r197_BAG_L1    0.845920  0.845920   0.947050  0.763880   
7         NeuralNetTorch_BAG_L1    0.845920  0.845920   0.947050  0.763880   
8     NeuralNetTorch_r36_BAG_L1    0.845920  0.845920   0.947050  0.763880   
9     NeuralNetTorch_r86_BAG_L1    0.845920  0.845920   0.947050  0.763880   
10   NeuralNetTorch_r185_BAG_L1    0.845920  0.845920   0.947050  0.763880   
11    NeuralNetTorch_r31_BAG_L1    0.845920  0.845920   0.947050