In [1]:
pip install pytorch-tabnet pandas scikit-learn

Collecting pytorch-tabnetNote: you may need to restart the kernel to use updated packages.

  Using cached pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
Collecting tqdm>=4.36
  Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Collecting torch>=1.3
  Downloading torch-2.6.0-cp310-cp310-win_amd64.whl (204.2 MB)
Collecting fsspec
  Downloading fsspec-2025.3.2-py3-none-any.whl (194 kB)
Collecting sympy==1.13.1
  Downloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
Collecting filelock
  Downloading filelock-3.18.0-py3-none-any.whl (16 kB)
Collecting networkx
  Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy, networkx, fsspec, filelock, tqdm, torch, pytorch-tabnet
Successfully installed filelock-3.18.0 fsspec-2025.3.2 mpmath-1.3.0 networkx-3.4.2 pytorch-tabnet-4.1.0 sympy-1.13.1 torch-2.6.0 tqdm-4.67.1


You should consider upgrading via the 'c:\Users\VIBHANSHU JAIN\Desktop\Client Project\campus-placement-analysis\EDA_Notebooks\ML_models\tensorflow_env\Scripts\python.exe -m pip install --upgrade pip' command.


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
import torch
from imblearn.over_sampling import SMOTE

def print_feature_importance(importance, features, title):
    """Text-based feature importance display"""
    print(f"\n=== {title} ===")
    fi_df = pd.DataFrame({'Feature': features, 'Importance': importance})
    fi_df = fi_df.sort_values('Importance', ascending=False).head(15)
    print(fi_df.to_string(index=False))

# 1. Load and prepare data
file_path = r'C:\Users\VIBHANSHU JAIN\Desktop\Client Project\campus-placement-analysis\EDA_Notebooks\datasets\cleaned_placement_dataset.csv'
data = pd.read_csv(file_path)

# Create interaction terms based on your columns
data['cgpa_x_internships'] = data['cgpa'] * data['internships']
data['dsa_x_projects'] = data['dsa'] * data['no_of_projects']

# Separate features and targets
X = data.drop(['is_placed', 'salary_as_fresher'], axis=1)
y_class = data['is_placed']
y_reg = data['salary_as_fresher']

# Identify numerical columns (excluding one-hot encoded)
num_cols = ['cgpa', 'inter_gpa', 'ssc_gpa', 'internships', 'no_of_projects', 
            'no_of_programming_languages', 'dsa', 'mobile_dev', 'web_dev', 
            'Machine Learning', 'cloud', 'cgpa_x_internships', 'dsa_x_projects']

# Scale only numerical features
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# 2. Split data
X_train, X_test, y_class_train, y_class_test, y_reg_train, y_reg_test = train_test_split(
    X, y_class, y_reg, test_size=0.15, random_state=42, stratify=y_class
)

# 3. Handle class imbalance
if y_class_train.mean() < 0.4 or y_class_train.mean() > 0.6:
    print("\nApplying SMOTE to handle class imbalance...")
    smote = SMOTE(random_state=42)
    X_train, y_class_train = smote.fit_resample(X_train, y_class_train)
    # For synthetic samples, set salary to median of placed students
    median_salary = y_reg_train[y_reg_train.notna()].median()
    y_reg_train = pd.Series(np.where(y_class_train == 1, median_salary, np.nan))

# 4. Placement Classifier
print("\nTraining Placement Classifier...")
clf = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,
    optimizer_params={'lr': 2e-2, 'weight_decay': 1e-5},
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
    scheduler_params={'mode': 'max', 'patience': 5, 'factor': 0.5},
    mask_type='sparsemax',
    n_steps=5,
    n_d=32,
    n_a=32,
    gamma=1.3,
    lambda_sparse=1e-4,
    verbose=1
)

clf.fit(
    X_train.values, y_class_train.values,
    eval_set=[(X_test.values, y_class_test.values)],
    eval_metric=['accuracy', 'auc'],
    max_epochs=200,
    patience=30,
    batch_size=256,
    virtual_batch_size=128
)

# 5. Salary Regressor (only for placed students)
placed_train_mask = y_class_train == 1
if placed_train_mask.sum() > 0:
    print("\nTraining Salary Regressor...")
    reg = TabNetRegressor(
        optimizer_fn=torch.optim.Adam,
        optimizer_params={'lr': 2e-2, 'weight_decay': 1e-5},
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        scheduler_params={'mode': 'min', 'patience': 5, 'factor': 0.5},
        n_steps=5,
        n_d=32,
        n_a=32,
        gamma=1.3,
        lambda_sparse=1e-4,
        verbose=1
    )

    reg.fit(
        X_train[placed_train_mask].values,
        y_reg_train[placed_train_mask].values.reshape(-1, 1),
        eval_set=[(X_test[y_class_test == 1].values, 
                 y_reg_test[y_class_test == 1].values.reshape(-1, 1))],
        eval_metric=['rmse', 'mae'],
        max_epochs=200,
        patience=30,
        batch_size=256,
        virtual_batch_size=128
    )

# 6. Evaluation
print("\n=== Model Evaluation ===")
y_class_pred = clf.predict(X_test.values)
print(f"Placement Accuracy: {accuracy_score(y_class_test, y_class_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_class_test, y_class_pred))

placed_test_mask = y_class_test == 1
if placed_test_mask.sum() > 0 and 'reg' in locals():
    y_reg_pred = reg.predict(X_test[placed_test_mask].values)
    print("\nSalary Metrics (for placed students):")
    print(f"RMSE: {np.sqrt(mean_squared_error(y_reg_test[placed_test_mask], y_reg_pred)):.2f}")
    print(f"R² Score: {r2_score(y_reg_test[placed_test_mask], y_reg_pred):.4f}")

# 7. Feature Importance
print_feature_importance(clf.feature_importances_, X.columns, "Placement Feature Importance")
if 'reg' in locals():
    print_feature_importance(reg.feature_importances_, X.columns, "Salary Feature Importance")

# 8. Save models
clf.save_model('placement_model.zip')
if 'reg' in locals():
    reg.save_model('salary_model.zip')
print("\nModels saved successfully.")


Applying SMOTE to handle class imbalance...

Training Placement Classifier...




epoch 0  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 1  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 2  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 3  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 4  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 5  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 6  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 7  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 8  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 9  | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 10 | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0:00:00s
epoch 11 | loss: 0.0     | val_0_accuracy: 0.54545 | val_0_auc: 0.55238 |  0



epoch 6  | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 7  | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 8  | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 9  | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 10 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 11 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 12 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 13 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 14 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 15 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 16 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 17 | loss: 0.0     | val_0_rmse: 17.76452| val_0_mae: 14.73147|  0:00:00s
epoch 18 | loss: 0.0     | val_0_rmse: 1




=== Model Evaluation ===
Placement Accuracy: 0.5455

Classification Report:
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         7
         1.0       0.63      0.80      0.71        15

    accuracy                           0.55        22
   macro avg       0.32      0.40      0.35        22
weighted avg       0.43      0.55      0.48        22


Salary Metrics (for placed students):
RMSE: 17.76
R² Score: -3.4384

=== Placement Feature Importance ===
                        Feature  Importance
                         tier_2    0.131083
                            dsa    0.086642
    no_of_programming_languages    0.073214
                 dsa_x_projects    0.072105
                    internships    0.072021
               Machine Learning    0.067882
                       gender_M    0.066370
                        web_dev    0.065241
             cgpa_x_internships    0.045788
                           cgpa    0.033630
     