<h1 style='text-align:center;color:green;font-weight:bold'>ML-Based Campus Placement Prediction System</h1>

### 📦 Step 1: Import Required Libraries

In [80]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

### 📥 Step 2: Load and Prepare Data

In [94]:
#readind data
df = pd.read_csv('Placement.csv')

In [95]:
# display five rows of data
df.head()

Unnamed: 0,sl_no,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status,salary
0,1,0,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8,Placed,270000.0
1,2,0,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed,200000.0
2,3,0,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8,Placed,250000.0
3,4,0,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed,
4,5,0,85.8,Central,73.6,Central,Commerce,73.3,Comm&Mgmt,No,96.8,Mkt&Fin,55.5,Placed,425000.0


In [96]:
#no of rows and columns
df.shape

(215, 15)

In [97]:
#getting basic information of data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sl_no           215 non-null    int64  
 1   gender          215 non-null    int64  
 2   ssc_p           215 non-null    float64
 3   ssc_b           215 non-null    object 
 4   hsc_p           215 non-null    float64
 5   hsc_b           215 non-null    object 
 6   hsc_s           215 non-null    object 
 7   degree_p        215 non-null    float64
 8   degree_t        215 non-null    object 
 9   workex          215 non-null    object 
 10  etest_p         215 non-null    float64
 11  specialisation  215 non-null    object 
 12  mba_p           215 non-null    float64
 13  status          215 non-null    object 
 14  salary          148 non-null    float64
dtypes: float64(6), int64(2), object(7)
memory usage: 25.3+ KB


In [98]:
#stastical information of data
df.describe()

Unnamed: 0,sl_no,gender,ssc_p,hsc_p,degree_p,etest_p,mba_p,salary
count,215.0,215.0,215.0,215.0,215.0,215.0,215.0,148.0
mean,108.0,0.353488,67.303395,66.333163,66.370186,72.100558,62.278186,288655.405405
std,62.209324,0.479168,10.827205,10.897509,7.358743,13.275956,5.833385,93457.45242
min,1.0,0.0,40.89,37.0,50.0,50.0,51.21,200000.0
25%,54.5,0.0,60.6,60.9,61.0,60.0,57.945,240000.0
50%,108.0,0.0,67.0,65.0,66.0,71.0,62.0,265000.0
75%,161.5,1.0,75.7,73.0,72.0,83.5,66.255,300000.0
max,215.0,1.0,89.4,97.7,91.0,98.0,77.89,940000.0


In [99]:
#finding null values
df.isnull().sum()

sl_no              0
gender             0
ssc_p              0
ssc_b              0
hsc_p              0
hsc_b              0
hsc_s              0
degree_p           0
degree_t           0
workex             0
etest_p            0
specialisation     0
mba_p              0
status             0
salary            67
dtype: int64

In [100]:
#hundling missing value
df['salary'] = df['salary'].fillna(value=df['salary'].mean())
df.isnull().sum().sum()

0

In [101]:
#finding duplicated value
df.duplicated().sum()

0

### 🧪 Step 3: Define Feature Groups

In [102]:
X = df.drop(['sl_no','status'], axis=1)
y = df['status']

num_cols = ['ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p', 'salary']
binary_cat_cols = ['ssc_b', 'hsc_b', 'workex']
nominal_cat_cols = ['hsc_s', 'degree_t', 'specialisation']

#### ⚙️ Step 4: Create Preprocessing Transformer

In [103]:
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), num_cols),
    ('bin', OrdinalEncoder(), binary_cat_cols),
    ('nom', OneHotEncoder(drop='first'), nominal_cat_cols)
])

### 🤖 Step 5: Define All Models

In [104]:
models = {
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Machine': svm.SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier()
}

### 🚀 Step 6: Train and Evaluate Each Model

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for name, model_instance in models.items():
    pipeline = Pipeline(steps=[
        ('preprocessing', preprocessor),
        ('classifier', model_instance)
    ])
    
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    
    print(f"\n🔍 Model: {name}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    
    # Save the trained model
    joblib.dump(pipeline, f"{name.replace(' ', '_').lower()}_model.joblib")



🔍 Model: Logistic Regression
Accuracy: 0.86
Classification Report:
               precision    recall  f1-score   support

  Not Placed       0.75      0.75      0.75        12
      Placed       0.90      0.90      0.90        31

    accuracy                           0.86        43
   macro avg       0.83      0.83      0.83        43
weighted avg       0.86      0.86      0.86        43

Confusion Matrix:
 [[ 9  3]
 [ 3 28]]

🔍 Model: K-Nearest Neighbors
Accuracy: 0.79
Classification Report:
               precision    recall  f1-score   support

  Not Placed       0.71      0.42      0.53        12
      Placed       0.81      0.94      0.87        31

    accuracy                           0.79        43
   macro avg       0.76      0.68      0.70        43
weighted avg       0.78      0.79      0.77        43

Confusion Matrix:
 [[ 5  7]
 [ 2 29]]

🔍 Model: Support Vector Machine
Accuracy: 0.84
Classification Report:
               precision    recall  f1-score   support

  Not

### Traind Your Model 

#### Split the dataset into train and test dataset

In [74]:
from sklearn.model_selection import train_test_split

In [75]:
X_train,X_test,y_train,y_test = train_test_split(X, y, train_size=0.2 ,random_state=42)

### Import Models

In [79]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier