In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


In [None]:

# Build a synthetic cohort of student profiles plus an internship outcome flag.
# The legacy global NumPy RNG is seeded so every draw below is reproducible.
# Columns are filled in a fixed order because each draw advances that shared RNG.
np.random.seed(42)

n = 500  # number of simulated students (rows)

data = {}
data['CGPA'] = np.random.normal(7.5, 0.7, n).round(2)
data['Participation Score'] = np.random.randint(1, 11, size=n)  # integers 1..10
data['Project Count'] = np.random.randint(0, 6, size=n)         # integers 0..5
data['Workshops Attended'] = np.random.randint(0, 10, size=n)   # integers 0..9
data['Year'] = np.random.choice([2, 3, 4], size=n)
data['Branch'] = np.random.choice(['CSE', 'ECE', 'EEE', 'MECH', 'CIVIL'], size=n)
# The outcome label is drawn on its own (40% zeros / 60% ones); it is not
# computed from any of the feature columns above.
data['Selected'] = np.random.choice([0, 1], size=n, p=[0.4, 0.6])

df = pd.DataFrame(data)
# Preview the first rows as the cell output.
df.head()


In [None]:

# Convert categorical data (one-hot encode 'Branch', dropping the first level).
# The guard keeps this cell re-runnable: `df` is reassigned here, so on a second
# execution the 'Branch' column is already gone and get_dummies would raise.
if 'Branch' in df.columns:
    df = pd.get_dummies(df, columns=['Branch'], drop_first=True)

# Features and target
X = df.drop('Selected', axis=1)
y = df['Selected']

# Split data BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting on the full matrix leaks test-set statistics into
# training). test_size and random_state are unchanged from the earlier version.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize features: learn the scaling from the training rows only, then apply
# that same fitted transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any other cell that reads `X_scaled` still finds it; it is the full
# feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

X.shape, y.shape


In [None]:

# Baseline linear classifier with scikit-learn's default settings.
# `fit` returns the estimator itself, so construction and training are chained.
logreg = LogisticRegression().fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
logreg_report = classification_report(y_test, y_pred_logreg)
print("Logistic Regression Classification Report:", logreg_report, sep="\n")


In [None]:

# Tree ensemble: 100 trees, seeded so repeated runs give the same forest.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Classification Report:", rf_report, sep="\n")


In [None]:

# Support vector classifier with scikit-learn's default settings.
svm = SVC().fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
svm_report = classification_report(y_test, y_pred_svm)
print("Support Vector Machine Classification Report:", svm_report, sep="\n")


In [None]:

# One confusion-matrix heatmap per model, side by side on a single row.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Display label -> test-set predictions, in left-to-right panel order.
predictions_by_model = {
    'LogReg': y_pred_logreg,
    'RandomForest': y_pred_rf,
    'SVM': y_pred_svm,
}

for ax, (model_name, predictions) in zip(axes, predictions_by_model.items()):
    # Rows are the actual labels, columns the predicted labels.
    matrix = confusion_matrix(y_test, predictions)
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set(
        title=f'{model_name} Confusion Matrix',
        xlabel='Predicted',
        ylabel='Actual',
    )

fig.tight_layout()
plt.show()
