In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tech_recommender import recommend_skills  # Use your actual module

# STEP 1: Load Dataset
df = pd.read_excel("Student_Data_With_Extras.xlsx")

# STEP 2: Create Placement Tier Labels (based on Placement column)
def placement_to_tier(salary):
    if salary >= 2000000:
        return 'Tier 1'
    elif salary >= 1000000:
        return 'Tier 2'
    elif salary >= 500000:
        return 'Tier 3'
    else:
        return 'Tier 4'

df['Placement_Tier'] = df['Placement'].apply(placement_to_tier)

# STEP 3: Compute Skill Match Score using Recommender
def compute_skill_match(row):
    try:
        field = row['Field']
        skills = set(row['Skills'].split(', '))
        result = recommend_skills(field, skills)
        return result['Match Score (%)']
    except:
        return 50  # fallback default if data is missing or malformed

df['Skill_Match_Score'] = df.apply(compute_skill_match, axis=1)

# STEP 4: Feature Selection
features = [
    'Coding_Profile_Rating', 'Grades', 'Major_Projects', 'Mini_Projects',
    'Internship', 'Hackathon', 'Skill_Match_Score',
    'Communication_Skill_Rating', 'Workshops_Certifications','Attendance'
]

X = df[features]
y = df['Placement_Tier']

# STEP 5: Encode and Scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# STEP 6: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# STEP 7: Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# STEP 8: Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

      Tier 1       0.94      0.95      0.95      1088
      Tier 2       0.74      0.76      0.75       429
      Tier 3       0.54      0.46      0.50       162
      Tier 4       0.73      0.70      0.71       126

    accuracy                           0.85      1805
   macro avg       0.74      0.72      0.73      1805
weighted avg       0.84      0.85      0.84      1805



In [3]:
# STEP 9: Save Model and Scaler
import joblib
joblib.dump(clf, "placement_tier_classifier.pkl")
joblib.dump(scaler, "placement_scaler.pkl")


['placement_scaler.pkl']

In [None]:
'Class_Participation_High', 'Professor_Quality',

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score



In [None]:
# STEP 1: Load Dataset
df = pd.read_excel("Student_Data_With_Extras.xlsx")

# STEP 2: Simulate Skill Match Score (from Tech Skill Recommender)
np.random.seed(42)
df['Skill_Match_Score'] = np.random.randint(40, 101, size=len(df))


In [None]:
# STEP 3: Define Features and Target
important_features = [
    'Coding_Profile_Rating', 'Grades', 'Major_Projects', 'Mini_Projects',
    'Internship', 'Hackathon', 'Skill_Match_Score'
]

supportive_features = [
    'Communication_Skill_Rating', 'Workshops_Certifications',
    'Class_Participation_High', 'Professor_Quality', 'Attendance'
]

X = df[important_features + supportive_features]
y = df['Placement']


In [None]:
# STEP 4: Preprocessing
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)



In [None]:
# STEP 5: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# STEP 6: Train Random Forest
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [None]:
# STEP 7: Evaluate
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

# STEP 8: Save Model and Scaler (optional)
import joblib
joblib.dump(model, "placement_rf_model.pkl")
joblib.dump(scaler, "placement_scaler.pkl")