In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import pickle
import warnings
from flask import Flask, request, render_template

# Suppress warnings
warnings.filterwarnings('ignore')

# Read the dataset
df = pd.read_csv('/content/prediction_data.csv')
print("Dataset Shape:", df.shape)
print("First few rows of the dataset:")
print(df.head())

# Drop unnecessary columns
df.drop([
    'Round_1_Technical',
    'Round_2_Essay_Writing',
    'Round_3_Gaming',
    'Round_4_Problem_Solving',
    'Round_5_Speaking',
    'Round_6_Interview'
], axis=1, inplace=True)

# Define possible categories for categorical columns
possible_levels = {
    'Project_Level': ['Beginner', 'Intermediate', 'Advanced', 'Expert'],
    'Internship_Level': ['Beginner', 'Intermediate', 'Advanced', 'Expert'],
    'Certification_Level': ['Beginner', 'Intermediate', 'Advanced', 'Expert'],
    'Field_of_Study': ['CS', 'IT', 'ECE', 'EEE', 'Mechanical', 'Civil', 'Other']
}

# Encode categorical columns
le_dict = {}
for col, levels in possible_levels.items():
    le = LabelEncoder()
    le.fit(levels)
    le_dict[col] = le
    if col in df.columns:
        df[col] = df[col].fillna(levels[0])  # Fill missing with the first level
        df[col] = le.fit_transform(df[col])

# Encode target column
target_encoder = LabelEncoder()
df['Placed'] = df['Placed'].fillna('No')  # Fill missing values
df['Placed'] = target_encoder.fit_transform(df['Placed'])

# Save label encoders for future use
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump({'categorical': le_dict, 'target': target_encoder}, f)

# Prepare features and target
X = df.drop(['Placed'], axis=1)
y = df['Placed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Save the trained model
with open('placement_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)
print("Model trained and saved as placement_model.pkl")

Dataset Shape: (500, 21)
First few rows of the dataset:
    CPI  English_Speaking_Skills  Grammar_Skills  Coding_Skills  \
0  6.09                       10               9              8   
1  7.38                        1               5              2   
2  9.36                        3               2              2   
3  6.29                        4               1              6   
4  6.27                        6               7              3   

   Total_No_of_Projects Project_Level Internship_Level Certification_Level  \
0                     7  Intermediate              NaN               Basic   
1                    20  Intermediate              NaN                 NaN   
2                     3      Beginner              NaN        Professional   
3                    12  Intermediate              NaN              Expert   
4                    12      Advanced              NaN                 NaN   

   Total_No_of_Internships  Total_No_of_Certifications  ...  Field_of_St

In [7]:
df.head(375)

Unnamed: 0,CPI,English_Speaking_Skills,Grammar_Skills,Coding_Skills,Total_No_of_Projects,Project_Level,Internship_Level,Certification_Level,Total_No_of_Internships,Total_No_of_Certifications,Programming_Skills,Field_of_Study,Total_No_of_LeetCode_Questions_Solved,Total_No_of_Questions_Other_Platform,Placed
0,6.09,10,9,8,7,2,1,0,1,2,1,2,187,167,0
1,7.38,1,5,2,20,2,1,1,0,0,1,2,88,323,1
2,9.36,3,2,2,3,1,1,3,3,11,7,2,411,126,0
3,6.29,4,1,6,12,2,1,2,1,7,6,0,137,232,0
4,6.27,6,7,3,12,0,1,1,7,7,10,2,139,236,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,6.82,8,4,7,13,0,1,3,5,1,5,4,463,380,0
371,9.30,1,4,7,12,1,0,0,5,4,2,1,215,105,0
372,9.94,9,1,2,17,0,2,2,4,10,4,4,436,306,1
373,9.45,3,2,5,7,1,1,3,2,0,8,1,385,91,1


In [5]:
X_test

Unnamed: 0,CPI,English_Speaking_Skills,Grammar_Skills,Coding_Skills,Total_No_of_Projects,Project_Level,Internship_Level,Certification_Level,Total_No_of_Internships,Total_No_of_Certifications,Programming_Skills,Field_of_Study,Total_No_of_LeetCode_Questions_Solved,Total_No_of_Questions_Other_Platform
361,7.17,3,3,3,6,0,2,2,9,4,7,0,414,341
73,8.46,5,9,2,0,1,0,3,6,10,8,2,57,203
374,8.35,6,2,6,13,2,1,1,0,7,3,3,393,82
155,9.78,1,9,4,9,2,1,3,4,15,4,0,273,381
104,8.83,5,5,9,1,0,2,2,9,2,3,1,50,478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347,9.39,1,8,1,6,1,1,2,3,3,6,2,291,485
86,8.60,4,8,2,1,0,0,1,1,3,6,1,290,199
75,7.37,9,3,10,9,0,1,3,10,11,8,2,275,78
438,7.81,10,2,10,19,2,0,2,5,3,6,0,318,111


In [4]:
rf_model.predict(X_test)

array([1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0])

In [6]:
import numpy as np

# Example input data as a feature array
input_data = [
    8.35,  # CPI
    6.0,   # English_Speaking_Skills
    2.0,   # Grammar_Skills
    6.0,   # Coding_Skills
    13,    # Total_No_of_Projects
    2,     # Project_Level (encoded)
    1,     # Internship_Level (encoded)
    1,     # Certification_Level (encoded)
    0,     # Total_No_of_Internships
    7,     # Total_No_of_Certifications
    3.0,   # Programming_Skills
    3,     # Field_of_Study (encoded)
    393,   # Total_No_of_LeetCode_Questions_Solved
    82     # Total_No_of_Questions_Other_Platform
]

# Convert input to a NumPy array and reshape for prediction
final_features = np.array(input_data).reshape(1, -1)

# Load the trained RandomForest model
import pickle
with open('placement_model.pkl', 'rb') as f:
    rf_model = pickle.load(f)

# Perform prediction
prediction = rf_model.predict(final_features)

# Output the result
output = 'Placed' if prediction[0] == 1 else 'Not Placed'
print(f'Prediction: {output}')


Prediction: Not Placed
