In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from kerastuner.tuners import RandomSearch
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score


2024-07-08 15:16:46.680802: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-08 15:16:46.701994: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-08 15:16:46.702023: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-08 15:16:46.713822: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from kerastuner.tuners import RandomSearch


In [2]:
file_path = '/home/bhikrant07/Desktop/AI/KU_STUDENT_DATA_ON_CAMPUS_PLACEMENT.csv'
data = pd.read_csv(file_path)

print(data.head(1))

  Branch               Cepo  Program  End term exam SGPA - 1st semester   \
0    CSE  Currently enrolled  B.Tech                                 6.5   

   End term exam SGPA - 2nd semester  End term exam SGPA - 3rd semester  \
0                                7.2                                6.1   

   End term exam SGPA - 4th semester  End term exam SGPA - 5th semester  \
0                                8.2                                6.8   

   End term exam SGPA - 6th semester  End term exam SGPA - 7th semester  ...  \
0                                6.6                                6.3  ...   

   EDU_LN  SCHL_RCV  URB_RUR INT_CONN How many hrs you study after school?  \
0     Yes        No    Urban     Good                                    1   

   How many value added program you have entered? (coursera/ AWS/IBM etc)  \
0                                                  2                        

   SPOR_PSN  COC_PART COC_PART_ROLE Cam_plc  
0       Yes       yes   Te

In [3]:
data = data.apply(lambda x: x.str.lower() if x.dtype == "object" else x) 

# Splitting the data for CGPA prediction
X_cgpa = data.drop(['CGPA after 8th semester', 'Cam_plc'], axis=1)
y_cgpa = data['CGPA after 8th semester']

X_train_cgpa, X_test_cgpa, y_train_cgpa, y_test_cgpa = train_test_split(X_cgpa, y_cgpa, test_size=0.2, random_state=42)

# Splitting the data for placement prediction (including CGPA as a feature)
X_placement = data.drop(['Cam_plc'], axis=1)
y_placement = data['Cam_plc']

X_train_placement, X_test_placement, y_train_placement, y_test_placement = train_test_split(X_placement, y_placement, test_size=0.2, random_state=42)


In [4]:
# Fit the encoders and scalers on the training data only
categorical_features = X_cgpa.select_dtypes(include=['object']).columns.tolist()
numerical_features_cgpa = X_cgpa.select_dtypes(include=['float64', 'int64']).columns.tolist()
numerical_features_placement = X_placement.select_dtypes(include=['float64', 'int64']).columns.tolist()

encoder = OneHotEncoder(sparse_output=False, drop='first')
scaler_cgpa = StandardScaler()
scaler_placement = StandardScaler()

In [5]:
from sklearn.preprocessing import LabelEncoder
# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Encode the target variable 'y' for both train and test sets
y_train_placement = label_encoder.fit_transform(y_train_placement)
y_test_placement = label_encoder.transform(y_test_placement)

In [6]:
# Process CGPA data
X_train_categorical_cgpa = encoder.fit_transform(X_train_cgpa[categorical_features])
X_test_categorical_cgpa = encoder.transform(X_test_cgpa[categorical_features])

X_train_numerical_cgpa = scaler_cgpa.fit_transform(X_train_cgpa[numerical_features_cgpa])
X_test_numerical_cgpa = scaler_cgpa.transform(X_test_cgpa[numerical_features_cgpa])

X_train_processed_cgpa = pd.concat([pd.DataFrame(X_train_categorical_cgpa, columns=encoder.get_feature_names_out(categorical_features)),
                                    pd.DataFrame(X_train_numerical_cgpa, columns=numerical_features_cgpa)], axis=1)
X_test_processed_cgpa = pd.concat([pd.DataFrame(X_test_categorical_cgpa, columns=encoder.get_feature_names_out(categorical_features)),
                                   pd.DataFrame(X_test_numerical_cgpa, columns=numerical_features_cgpa)], axis=1)

# Process Placement data
X_train_categorical_placement = encoder.transform(X_train_placement[categorical_features])
X_test_categorical_placement = encoder.transform(X_test_placement[categorical_features])

X_train_numerical_placement = scaler_placement.fit_transform(X_train_placement[numerical_features_placement])
X_test_numerical_placement = scaler_placement.transform(X_test_placement[numerical_features_placement])

X_train_processed_placement = pd.concat([pd.DataFrame(X_train_categorical_placement, columns=encoder.get_feature_names_out(categorical_features)),
                                         pd.DataFrame(X_train_numerical_placement, columns=numerical_features_placement)], axis=1)
X_test_processed_placement = pd.concat([pd.DataFrame(X_test_categorical_placement, columns=encoder.get_feature_names_out(categorical_features)),
                                        pd.DataFrame(X_test_numerical_placement, columns=numerical_features_placement)], axis=1)

In [7]:
print(X_train_processed_placement.head(1))

   Branch_cse  Branch_ece  Branch_ee  Branch_me  Branch_mscit  \
0         0.0         0.0        0.0        1.0           0.0   

   Cepo _passed out  Program_mscit  C_X_B_state board  C_XII_B_state board  \
0               0.0            0.0                0.0                  1.0   

   M_F_male  ...  End term exam SGPA - 6th semester  \
0       0.0  ...                           0.523461   

   End term exam SGPA - 7th semester  End term exam SGPA - 8th semester  \
0                          -0.660108                          -0.694831   

   CGPA after 8th semester  Class X grade  Class XII grade  \
0                 -0.77013      -0.054577         0.080749   

   Overall Attendance percentage  Number of internships during undergraduate.  \
0                       0.166957                                     0.168771   

   How many hrs you study after school?  \
0                              -0.71617   

   How many value added program you have entered? (coursera/ AWS/IBM etc)  

In [8]:
# Reshape data for LSTM input
X_train_cgpa_reshaped = np.reshape(X_train_processed_cgpa.values, (X_train_processed_cgpa.shape[0], X_train_processed_cgpa.shape[1], 1))
X_test_cgpa_reshaped = np.reshape(X_test_processed_cgpa.values, (X_test_processed_cgpa.shape[0], X_test_processed_cgpa.shape[1], 1))

X_train_placement_reshaped = np.reshape(X_train_processed_placement.values, (X_train_processed_placement.shape[0], X_train_processed_placement.shape[1], 1))
X_test_placement_reshaped = np.reshape(X_test_processed_placement.values, (X_test_processed_placement.shape[0], X_test_processed_placement.shape[1], 1))

In [9]:
# Define the model building function for CGPA prediction
def build_model_cgpa(hp):
    model = Sequential()
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(units=hp.Int('units_' + str(i), min_value=32, max_value=128, step=32),
                       return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
                       input_shape=(X_train_cgpa_reshaped.shape[1], 1)))
        model.add(Dropout(rate=hp.Float('dropout_' + str(i), min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Define the model building function for placement prediction
def build_model_placement(hp):
    model = Sequential()
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(LSTM(units=hp.Int('units_' + str(i), min_value=32, max_value=128, step=32),
                       return_sequences=True if i < hp.Int('num_layers', 1, 3) - 1 else False,
                       input_shape=(X_train_placement_reshaped.shape[1], 1)))
        model.add(Dropout(rate=hp.Float('dropout_' + str(i), min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [10]:
# Hyperparameter tuning with Keras Tuner for CGPA prediction
tuner_cgpa = RandomSearch(build_model_cgpa,
                          objective='val_loss',
                          max_trials=5,
                          executions_per_trial=3,
                          directory='my_dir',
                          project_name='cgpa_tuning')

tuner_cgpa.search(X_train_cgpa_reshaped, y_train_cgpa, epochs=50, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

# Get the optimal hyperparameters
best_hps_cgpa = tuner_cgpa.get_best_hyperparameters(num_trials=1)[0]
print(f'Best CGPA Hyperparameters: {best_hps_cgpa.values}')

Reloading Tuner from my_dir/cgpa_tuning/tuner0.json
Best CGPA Hyperparameters: {'num_layers': 2, 'units_0': 32, 'dropout_0': 0.2, 'units_1': 64, 'dropout_1': 0.4, 'units_2': 96, 'dropout_2': 0.2}


In [11]:
# Hyperparameter tuning with Keras Tuner for placement prediction
tuner_placement = RandomSearch(
    build_model_placement,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='my_dir',
    project_name='placement_tuning')

tuner_placement.search(X_train_placement_reshaped, y_train_placement, epochs=50, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

# Get the optimal hyperparameters for placement prediction
best_hps_placement = tuner_placement.get_best_hyperparameters(num_trials=1)[0]
print(f'Best Placement Hyperparameters: {best_hps_placement.values}')

2024-07-08 15:16:49.599374: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Reloading Tuner from my_dir/placement_tuning/tuner0.json
Best Placement Hyperparameters: {'num_layers': 1, 'units_0': 128, 'dropout_0': 0.4, 'units_1': 96, 'dropout_1': 0.30000000000000004, 'units_2': 32, 'dropout_2': 0.4}


In [12]:
# Build and train the final CGPA model
model_cgpa = tuner_cgpa.hypermodel.build(best_hps_cgpa)
epochs_cgpa = best_hps_cgpa['epochs'] if 'epochs' in best_hps_cgpa else 50  # Default to 50 if 'epochs' is not found
history_cgpa = model_cgpa.fit(X_train_cgpa_reshaped, y_train_cgpa, epochs=epochs_cgpa, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', patience=10)])

# Build and train the final placement model
model_placement = tuner_placement.hypermodel.build(best_hps_placement)
epochs_placement = best_hps_placement['epochs'] if 'epochs' in best_hps_placement else 50  # Default to 50 if 'epochs' is not found
history_placement = model_placement.fit(X_train_placement_reshaped, y_train_placement, epochs=epochs_placement, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', patience=10)])


Epoch 1/50


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 56ms/step - loss: 48.3886 - val_loss: 45.1692
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 38.6726 - val_loss: 13.1377
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 9.0412 - val_loss: 2.2004
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 2.5785 - val_loss: 1.0387
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 2.1307 - val_loss: 1.1623
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 1.9124 - val_loss: 0.8710
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 1.8358 - val_loss: 1.0196
Epoch 8/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 1.7485 - val_loss: 1.0148
Epoch 9/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - accuracy: 0.4211 - loss: 0.6975 - val_accuracy: 0.4576 - val_loss: 0.6934
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5314 - loss: 0.6930 - val_accuracy: 0.4746 - val_loss: 0.6932
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4692 - loss: 0.6931 - val_accuracy: 0.4915 - val_loss: 0.6935
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4870 - loss: 0.6929 - val_accuracy: 0.5593 - val_loss: 0.6931
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5406 - loss: 0.6905 - val_accuracy: 0.5763 - val_loss: 0.6921
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5150 - loss: 0.6882 - val_accuracy: 0.5763 - val_loss: 0.6916
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [28]:
def predict_student_cgpa(student_index):
    # Select student data from X_cgpa
    student_data_a = X_cgpa.iloc[student_index]
    
    # Convert student_data_a to NumPy array and reshape for model input
    cgpa_input = np.array(student_data_a).reshape((1, len(student_data_a), 1))
    
    # Perform CGPA prediction using model_cgpa
    predicted_cgpa = model_cgpa.predict(cgpa_input)
    
    # Reshape predicted_cgpa for inverse transform
    predicted_cgpa_reshaped = predicted_cgpa.reshape((1, 1))  # Assuming a single prediction for one student
    
    # Inverse transform to get CGPA in original scale
    predicted_cgpa_original = scaler_cgpa.inverse_transform(predicted_cgpa_reshaped)
    
    # Print the predicted CGPA in original scale
    print('Predicted CGPA (original scale):', predicted_cgpa_original[0][0])



def predict_student_placement(student_index):
    # Select student data from X_placement
    student_data_b = X_placement.iloc[student_index]
    
    # Convert student_data_b to NumPy array and reshape for model input
    placement_input = np.array(student_data_b).reshape((1, len(student_data_b), 1))
    
    # Perform placement prediction using model_placement
    predicted_placement = model_placement.predict(placement_input)
    
    # Define optimal_threshold for binary classification
    optimal_threshold = 0.5
    
    # Print the predicted placement outcome based on the threshold
    print('Predicted Placement:', 'Placed' if predicted_placement[0][0] > optimal_threshold else 'Not Placed')


In [14]:
# # Evaluate the models
# y_pred_cgpa = model_cgpa.predict(X_test_cgpa_reshaped)
# y_pred_cgpa_original = scaler_cgpa.inverse_transform(y_pred_cgpa.reshape(-1, 1))
# rmse = np.sqrt(mean_squared_error(y_test_cgpa, y_pred_cgpa_original))
# print('RMSE for CGPA Prediction:', rmse)

# y_pred_placement = model_placement.predict(X_test_placement_reshaped)
# y_pred_placement_binary = (y_pred_placement > 0.5).astype(int)

# accuracy = accuracy_score(y_test_placement, y_pred_placement_binary)
# precision = precision_score(y_test_placement, y_pred_placement_binary)
# recall = recall_score(y_test_placement, y_pred_placement_binary)
# f1 = f1_score(y_test_placement, y_pred_placement_binary)

# print(f'Accuracy: {accuracy}')
# print(f'Precision: {precision}')
# print(f'Recall: {recall}')
# print(f'F1 Score: {f1}')

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step


ValueError: non-broadcastable output operand with shape (74,1) doesn't match the broadcast shape (74,14)

In [19]:
import pickle
from sklearn.metrics import mean_squared_error, accuracy_score

# Assuming you have imported necessary libraries and defined tuner_cgpa, tuner_placement, X_train_cgpa_reshaped, y_train_cgpa, X_test_cgpa_reshaped, y_test_cgpa, X_train_placement_reshaped, y_train_placement, X_test_placement_reshaped, y_test_placement correctly

# Load the best hyperparameters for CGPA prediction
with open('best_hps_cgpa.pkl', 'rb') as f:
    best_hps_cgpa = pickle.load(f)

# Build and train the CGPA prediction model using the best hyperparameters
model_cgpa = tuner_cgpa.hypermodel.build(best_hps_cgpa)
model_cgpa.fit(X_train_cgpa_reshaped, y_train_cgpa, epochs=100, initial_epoch=10, validation_data=(X_test_cgpa_reshaped, y_test_cgpa))
# Assuming model_cgpa is defined and compiled correctly

# Print the shapes of X_test_cgpa_reshaped and y_test_cgpa
print('X_test_cgpa_reshaped shape:', X_test_cgpa_reshaped.shape)
print('y_test_cgpa shape:', y_test_cgpa.shape)

# Evaluate the model
evaluation_result = model_cgpa.evaluate(X_test_cgpa_reshaped, y_test_cgpa, verbose=0)
print('Evaluation result:', evaluation_result)

# Assuming evaluation_result is a float, you can access the MSE directly
mse_cgpa = evaluation_result
print('MSE for CGPA:', mse_cgpa)

# Load the best hyperparameters for placement prediction
with open('best_hps_placement.pkl', 'rb') as f:
    best_hps_placement = pickle.load(f)

# Build and train the placement prediction model using the best hyperparameters
model_placement = tuner_placement.hypermodel.build(best_hps_placement)
model_placement.fit(X_train_placement_reshaped, y_train_placement, epochs=100, initial_epoch=10, validation_data=(X_test_placement_reshaped, y_test_placement))
loss_placement, acc_placement = model_placement.evaluate(X_test_placement_reshaped, y_test_placement, verbose=0)
print('Accuracy for Placement:', acc_placement)


Epoch 11/100


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 76ms/step - loss: 43.8375 - val_loss: 3.5244
Epoch 12/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.3798 - val_loss: 1.4185
Epoch 13/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 1.9566 - val_loss: 1.1622
Epoch 14/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 1.4370 - val_loss: 0.9620
Epoch 15/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - loss: 1.4805 - val_loss: 0.9933
Epoch 16/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 1.4390 - val_loss: 0.9074
Epoch 17/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 1.2690 - val_loss: 0.8823
Epoch 18/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 1.3736 - val_loss: 0.8294
Epoch 19/100
[1m10/10[0m [32m━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - accuracy: 0.4852 - loss: 0.6930 - val_accuracy: 0.4595 - val_loss: 0.6980
Epoch 12/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4696 - loss: 0.6954 - val_accuracy: 0.4054 - val_loss: 0.7004
Epoch 13/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4761 - loss: 0.6933 - val_accuracy: 0.3784 - val_loss: 0.7025
Epoch 14/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4792 - loss: 0.6920 - val_accuracy: 0.4595 - val_loss: 0.6975
Epoch 15/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.5124 - loss: 0.6922 - val_accuracy: 0.4595 - val_loss: 0.6984
Epoch 16/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5461 - loss: 0.6895 - val_accuracy: 0.4459 - val_loss: 0.6966
Epoch 17/100
[1m10/10[0m [32m━━━━

In [29]:
# Example of how to debug the index error
print('X_test_processed_cgpa shape:', X_test_processed_cgpa.shape)
print('X_test_processed_placement shape:', X_test_processed_placement.shape)

print(data.iloc[10]['CGPA after 8th semester'])
predict_student_cgpa(10)
print(data.iloc[10]['Cam_plc'])
predict_student_placement(10)


X_test_processed_cgpa shape: (74, 34)
X_test_processed_placement shape: (74, 35)
9.13


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.float64).