In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Location of the campus-placement survey CSV. NOTE(review): this is an absolute
# path on one machine; the notebook will not run elsewhere until it is changed.
file_path = '/home/bhikrant07/Desktop/AI/KU_STUDENT_DATA_ON_CAMPUS_PLACEMENT.csv'
# Raw survey table; later cells lower-case its text columns and derive features from it.
data = pd.read_csv(file_path)

# Print only the first record as a quick check of the column names and value formats.
print(data.head(1))

  Branch               Cepo  Program  End term exam SGPA - 1st semester   \
0    CSE  Currently enrolled  B.Tech                                 6.5   

   End term exam SGPA - 2nd semester  End term exam SGPA - 3rd semester  \
0                                7.2                                6.1   

   End term exam SGPA - 4th semester  End term exam SGPA - 5th semester  \
0                                8.2                                6.8   

   End term exam SGPA - 6th semester  End term exam SGPA - 7th semester  ...  \
0                                6.6                                6.3  ...   

   EDU_LN  SCHL_RCV  URB_RUR INT_CONN How many hrs you study after school?  \
0     Yes        No    Urban     Good                                    1   

   How many value added program you have entered? (coursera/ AWS/IBM etc)  \
0                                                  2                        

   SPOR_PSN  COC_PART COC_PART_ROLE Cam_plc  
0       Yes       yes   Te

In [2]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Lower-case every text column so spelling variants such as 'Yes' / 'yes'
# collapse into a single category before encoding. Numeric columns pass through.
data = data.apply(lambda x: x.str.lower() if x.dtype == "object" else x)

# --- Categorical columns: one-hot encode, dropping the first level of each
# column to avoid perfectly collinear indicator columns.
categorical_features = data.select_dtypes(include=['object']).columns.tolist()
# scikit-learn renamed `sparse` to `sparse_output` in 1.2 and removed the old
# keyword in 1.4. Try the current spelling first and fall back for older
# installations so the cell runs on either side of the rename.
try:
    encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    encoder = OneHotEncoder(sparse=False, drop='first')
encoded_categorical_data = encoder.fit_transform(data[categorical_features])

# --- Numeric columns: standardise to zero mean / unit variance.
# NOTE(review): both the encoder and the scaler are fit on the full table, i.e.
# before any train/test split, so test-set statistics leak into the features.
numerical_features = data.select_dtypes(include=['float64', 'int64']).columns.tolist()
scaler = StandardScaler()
scaled_numerical_data = scaler.fit_transform(data[numerical_features])

# Reassemble a single feature table: encoded categoricals first, then scaled
# numerics. Both frames get a fresh default index, so they align row by row.
encoded_categorical_df = pd.DataFrame(encoded_categorical_data, columns=encoder.get_feature_names_out(categorical_features))
scaled_numerical_df = pd.DataFrame(scaled_numerical_data, columns=numerical_features)
processed_data = pd.concat([encoded_categorical_df, scaled_numerical_df], axis=1)



In [3]:
# Inspect the first processed row: one-hot indicator columns followed by standardised numeric columns.
print(processed_data.head(1))

   Branch_cse  Branch_ece  Branch_ee  Branch_me  Branch_mscit  \
0         1.0         0.0        0.0        0.0           0.0   

   Cepo _passed out  Program_mscit  C_X_B_state board  C_XII_B_state board  \
0               0.0            0.0                1.0                  1.0   

   M_F_male  ...  End term exam SGPA - 6th semester  \
0       1.0  ...                           -0.93521   

   End term exam SGPA - 7th semester  End term exam SGPA - 8th semester  \
0                          -0.674564                          -0.704771   

   CGPA after 8th semester  Class X grade  Class XII grade  \
0                -0.759161       0.222934        -0.335895   

   Overall Attendance percentage  Number of internships during undergraduate.  \
0                       0.158114                                     0.192524   

   How many hrs you study after school?  \
0                             -0.774139   

   How many value added program you have entered? (coursera/ AWS/IBM etc)  

In [4]:
# List the generated feature names; the target columns selected later are looked up by these exact strings.
print(processed_data.columns)

Index(['Branch_cse', 'Branch_ece', 'Branch_ee', 'Branch_me', 'Branch_mscit',
       'Cepo _passed out', 'Program_mscit', 'C_X_B_state board',
       'C_XII_B_state board', 'M_F_male', 'C_HLTH_good', 'C_HLTH_poor',
       'FAM_TYPE_nuclear family', 'EDU_LN_yes', 'SCHL_RCV_yes',
       'URB_RUR_urban', 'INT_CONN_poor', 'SPOR_PSN_yes', 'COC_PART_yes',
       'COC_PART_ROLE_volunteer', 'Cam_plc_yes',
       'End term exam SGPA - 1st semester ',
       'End term exam SGPA - 2nd semester',
       'End term exam SGPA - 3rd semester',
       'End term exam SGPA - 4th semester',
       'End term exam SGPA - 5th semester',
       'End term exam SGPA - 6th semester',
       'End term exam SGPA - 7th semester',
       'End term exam SGPA - 8th semester', 'CGPA after 8th semester',
       'Class X grade', 'Class XII grade', 'Overall Attendance percentage',
       'Number of internships during undergraduate.',
       'How many hrs you study after school?',
       'How many value added program you ha

In [5]:
from sklearn.model_selection import train_test_split
import numpy as np

def _to_lstm_input(frame):
    """Return the frame's values as a 3-D array of shape (rows, columns, 1)."""
    n_rows, n_cols = frame.shape
    return np.reshape(frame.values, (n_rows, n_cols, 1))


# CGPA regression task: the (standardised) final CGPA is the target and every
# other processed column is a feature.
y_cgpa = processed_data['CGPA after 8th semester']
X_cgpa = processed_data.drop(columns=['CGPA after 8th semester'])

# Placement classification task: the one-hot 'Cam_plc_yes' indicator is the
# target and every other processed column is a feature.
y_placement = processed_data['Cam_plc_yes']
X_placement = processed_data.drop(columns=['Cam_plc_yes'])

# 80/20 hold-out split for each task, with a fixed seed for repeatability.
X_train_cgpa, X_test_cgpa, y_train_cgpa, y_test_cgpa = train_test_split(
    X_cgpa,
    y_cgpa,
    test_size=0.2,
    random_state=42,
)
X_train_placement, X_test_placement, y_train_placement, y_test_placement = train_test_split(
    X_placement,
    y_placement,
    test_size=0.2,
    random_state=42,
)

# The LSTM layers expect 3-D input, so each feature column becomes one
# timestep carrying a single value.
X_train_cgpa_reshaped = _to_lstm_input(X_train_cgpa)
X_test_cgpa_reshaped = _to_lstm_input(X_test_cgpa)

X_train_placement_reshaped = _to_lstm_input(X_train_placement)
X_test_placement_reshaped = _to_lstm_input(X_test_placement)


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam


# LSTM model for CGPA prediction
def create_cgpa_lstm_model(input_shape, units=50, learning_rate=0.01):
    """Build and compile a stacked-LSTM regressor for the CGPA target.

    Args:
        input_shape: Shape of a single sample without the batch axis, e.g.
            ``(timesteps, 1)``.
        units: Number of hidden units in each of the three LSTM layers.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model: three LSTM layers followed by a
        single linear output unit, trained with mean-squared-error loss and
        reporting ``mse`` as its metric.
    """
    # Local import keeps this block self-contained; the notebook's import
    # cell only brings in LSTM and Dense from keras.layers.
    from keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape`
    # to the first LSTM makes Keras 3 emit a UserWarning on every build.
    model.add(Input(shape=input_shape))
    model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units, return_sequences=True))
    # The last recurrent layer returns only its final state for the Dense head.
    model.add(LSTM(units))
    model.add(Dense(1, activation='linear'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])
    return model

# LSTM model for placement prediction
def create_placement_lstm_model(input_shape, units=50, learning_rate=0.009):
    """Build and compile a stacked-LSTM binary classifier for campus placement.

    Args:
        input_shape: Shape of a single sample without the batch axis, e.g.
            ``(timesteps, 1)``.
        units: Number of hidden units in each of the four LSTM layers.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model: four LSTM layers followed by a
        single sigmoid output unit, trained with binary cross-entropy loss
        and reporting ``accuracy`` as its metric.
    """
    # Local import keeps this block self-contained; the notebook's import
    # cell only brings in LSTM and Dense from keras.layers.
    from keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape`
    # to the first LSTM makes Keras 3 emit a UserWarning on every build.
    model.add(Input(shape=input_shape))
    model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units, return_sequences=True))
    model.add(LSTM(units, return_sequences=True))
    # The last recurrent layer returns only its final state for the Dense head.
    model.add(LSTM(units))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


2024-06-30 23:43:45.174411: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-30 23:43:45.200400: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# --- CGPA regressor: fit on the training split, then score the held-out split.
cgpa_input_shape = X_train_cgpa_reshaped.shape[1:]  # per-sample shape, batch axis dropped
model_cgpa = create_cgpa_lstm_model(cgpa_input_shape)
model_cgpa.fit(
    X_train_cgpa_reshaped,
    y_train_cgpa,
    epochs=10,
    batch_size=32,
    verbose=1,
)
# evaluate() returns [loss, metric]; both are MSE for this model.
cgpa_scores = model_cgpa.evaluate(X_test_cgpa_reshaped, y_test_cgpa, verbose=0)
loss_cgpa, mse_cgpa = cgpa_scores
print('MSE for CGPA:', mse_cgpa)

# --- Placement classifier: same procedure, reporting accuracy instead.
placement_input_shape = X_train_placement_reshaped.shape[1:]
model_placement = create_placement_lstm_model(placement_input_shape)
model_placement.fit(
    X_train_placement_reshaped,
    y_train_placement,
    epochs=10,
    batch_size=32,
    verbose=1,
)
placement_scores = model_placement.evaluate(X_test_placement_reshaped, y_test_placement, verbose=0)
loss_placement, acc_placement = placement_scores
print('Accuracy for Placement:', acc_placement)

Epoch 1/10


2024-06-30 23:43:46.611322: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 0.9765 - mse: 0.9765
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5314 - mse: 0.5314
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.3864 - mse: 0.3864
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.4076 - mse: 0.4076
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.2756 - mse: 0.2756
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.2907 - mse: 0.2907
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.2751 - mse: 0.2751
Epoch 8/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.2467 - mse: 0.2467
Epoch 9/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.2510

  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.4903 - loss: 0.7036
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.4837 - loss: 0.6954
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.4730 - loss: 0.6949
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5096 - loss: 0.6928
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.4329 - loss: 0.6947
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.5225 - loss: 0.6928
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5012 - loss: 0.6933
Epoch 8/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.4900 - loss: 0.6933
Epoch 9/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [8]:
def predict_student_cgpa(student_index):
    """Predict one student's final CGPA and print it in scaled and original units.

    Reads the notebook globals ``X_cgpa`` (processed feature table),
    ``model_cgpa`` (trained regressor) and ``data`` (raw table).

    Args:
        student_index: Integer row position in ``X_cgpa`` of the student to
            predict for.

    Returns:
        None. The two values are printed.
    """
    student_features = X_cgpa.iloc[student_index]

    # Shape the single row as (batch=1, timesteps=n_features, channels=1),
    # matching the layout the model was trained on.
    cgpa_input = np.array(student_features).reshape((1, len(student_features), 1))

    # The model was trained on the standardised CGPA column, so its raw output
    # is in scaled units, not grade points.
    predicted_cgpa = model_cgpa.predict(cgpa_input)
    print('Predicted CGPA (scaled):', predicted_cgpa[0][0])

    # Recover the target's mean / standard deviation from the raw column to
    # undo the standardisation. A dedicated, locally named scaler is used so
    # the notebook-level `scaler`, `y_cgpa` and `y_train_cgpa` are not shadowed.
    target_scaler = StandardScaler()
    target_scaler.fit(np.array(data['CGPA after 8th semester'].values).reshape(-1, 1))
    predicted_cgpa_original = target_scaler.inverse_transform(predicted_cgpa)
    print('Predicted CGPA (original scale):', predicted_cgpa_original[0][0])


def predict_student_placement(student_index):
    """Print whether the placement model predicts the given student is placed.

    Takes row ``student_index`` of the notebook-level ``X_placement`` table,
    feeds it to ``model_placement`` and prints 'Placed' when the sigmoid
    output exceeds 0.5, otherwise 'Not Placed'. Returns None.
    """
    feature_row = X_placement.iloc[student_index]
    n_features = len(feature_row)

    # One sample, one timestep per feature, one value per timestep.
    placement_input = np.array(feature_row).reshape((1, n_features, 1))

    predicted_placement = model_placement.predict(placement_input)
    if predicted_placement[0][0] > 0.5:
        verdict = 'Placed'
    else:
        verdict = 'Not Placed'
    print('Predicted Placement:', verdict)

In [9]:
# Test 1: run both predictors on row 0 of the full feature tables.
# NOTE(review): the predictors index the unsplit X_cgpa / X_placement tables, so
# this row may have been part of the training split rather than held-out data.
predict_student_cgpa(0)
predict_student_placement(0)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
Predicted CGPA (original scale): -0.4159881
Predicted CGPA (original scale): 6.615102
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
Predicted Placement: Placed


In [10]:
# Recorded CGPA for row 0 in the raw (unscaled) table, for comparison with the prediction.
data.iloc[0]['CGPA after 8th semester']

6.3

In [11]:
# Recorded placement outcome for row 0 (lower-cased text), for comparison with the prediction.
data.iloc[0]['Cam_plc']

'yes'

In [12]:
# Test 2: run both predictors on row 6 of the full feature tables.
# NOTE(review): as with row 0, this row may have been part of the training split.
predict_student_cgpa(6)
predict_student_placement(6)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Predicted CGPA (original scale): 1.0545046
Predicted CGPA (original scale): 7.9653087
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Predicted Placement: Placed


In [13]:
# Recorded CGPA for row 6 in the raw (unscaled) table, for comparison with the prediction.
data.iloc[6]['CGPA after 8th semester']

8.03

In [14]:
# Recorded placement outcome for row 6 (lower-cased text), for comparison with the prediction.
data.iloc[6]['Cam_plc']

'yes'