In [1]:
# Mount Google Drive at /content/gdrive so later cells can read files stored there.
# Requires the google.colab package to be importable.

from google.colab import drive
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the student-performance CSV from the mounted Drive into a DataFrame.
# The path contains spaces inside both the folder name and the file name;
# keep the literal exactly as written.
data = pd.read_csv(
    "/content/gdrive/MyDrive/Student performance /Student_performance_data _.csv"
)

In [3]:
# Quick look at the raw data: the first rows, then the number of missing
# values in each column.
print(data.head())
print(data.isna().sum())

# Features: every column except the row identifier and the two grade columns.
# NOTE(review): GPA is excluded together with the target, presumably because
# GradeClass is derived from it — confirm against the dataset description.
X = data.drop(columns=['StudentID', 'GPA', 'GradeClass'])

# Target: GradeClass. Print its dtype as loaded, then convert the labels to a
# pandas categorical so they are treated as classes.
y = data['GradeClass']
print(y.dtype)
y = y.astype('category')

   StudentID  Age  Gender  Ethnicity  ParentalEducation  StudyTimeWeekly  \
0       1001   17       1          0                  2        19.833723   
1       1002   18       0          0                  1        15.408756   
2       1003   15       0          2                  3         4.210570   
3       1004   17       1          0                  3        10.028829   
4       1005   17       1          0                  2         4.672495   

   Absences  Tutoring  ParentalSupport  Extracurricular  Sports  Music  \
0         7         1                2                0       0      1   
1         0         0                1                0       0      0   
2        26         0                2                0       0      0   
3        14         0                3                1       0      0   
4        17         1                3                0       0      0   

   Volunteering       GPA  GradeClass  
0             0  2.929196         2.0  
1             0  3

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified even though the reported class
# supports are very uneven; consider stratify=y, keeping in mind that doing so
# changes every metric printed below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no statistics from the test rows are
# used when scaling.
# X_train / X_test are rebound to the transformed outputs; the unscaled
# versions are no longer available under these names after this cell.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:

# Logistic regression on the standardized features, with the solver allowed
# up to 1000 iterations.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

# Report on the held-out split: overall accuracy, confusion matrix, and the
# per-class precision / recall / F1 table.
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)
print(f'Logistic Regression Accuracy: {log_reg_accuracy}')
print(confusion_matrix(y_test, y_pred_log_reg))
print(classification_report(y_test, y_pred_log_reg))


Logistic Regression Accuracy: 0.6826722338204593
[[  0  15   3   2   2]
 [  5  19  20   1   4]
 [  2  12  49  16   6]
 [  1   2  21  31  31]
 [  0   1   0   8 228]]
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        22
         1.0       0.39      0.39      0.39        49
         2.0       0.53      0.58      0.55        85
         3.0       0.53      0.36      0.43        86
         4.0       0.84      0.96      0.90       237

    accuracy                           0.68       479
   macro avg       0.46      0.46      0.45       479
weighted avg       0.65      0.68      0.66       479



In [10]:
# Decision tree classifier on the same train/test split; random_state is
# fixed so the fitted tree is reproducible.
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_decision_tree = decision_tree.predict(X_test)

# Report on the held-out split: overall accuracy, confusion matrix, and the
# per-class precision / recall / F1 table.
decision_tree_accuracy = accuracy_score(y_test, y_pred_decision_tree)
print(f'Decision Tree Accuracy: {decision_tree_accuracy}')
print(confusion_matrix(y_test, y_pred_decision_tree))
print(classification_report(y_test, y_pred_decision_tree))



Decision Tree Accuracy: 0.6200417536534447
[[  4  10   5   1   2]
 [  2  25  14   3   5]
 [  4  19  37  16   9]
 [  1   6  19  35  25]
 [  5   5   9  22 196]]
              precision    recall  f1-score   support

         0.0       0.25      0.18      0.21        22
         1.0       0.38      0.51      0.44        49
         2.0       0.44      0.44      0.44        85
         3.0       0.45      0.41      0.43        86
         4.0       0.83      0.83      0.83       237

    accuracy                           0.62       479
   macro avg       0.47      0.47      0.47       479
weighted avg       0.62      0.62      0.62       479



In [11]:
# Random forest classifier on the same train/test split; random_state is
# fixed so the fitted ensemble is reproducible.
random_forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_random_forest = random_forest.predict(X_test)

# Report on the held-out split: overall accuracy, confusion matrix, and the
# per-class precision / recall / F1 table.
random_forest_accuracy = accuracy_score(y_test, y_pred_random_forest)
print(f'Random Forest Accuracy: {random_forest_accuracy}')
print(confusion_matrix(y_test, y_pred_random_forest))
print(classification_report(y_test, y_pred_random_forest))

Random Forest Accuracy: 0.6889352818371608
[[  2   9   7   2   2]
 [  1  26  16   2   4]
 [  0  15  42  23   5]
 [  0   1  19  37  29]
 [  0   1   3  10 223]]
              precision    recall  f1-score   support

         0.0       0.67      0.09      0.16        22
         1.0       0.50      0.53      0.51        49
         2.0       0.48      0.49      0.49        85
         3.0       0.50      0.43      0.46        86
         4.0       0.85      0.94      0.89       237

    accuracy                           0.69       479
   macro avg       0.60      0.50      0.50       479
weighted avg       0.68      0.69      0.67       479



In [13]:
# Gather the held-out accuracy of each fitted model into one mapping, keyed
# by model name in the order the models were trained, and print it.
model_accuracies = dict([
    ('Logistic Regression', log_reg_accuracy),
    ('Decision Tree', decision_tree_accuracy),
    ('Random Forest', random_forest_accuracy),
])
print(model_accuracies)

{'Logistic Regression': 0.6826722338204593, 'Decision Tree': 0.6200417536534447, 'Random Forest': 0.6889352818371608}
