# **CS4341 Assignment 3 - Alex Li**

The following is the code for Assignment 3 of CS4341. It contains three models: KNN, Decision Tree, and Naive Bayes. Each model is in a separate code block for organization purposes.

**Model 1: KNN**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# --- Data loading and cleaning ---
# Read the CSV and discard every row that contains a missing value.
df = pd.read_csv("/content/student-mat_modified.csv").dropna()

# --- Target / feature separation ---
# 'Performance' is the class label; all remaining columns are predictors.
y = df['Performance']
X = df.drop(columns=['Performance'])

# Map the string labels to integer codes. The encoder is kept so the report
# below can print the original class names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Expand categorical predictors into indicator columns (first level dropped).
X_encoded = pd.get_dummies(X, drop_first=True)

# --- Stratified 70/30 train/test split with a fixed seed ---
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

# --- Standardization ---
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model: k-nearest neighbours with k = 5 ---
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred = knn.predict(X_test_scaled)

# --- Evaluation on the held-out split ---
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test, y_pred, target_names=label_encoder.classes_
)
print("KNN Classifier Evaluation")
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(report)


KNN Classifier Evaluation
Accuracy: 0.47058823529411764

Classification Report:
              precision    recall  f1-score   support

        High       0.42      0.56      0.48        39
         Low       0.38      0.29      0.33        31
      Normal       0.58      0.51      0.54        49

    accuracy                           0.47       119
   macro avg       0.46      0.45      0.45       119
weighted avg       0.48      0.47      0.47       119



**Model 2: Decision Tree**


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# --- Data loading and cleaning ---
# Read the CSV and discard every row that contains a missing value.
df = pd.read_csv("/content/student-mat_modified.csv").dropna()

# --- Target / feature separation ---
# 'Performance' is the class label; all remaining columns are predictors.
y = df['Performance']
X = df.drop(columns=['Performance'])

# Map the string labels to integer codes. The encoder is kept so the report
# below can print the original class names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Expand categorical predictors into indicator columns (first level dropped).
X_encoded = pd.get_dummies(X, drop_first=True)

# --- Stratified 70/30 train/test split with a fixed seed ---
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

# --- Standardization ---
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model: decision tree, seeded so repeated runs give the same tree ---
dtree = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred = dtree.predict(X_test_scaled)

# --- Evaluation on the held-out split ---
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test, y_pred, target_names=label_encoder.classes_
)
print("Decision Tree Classifier Evaluation")
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(report)


Decision Tree Classifier Evaluation
Accuracy: 0.42857142857142855

Classification Report:
              precision    recall  f1-score   support

        High       0.47      0.44      0.45        39
         Low       0.40      0.39      0.39        31
      Normal       0.42      0.45      0.43        49

    accuracy                           0.43       119
   macro avg       0.43      0.42      0.43       119
weighted avg       0.43      0.43      0.43       119



**Model 3: Naïve Bayes**

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score

# --- Data loading and cleaning ---
# Read the CSV and discard every row that contains a missing value.
df = pd.read_csv("/content/student-mat_modified.csv").dropna()

# --- Target / feature separation ---
# 'Performance' is the class label; all remaining columns are predictors.
y = df['Performance']
X = df.drop(columns=['Performance'])

# Map the string labels to integer codes. The encoder is kept so the report
# below can print the original class names.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Expand categorical predictors into indicator columns (first level dropped).
X_encoded = pd.get_dummies(X, drop_first=True)

# --- Stratified 70/30 train/test split with a fixed seed ---
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

# --- Standardization ---
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model: Gaussian Naive Bayes ---
nb = GaussianNB().fit(X_train_scaled, y_train)
y_pred = nb.predict(X_test_scaled)

# --- Evaluation on the held-out split ---
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test, y_pred, target_names=label_encoder.classes_
)
print("Naive Bayes Classifier Evaluation")
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(report)


Naive Bayes Classifier Evaluation
Accuracy: 0.24369747899159663

Classification Report:
              precision    recall  f1-score   support

        High       0.67      0.10      0.18        39
         Low       0.24      0.81      0.36        31
      Normal       0.00      0.00      0.00        49

    accuracy                           0.24       119
   macro avg       0.30      0.30      0.18       119
weighted avg       0.28      0.24      0.15       119

