## Importing libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import mode
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, roc_curve, auc
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import fashion_mnist

## Load & Filter Data

In [None]:
# Load the Fashion-MNIST training and test splits (images plus integer labels).
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# Boolean masks selecting the samples whose label is below 5,
# i.e. only the first five classes (0-4) are kept.
mask_train = np.less(y_train_full, 5)
mask_test = np.less(y_test, 5)

# Apply each mask to the images and the labels together so they stay aligned.
X_train_full, y_train_full = X_train_full[mask_train], y_train_full[mask_train]
X_test, y_test = X_test[mask_test], y_test[mask_test]

## Normalize Data

In [None]:
# Convert the images to float32, divide by 255.0, and flatten every image
# into a single row: (n_samples, 28, 28) -> (n_samples, 784).
# The right-hand side is evaluated before the name is rebound, so len() still
# sees the un-flattened array (its first dimension is the sample count).
X_train_full = (X_train_full.astype('float32') / 255.0).reshape(len(X_train_full), -1)
X_test = (X_test.astype('float32') / 255.0).reshape(len(X_test), -1)

## Split Train/Validation

In [None]:
# Hold out 20% of the filtered training data for validation.
# `stratify` keeps the class proportions equal in both parts, and the fixed
# `random_state` makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.2,
    stratify=y_train_full,
    random_state=42,
)

## Standardize

In [None]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to all three splits so that no
# validation/test statistics leak into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Report how many samples ended up in each split.
for caption, split in (
    ("Training samples:", X_train),
    ("Validation samples:", X_val),
    ("Testing samples:", X_test),
):
    print(caption, split.shape[0])

## Implementing Logistic Regression

In [None]:
# Logistic-regression classifier (log loss) optimised with SGD at a constant
# learning rate. It is trained through repeated `partial_fit` calls, one call
# per epoch, so the training and validation loss can be recorded after every
# pass over the data.
# NOTE(review): `max_iter` and `warm_start` configure `fit`; they are kept
# unchanged here but are presumably not what drives the `partial_fit` loop —
# confirm against the scikit-learn documentation.
lr_model = SGDClassifier(
    loss='log_loss',
    learning_rate='constant',
    eta0=0.0001,
    max_iter=1,
    warm_start=True,
    random_state=42
)

train_loss = []   # log loss on the training split after each epoch
val_loss = []     # log loss on the validation split after each epoch
epochs = 30
classes = np.unique(y_train)   # sorted class labels, passed to partial_fit

for _ in range(epochs):
    # `classes` tells partial_fit the full label set up front.
    lr_model.partial_fit(X_train_scaled, y_train, classes=classes)

    y_train_prob = lr_model.predict_proba(X_train_scaled)
    y_val_prob = lr_model.predict_proba(X_val_scaled)

    # Pass `labels` explicitly so the probability columns are matched to the
    # class order even if a split happened to miss one of the classes.
    train_loss.append(log_loss(y_train, y_train_prob, labels=classes))
    val_loss.append(log_loss(y_val, y_val_prob, labels=classes))

# Final evaluation on the held-out test split.
y_pred_lr = lr_model.predict(X_test_scaled)
acc_lr = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Accuracy: {acc_lr:.4f}")

## Plotting the Loss Curve

In [None]:
# Plot training vs. validation log loss against the (1-based) epoch number.
epoch_axis = range(1, epochs + 1)

plt.figure(figsize=(8, 5))
for series, legend_text in (
    (train_loss, 'Training Loss'),
    (val_loss, 'Validation Loss'),
):
    plt.plot(epoch_axis, series, label=legend_text)

plt.title("Logistic Regression Loss Curve")
plt.xlabel("Epochs")
plt.ylabel("Log Loss")
plt.legend()
plt.grid(True)
plt.show()

## Plotting the Confusion Matrix

In [None]:
# Confusion matrix of the logistic-regression predictions on the test split,
# drawn with tick labels 0-4.
class_labels = np.arange(5)
cm_lr = confusion_matrix(y_test, y_pred_lr)

disp_lr = ConfusionMatrixDisplay(confusion_matrix=cm_lr, display_labels=class_labels)
disp_lr.plot(cmap=plt.cm.Blues)

# The display's axes are the current axes, so set the title on them directly.
disp_lr.ax_.set_title("Logistic Regression Confusion Matrix")
plt.show()

## Plotting the ROC Curve

In [None]:
# One-vs-rest ROC curves: for every class, treat "is this class" as the
# positive label and use the model's predicted probability for that class
# as the score.
y_scores = lr_model.predict_proba(X_test_scaled)
fpr, tpr, roc_auc = {}, {}, {}

plt.figure(figsize=(8, 6))
for class_idx in range(5):
    is_class = (y_test == class_idx).astype(int)
    fp_rate, tp_rate, _ = roc_curve(is_class, y_scores[:, class_idx])
    area = auc(fp_rate, tp_rate)

    # Keep the per-class results available after the loop.
    fpr[class_idx] = fp_rate
    tpr[class_idx] = tp_rate
    roc_auc[class_idx] = area

    plt.plot(fp_rate, tp_rate, label=f"Class {class_idx} AUC = {area:.3f}")

# Diagonal reference line (a classifier with no discriminative power).
plt.plot([0, 1], [0, 1], "k--")
plt.title("Logistic Regression ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.grid(True)
plt.show()

## Implementing K-Means Clustering

In [None]:
# Cluster the standardized training data into five groups. K-Means never sees
# the labels; they are only used afterwards to name each cluster.
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(X_train_scaled)

# Cluster index assigned to every test sample.
clusters = kmeans.predict(X_test_scaled)

# Map clusters to real labels using the training set: each cluster is given
# the most frequent true label among the training samples assigned to it.
train_clusters = kmeans.predict(X_train_scaled)

n_clusters = kmeans.n_clusters
# Lookup table cluster index -> label. It starts at 0, so a cluster that
# received no training samples falls back to label 0.
cluster_to_label = np.zeros(n_clusters, dtype=y_train.dtype)
for c in range(n_clusters):
    members = y_train[train_clusters == c]
    if members.size:
        # bincount/argmax returns the most frequent label and, on ties, the
        # smallest one. Assumes labels are non-negative integers (they were
        # filtered to 0-4 when the data was loaded).
        cluster_to_label[c] = np.bincount(members).argmax()

# Same mapping exposed as a dict.
labels_map = {c: cluster_to_label[c] for c in range(n_clusters)}

# Final K-Means prediction: translate every test cluster index to its label
# with a single vectorized lookup.
y_pred_kmeans = cluster_to_label[clusters]

acc_kmeans = accuracy_score(y_test, y_pred_kmeans)
print("\n==============================")
print("K-Means Accuracy (mapped):", acc_kmeans)
print("==============================\n")

## Plotting the Confusion Matrix

In [None]:
# Confusion matrix of the label-mapped K-Means predictions on the test split,
# drawn with tick labels 0-4.
class_labels = np.arange(5)
cm_kmeans = confusion_matrix(y_test, y_pred_kmeans)

disp_kmeans = ConfusionMatrixDisplay(confusion_matrix=cm_kmeans, display_labels=class_labels)
disp_kmeans.plot(cmap=plt.cm.Oranges)

# The display's axes are the current axes, so set the title on them directly.
disp_kmeans.ax_.set_title("K-Means Confusion Matrix")
plt.show()