# Functions

In [19]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import OneHotEncoder

In [20]:
def softmax(Z):
    """Row-wise softmax of a 2-D score matrix.

    The maximum of each row is subtracted before exponentiating; this does not
    change the result mathematically but keeps ``np.exp`` from overflowing on
    large scores.

    Z: (n,K) scores
    returns: (n,K) array, each row normalised by its own sum
    """
    row_peak = np.max(Z, axis=1, keepdims=True)
    numerators = np.exp(Z - row_peak)
    row_totals = np.sum(numerators, axis=1, keepdims=True)
    return numerators / row_totals


In [21]:
def cross_entropy(W, X, Y, eps=1e-12):
    """Mean cross-entropy of the model ``softmax(X @ W)`` against targets Y.

    W: (d,K) weights
    X: (n,d) features
    Y: (n,K) target matrix (one-hot in this notebook)
    eps: added to the probabilities before the log so log(0) is never taken
    returns: (loss, P) where P is the (n,K) matrix of predicted probabilities
    """
    probs = softmax(X @ W)
    # Per-sample log-likelihood: sum over classes of target * log(probability).
    log_lik = np.sum(Y * np.log(probs + eps), axis=1)
    return -np.mean(log_lik), probs

In [29]:
def sgd_softmax(X, Y, lr=0.05, epochs=100, shuffle=True, seed=42,
                lr_decay=0.0, verbose=True):
    """
    Fit softmax regression with per-sample stochastic gradient descent.

    X: (n,d)
    Y: (n,K) one-hot
    lr: initial step size
    epochs: number of passes over the data
    shuffle: visit the samples in a fresh random order every epoch
    seed: seed of the generator used for shuffling
    lr_decay: the step used in epoch t (0-based) is lr / (1 + lr_decay * t).
        The default 0.0 keeps the step constant (the original behaviour).
        With a constant step the per-sample updates keep jumping around, and
        the epoch loss oscillates instead of settling; a positive value
        shrinks the step over time to damp that.
    verbose: when True, compute and print the full-data loss after each epoch

    returns: W (d,K), trained starting from all zeros
    raises: ValueError if X or Y is not 2-D, or their row counts differ
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    if X.ndim != 2 or Y.ndim != 2:
        raise ValueError(
            f"X and Y must be 2-D, got shapes {X.shape} and {Y.shape}"
        )
    if X.shape[0] != Y.shape[0]:
        # Without this check a longer Y would be silently truncated by the
        # index loop below.
        raise ValueError(
            f"X has {X.shape[0]} rows but Y has {Y.shape[0]}"
        )

    n, d = X.shape
    K = Y.shape[1]
    W = np.zeros((d, K))
    rng = np.random.default_rng(seed)

    for epoch in range(epochs):
        # With lr_decay == 0 this is exactly lr, so the default run is unchanged.
        step = lr / (1.0 + lr_decay * epoch)

        idx = np.arange(n)
        if shuffle:
            rng.shuffle(idx)

        for i in idx:
            xi = X[i:i+1, :]              # (1,d)
            yi = Y[i:i+1, :]              # (1,K)

            pi = softmax(xi @ W)          # (1,K)
            dW = xi.T @ (pi - yi)         # (d,K) gradient of this sample's loss
            W -= step * dW

        if verbose:
            # The loss is only needed for reporting, so skip the full pass
            # over the data when nothing is printed.
            loss, _ = cross_entropy(W, X, Y)
            print(f"[SGD] epoch {epoch+1} loss={loss:.6f}")
    return W

In [23]:
def predict(X, W):
    """Predict classes with the softmax model.

    X: (n,d) features
    W: (d,K) weights
    returns: (y_pred, P) where y_pred holds, for each row, the column index of
        the largest probability and P is the (n,K) probability matrix
    """
    proba = softmax(X @ W)
    return np.argmax(proba, axis=1), proba


# Apply: train and evaluate on the dataset

In [24]:
# ---- Load data ----
# The dataset folder is named once, so pointing the notebook at another
# location means editing a single line.
DATA_DIR = Path(
    r"C:\Users\admin\Documents\Study\Master\Optimization\optimization-project\Data\Train_test"
)

# Features: the "strata" column is dropped when present (errors="ignore").
X_tr = (
    pd.read_csv(DATA_DIR / "X_train.csv")
    .drop(columns=["strata"], errors="ignore")
    .values
)
# Labels: flattened to 1-D.
Y_tr = pd.read_csv(DATA_DIR / "y_train.csv").values.ravel()
X_te = (
    pd.read_csv(DATA_DIR / "X_test.csv")
    .drop(columns=["strata"], errors="ignore")
    .values
)
Y_te = pd.read_csv(DATA_DIR / "y_test.csv").values.ravel()

# y_tr and y_te keep copies of the 1-D labels.
y_tr = Y_tr.copy()
y_te = Y_te.copy()


In [25]:
# Scale features
# StandardScaler is already imported in the notebook's import cell.
# The scaler is fitted on the training split only and its statistics are
# reused for the test split, so no test information enters preprocessing.
scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr)
X_te = scaler.transform(X_te)


In [26]:
# One-hot encode the labels: the categories are learned from the training
# labels and then applied unchanged to the test labels.
enc = OneHotEncoder(sparse_output=False)
enc.fit(y_tr.reshape(-1, 1))
Y_tr = enc.transform(y_tr.reshape(-1, 1))
Y_te = enc.transform(y_te.reshape(-1, 1))

In [31]:
W = sgd_softmax(X_tr, Y_tr, lr=0.05, epochs=100)

# Predict
y_pred_tr, _ = predict(X_tr, W)
y_pred_te, _ = predict(X_te, W)

# predict() returns column indices of the one-hot matrix, while y_tr / y_te
# hold the original label values. Map the indices back through the encoder's
# categories so the comparison is valid even when the labels are not exactly
# 0..K-1 (when they are, this mapping is the identity).
print("Train acc:", accuracy_score(y_tr, enc.categories_[0][y_pred_tr]))
print("Test  acc:", accuracy_score(y_te, enc.categories_[0][y_pred_te]))

[SGD] epoch 1 loss=0.808759
[SGD] epoch 2 loss=0.731033
[SGD] epoch 3 loss=0.924207
[SGD] epoch 4 loss=0.736483
[SGD] epoch 5 loss=0.871532
[SGD] epoch 6 loss=0.805170
[SGD] epoch 7 loss=0.739757
[SGD] epoch 8 loss=0.790381
[SGD] epoch 9 loss=0.746348
[SGD] epoch 10 loss=0.791290
[SGD] epoch 11 loss=0.821354
[SGD] epoch 12 loss=0.897502
[SGD] epoch 13 loss=0.779056
[SGD] epoch 14 loss=0.779089
[SGD] epoch 15 loss=0.825194
[SGD] epoch 16 loss=0.724977
[SGD] epoch 17 loss=0.756171
[SGD] epoch 18 loss=0.771898
[SGD] epoch 19 loss=0.793138
[SGD] epoch 20 loss=0.870974
[SGD] epoch 21 loss=0.669558
[SGD] epoch 22 loss=0.759083
[SGD] epoch 23 loss=0.769834
[SGD] epoch 24 loss=0.792464
[SGD] epoch 25 loss=0.809128
[SGD] epoch 26 loss=0.733155
[SGD] epoch 27 loss=0.775169
[SGD] epoch 28 loss=0.825575
[SGD] epoch 29 loss=0.797680
[SGD] epoch 30 loss=0.751542
[SGD] epoch 31 loss=0.780000
[SGD] epoch 32 loss=0.715111
[SGD] epoch 33 loss=0.953440
[SGD] epoch 34 loss=0.754851
[SGD] epoch 35 loss=0.7