In [14]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import OneHotEncoder

In [None]:
#softmax
def softmax(Z):
    Zs = Z - np.max(Z, axis=1, keepdims=True)  # ổn định số học
    expZ = np.exp(Zs)
    return expZ / np.sum(expZ, axis=1, keepdims=True)



In [11]:
#Loss function
def cross_entropy(W, X, Y, epsilon=1e-12):
    P = softmax(X @ W)  # (n,K)
    loss = -np.mean(np.sum(Y * np.log(P + epsilon), axis=1))
    return loss, P

In [None]:
#gradient descent
def gd_softmax(X, Y, lr=0.1, epochs=500, tol=1e-7, verbose=True):
    n, d = X.shape
    K = Y.shape[1]
    W = np.zeros((d, K))
    history = []

    for t in range(1, epochs+1):
        loss, P = cross_entropy(W, X, Y)
        history.append(loss)

        # Gradient
        dW = (X.T @ (P - Y)) / n

        # Update
        W_new = W - lr * dW

        # Check convergence
        if np.linalg.norm(W_new - W) < tol:
            W = W_new
            if verbose:
                print(f"[Softmax] Early stop at epoch {t}, loss={loss:.6f}")
            break

        W = W_new
        if verbose and (t == 1 or t % 50 == 0):
            print(f"[Softmax] epoch {t:4d} loss={loss:.6f}")

    return W, history

In [None]:
# --- Predict ---
def predict(X, W):
   P = softmax(X @ W)       # X: (n,d), W: (d,K) -> P: (n,K)
   y_pred = np.argmax(P, axis=1)
   return y_pred, P


## Apply

In [44]:
# ---- Load data ----
X_tr = pd.read_csv(
    r"C:\Users\admin\Documents\Study\Master\Optimization\optimization-project\Data\Train_test\X_train.csv"
).drop(columns=["strata"], errors="ignore").values
Y_tr = pd.read_csv(
    r"C:\Users\admin\Documents\Study\Master\Optimization\optimization-project\Data\Train_test\y_train.csv"
).values.ravel()
X_te = pd.read_csv(
    r"C:\Users\admin\Documents\Study\Master\Optimization\optimization-project\Data\Train_test\X_test.csv"
).drop(columns=["strata"], errors="ignore").values

Y_te = pd.read_csv(
    r"C:\Users\admin\Documents\Study\Master\Optimization\optimization-project\Data\Train_test\y_test.csv"
).values.ravel()


In [None]:
#SCALE FEATURES 

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr)
X_te = scaler.transform(X_te)


In [None]:
enc = OneHotEncoder(sparse_output=False)
Y_train = enc.fit_transform(Y_tr.reshape(-1,1))
Y_test  = enc.transform(Y_te.reshape(-1,1))   


In [47]:
# Huấn luyện với train
W, history = gd_softmax(X_tr, Y_train, lr=0.1, epochs=500)

# In loss cuối cùng
print("Final train loss:", history[-1])


[Softmax] epoch    1 loss=1.098612
[Softmax] epoch   50 loss=0.651533
[Softmax] epoch  100 loss=0.610248
[Softmax] epoch  150 loss=0.594145
[Softmax] epoch  200 loss=0.585915
[Softmax] epoch  250 loss=0.580960
[Softmax] epoch  300 loss=0.577614
[Softmax] epoch  350 loss=0.575162
[Softmax] epoch  400 loss=0.573261
[Softmax] epoch  450 loss=0.571729
[Softmax] epoch  500 loss=0.570462
Final train loss: 0.5704616087648411


In [49]:
y_pred_tr, P_tr = predict(X_tr, W)
print("Train acc:", (y_pred_tr == Y_tr).mean())

y_pred_te, P_te = predict(X_te, W)
print("Test  acc:", (y_pred_te == Y_te).mean())


Train acc: 0.7722829212190915
Test  acc: 0.7666666666666667
