In [1]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
dry_bean = fetch_ucirepo(id=602) 
  
# X shows the input datas for each of the beans. Whearas Y tells the output, name of the bean
X = dry_bean.data.features 
y = dry_bean.data.targets 

#shows how the data structure looks
print(X[:5])
print(y[:5])


    Area  Perimeter  MajorAxisLength  MinorAxisLength  AspectRatio  \
0  28395    610.291       208.178117       173.888747     1.197191   
1  28734    638.018       200.524796       182.734419     1.097356   
2  29380    624.110       212.826130       175.931143     1.209713   
3  30008    645.884       210.557999       182.516516     1.153638   
4  30140    620.134       201.847882       190.279279     1.060798   

   Eccentricity  ConvexArea  EquivDiameter    Extent  Solidity  Roundness  \
0      0.549812       28715     190.141097  0.763923  0.988856   0.958027   
1      0.411785       29172     191.272751  0.783968  0.984986   0.887034   
2      0.562727       29690     193.410904  0.778113  0.989559   0.947849   
3      0.498616       30724     195.467062  0.782681  0.976696   0.903936   
4      0.333680       30417     195.896503  0.773098  0.990893   0.984877   

   Compactness  ShapeFactor1  ShapeFactor2  ShapeFactor3  ShapeFactor4  
0     0.913358      0.007332      0.003147 

In [None]:
# Pre-processing of the data for later machine learning

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import numpy as np

# Flattens to 1D array
y = np.array(y).ravel()

# Encodes the labels from string to int
label_encoder = LabelEncoder()
y_int = label_encoder.fit_transform(y)

# makes 1 to 1 0 0 0 0 0 for hot encoding
y_int = y_int.reshape(-1, 1) 
onehot_encoder = OneHotEncoder(sparse_output=False)  
y_onehot = onehot_encoder.fit_transform(y_int)

#unit balance, centered at zero, makes the machine learning faster
def normalization(x):
    X = np.array(x, dtype=float)
    X_norm = np.zeros_like(X)
    
    for i in range(X.shape[1]):
        inputX = X[:, i]
        X_norm[:, i] = (inputX - np.mean(inputX)) / np.std(inputX)
    
    return X_norm

X_scaled = normalization(X)


print("Scaled features shape:", X_scaled[:,2])
print("One-hot labels shape:", y_onehot[1])


Scaled features shape: [-1.30659814 -1.39591111 -1.25235661 ... -0.45047814 -0.42897404
 -0.2917356 ]
One-hot labels shape: [0. 0. 0. 0. 0. 1. 0.]


In [3]:
# splittig the data to training group and test group
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_onehot, test_size=0.2, random_state=42, stratify=y
)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])


Training samples: 10888
Testing samples: 2723


In [None]:
#defining functions for the learning
# Sigmoid can not be used for multivariate logistic training, softmax is used for activation function instead all the functions from lab2/ 4
def softmax(z):
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z)

def compute_cost_multiclass(X, y, W, b):
    m = X.shape[0]
    total_cost = 0
    
    for i in range(m):
        z_i = np.dot(X[i], W) + b  
        f_wb_i = softmax(z_i)      
        total_cost += -np.sum(y[i] * np.log(f_wb_i))
    
    total_cost /= m
    return total_cost

num_classes = y_train.shape[1]
num_features = X_train.shape[1]

W = np.zeros((num_features, num_classes))
b = np.zeros(num_classes)

cost = compute_cost_multiclass(X_train, y_train, W, b)
print('Cost at initial W (zeros): {:.3f}'.format(cost))

def compute_gradient_multiclass(X, y, W, b, regression, lambda_):
    
    m, n = X.shape
    #shows how many neurons there are, for us it is 7
    num_classes = y.shape[1]
    
    dj_dw = np.zeros_like(W) 
    dj_db = np.zeros_like(b)  
    
    for i in range(m):
        z_i = np.dot(X[i], W) + b  
        f_wb = softmax(z_i)               
        diff = f_wb - y[i]                 
        
        dj_dw += np.outer(X[i], diff)       
        dj_db += diff                        
    
    dj_dw /= m
    dj_db /= m
    if regression:
        dj_dw += (lambda_ / m) * W

    return dj_db, dj_dw

import math
import numpy as np

def gradient_descent_multiclass(X, y, W_in, b_in, cost_function, gradient_function, alpha, num_iters, regression, lambda_): 
    
    J_history = []
    W_history = []

    for i in range(num_iters):
        # Compute gradients
        dj_db, dj_dw = gradient_function(X, y, W_in, b_in, regression, lambda_)
        
        # Update parameters
        W_in = W_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db
        
        # Compute and save cost
        if i < 100000: 
            cost = cost_function(X, y, W_in, b_in)
            J_history.append(cost)


        if i % max(1, math.ceil(num_iters / 10)) == 0 or i == (num_iters - 1):
            W_history.append(W_in.copy())  
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.3f}")
    
    return W_in, b_in, J_history, W_history

def predict_multiclass(X, W, b):
    
    Z = np.dot(X, W) + b  
    exp_Z = np.exp(Z)  
    probs = exp_Z / np.sum(exp_Z)
    p = np.argmax(probs, axis=1)  
    return p

#Regularization functions (optional)


Cost at initial W (zeros): 1.946


In [None]:
import numpy as np

np.random.seed(1)

# first randomly assigns W and b values for the training
num_features = X_train.shape[1]
num_classes = y_train.shape[1]

W_init = 0.01 * (np.random.rand(num_features, num_classes) - 0.5)
b_init = 0.01 * (np.random.rand(num_classes) - 0.5)

regression = False
iterations = 400  
alpha = 0.03      
lambda_ = 0.001

#train
W_final, b_final, J_history, W_history = gradient_descent_multiclass(
    X_train, y_train, W_init, b_init,
    compute_cost_multiclass,
    compute_gradient_multiclass,
    alpha,
    iterations,
    regression,
    lambda_
)

print("Final cost after gradient descent:", J_history[-1])

y_tested = predict_multiclass(X_test, W_final, b_final)

bean_classes = ['SEKER', 'BARBUNYA', 'BOMBAY', 'CALI', 'DERMOSAN', 'HOROZ', 'SIRA']
predicted_beans = [bean_classes[i] for i in y_tested]
for i in range(10):
    print(f"Predicted bean: {predicted_beans[i]}")

# Accurracy
# finds the maximum value from the list meaning that in the hot encoding 001000, shows which bean is selected
y_true_test = np.argmax(y_test, axis=1)
accuracy = np.mean(y_true_test == y_true_test)
print("Test set accuracy:", accuracy)


Iteration    0: Cost    1.899
Iteration   40: Cost    1.147
Iteration   80: Cost    0.932
Iteration  120: Cost    0.817
Iteration  160: Cost    0.741
Iteration  200: Cost    0.683
Iteration  240: Cost    0.638
Iteration  280: Cost    0.601
Iteration  320: Cost    0.571
Iteration  360: Cost    0.545


In [None]:
# defining functions for the learning
# Softmax for multiclass
def softmax(z):
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z)

# --------- COST WITH L2 REGULARIZATION ----------
def compute_cost_multiclass_reg(X, y, W, b, lambda_):
    m = X.shape[0]
    total_cost = 0
    
    for i in range(m):
        z_i = np.dot(X[i], W) + b  
        f_wb_i = softmax(z_i)
        total_cost += -np.sum(y[i] * np.log(f_wb_i))

    total_cost /= m

    # L2 penalty
    reg_cost = (lambda_ / (2 * m)) * np.sum(W * W)

    return total_cost + reg_cost

# initialize W, b
num_classes = y_train.shape[1]
num_features = X_train.shape[1]

W = np.zeros((num_features, num_classes))
b = np.zeros(num_classes)

# Example cost at initialization
cost = compute_cost_multiclass_reg(X_train, y_train, W, b, lambda_=0.1)
print('Cost at initial W (zeros): {:.3f}'.format(cost))

# --------- GRADIENT WITH L2 REGULARIZATION ----------
def compute_gradient_multiclass_reg(X, y, W, b, lambda_):
    m, n = X.shape
    num_classes = y.shape[1]
    
    dj_dw = np.zeros_like(W)
    dj_db = np.zeros_like(b)
    
    for i in range(m):
        z_i = np.dot(X[i], W) + b
        f_wb = softmax(z_i)
        diff = f_wb - y[i]
        
        dj_dw += np.outer(X[i], diff)
        dj_db += diff
    
    dj_dw /= m
    dj_db /= m

    # L2 gradient (no bias)
    dj_dw += (lambda_ / m) * W
    
    return dj_db, dj_dw

# --------- REGULARIZED GRADIENT DESCENT ----------
def gradient_descent_multiclass(X, y, W, b, alpha, num_iters, lambda_):
    
    J_history = []
    W_history = []

    for i in range(num_iters):

        dj_db, dj_dw = compute_gradient_multiclass_reg(X, y, W, b, lambda_)

        W = W - alpha * dj_dw
        b = b - alpha * dj_db

        cost = compute_cost_multiclass_reg(X, y, W, b, lambda_)
        J_history.append(cost)

        if i % max(1, num_iters // 10) == 0:
            W_history.append(W.copy())
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.3f}")
    
    return W, b, J_history, W_history


def predict_multiclass(X, W, b):
    Z = np.dot(X, W) + b  
    exp_Z = np.exp(Z)  
    probs = exp_Z / np.sum(exp_Z)
    p = np.argmax(probs, axis=1)  
    return p


Cost at initial W (zeros): 1.946


In [15]:
import numpy as np

np.random.seed(1)

# first randomly assigns W and b values for the training
num_features = X_train.shape[1]
num_classes = y_train.shape[1]

W_init = 0.01 * (np.random.rand(num_features, num_classes) - 0.5)
b_init = 0.01 * (np.random.rand(num_classes) - 0.5)

iterations = 400  
alpha = 0.03      
lambda_ = 0.1     # choose any Î» you want

# ---- train model with REGULARIZATION ----
W_final, b_final, J_history, W_history = gradient_descent_multiclass(
    X_train, y_train, W_init, b_init,
    alpha=alpha,
    num_iters=iterations,
    lambda_=lambda_
)

print("Final cost after gradient descent:", J_history[-1])


y_tested = predict_multiclass(X_test, W_final, b_final)

y_true_test = np.argmax(y_test, axis=1)
accuracy = np.mean(y_tested == y_true_test)
print("Test set accuracy:", accuracy)


Iteration    0: Cost    1.899
Iteration   40: Cost    1.147
Iteration   80: Cost    0.932
Iteration  120: Cost    0.818
Iteration  160: Cost    0.741
Iteration  200: Cost    0.683
Iteration  240: Cost    0.638
Iteration  280: Cost    0.601
Iteration  320: Cost    0.571
Iteration  360: Cost    0.545
Final cost after gradient descent: 0.5225933574393123
Predicted bean: CALI
Predicted bean: BARBUNYA
Predicted bean: SIRA
Predicted bean: HOROZ
Predicted bean: SEKER
Predicted bean: HOROZ
Predicted bean: CALI
Predicted bean: SIRA
Predicted bean: BOMBAY
Predicted bean: DERMOSAN
Test set accuracy: 0.8806463459419758
