# Laboratorium 11 - Spadek wzdłuż gradientu

In [1]:
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import linalg

sns.set_style("darkgrid")

In [2]:
labels = pd.read_csv("breast-cancer.labels", header=None, names=["name"])
column_names = labels["name"].tolist()


train_data = pd.read_csv("breast-cancer-train.dat", header=None, names=column_names)
validate_data = pd.read_csv(
    "breast-cancer-validate.dat", header=None, names=column_names
)

In [3]:
# Reprezentacja liniowa
A_train_linear = train_data.drop(["patient ID", "Malignant/Benign"], axis=1).values
A_validate_linear = validate_data.drop(
    ["patient ID", "Malignant/Benign"], axis=1
).values

# Reprezentacja kwadratowa
selected_features = [
    "radius (mean)",
    "perimeter (mean)",
    "area (mean)",
    "symmetry (mean)",
]


def create_quadratic_features(data):
    quadratic_features = data[selected_features].copy()
    for feature in selected_features:
        quadratic_features[f"{feature}^2"] = data[feature] ** 2
    for i in range(len(selected_features)):
        for j in range(i + 1, len(selected_features)):
            feature1 = selected_features[i]
            feature2 = selected_features[j]
            quadratic_features[f"{feature1}*{feature2}"] = (
                data[feature1] * data[feature2]
            )
    return quadratic_features.values


A_train_quadratic = create_quadratic_features(train_data)
A_validate_quadratic = create_quadratic_features(validate_data)

In [4]:
# Wektor b dla zbioru treningowego
b_train = np.array([[1, 0] if row == "M" else [0, 1] for row in train_data["Malignant/Benign"]])

# Wektor b dla zbioru walidacyjnego
b_validate = np.array([[1, 0] if row == "M" else [0, 1] for row in validate_data["Malignant/Benign"]])

In [None]:
def softmax(Z):
    Z -= np.max(Z, axis=1).reshape(-1, 1)
    E = np.e ** Z
    SUM = np.sum(E, axis=1).reshape(-1, 1)
    P = E / SUM
    
    return P

In [6]:
def xentropy(S, T):
    n = len(S)
    return - (np.sum(T * np.log(S)) / n)

In [7]:
def grad_xentropy(X, S, T):
    n = len(S)
    return X.T @ (S - T) / n

In [8]:
def classify(W, X):
    sf_max = softmax(X @ W)
    return (sf_max == np.max(sf_max, axis=1, keepdims=True))

In [None]:
def calc_acc(P, T):
    accuracy = np.sum(P * T) / P.shape[0]
    return 100.0 * accuracy


def print_log(step, cost, train_acc, val_acc):
    log = 'Step {:3d}\tcost value: {:5.2f},\ttrain accuracy: {:5.2f},\t' \
          'validation accuracy: {:5.2f}'
    log = log.format(step, cost.item(), train_acc.item(), val_acc.item())
    
    print(log)


In [None]:
def gd_fit(W0, X, T, X_val, T_val, lr=1.0, steps=100, log_every=5):
    n = X.shape[0]
    W = np.copy(W0)
    M = 0
    mu = 0.9
    
    for step in range(steps):
        sf_max = softmax(X @ W)
        cost_val = xentropy(sf_max, T)
        
        cost_grad = grad_xentropy(X, sf_max, T)
        M = mu * M - lr * cost_grad
        W = W + M
        
        P_train = classify(W, X)
        train_acc = calc_acc(P_train, T)
        
        P_val = classify(W, X_val)
        val_acc = calc_acc(P_val, T_val)
        
        if step == 0 or (step + 1) % log_every == 0:
            print_log(step+1, cost_val, train_acc, val_acc)
    
    return W

In [11]:
ATA_eigenvalues, _ = np.linalg.eig(A_train_linear.T @ A_train_linear)
lambda_min = np.min(ATA_eigenvalues)
lambda_max = np.max(ATA_eigenvalues)
condition_no = lambda_max / lambda_min

#conservative learning rate due to very high condition coefficient
lr = 1 / lambda_max

In [12]:
X = np.column_stack([A_train_linear, np.full(A_train_linear.shape[0], 1)])
T = b_train
X_val = np.column_stack([A_validate_linear, np.full(A_validate_linear.shape[0], 1)])
T_val = b_validate

print(X.shape, T.shape, X_val.shape, T_val.shape)

(300, 31) (300, 2) (260, 31) (260, 2)


In [13]:
W0 = np.random.randn(31, 2)
W0 = np.zeros((31,2))
#print(X)
#print(W0)
#print(softmax(X @ W0))


In [14]:
W = gd_fit(W0, X, T, X_val, T_val, lr=3.0, steps = 300, log_every=10)

Step   1	cost value:  0.69,	train accuracy: 48.67,	validation accuracy: 23.08
Step  10	cost value:   nan,	train accuracy: 48.67,	validation accuracy: 23.85
Step  20	cost value:   nan,	train accuracy: 72.67,	validation accuracy: 85.38
Step  30	cost value:   nan,	train accuracy: 83.00,	validation accuracy: 69.62
Step  40	cost value:   nan,	train accuracy: 90.67,	validation accuracy: 87.69
Step  50	cost value:   nan,	train accuracy: 90.33,	validation accuracy: 89.23
Step  60	cost value:   nan,	train accuracy: 85.00,	validation accuracy: 73.46
Step  70	cost value:   nan,	train accuracy: 91.33,	validation accuracy: 89.62
Step  80	cost value:   nan,	train accuracy: 90.33,	validation accuracy: 81.15
Step  90	cost value:   nan,	train accuracy: 91.33,	validation accuracy: 82.31
Step 100	cost value:   nan,	train accuracy: 91.67,	validation accuracy: 84.23
Step 110	cost value:   nan,	train accuracy: 91.67,	validation accuracy: 84.23
Step 120	cost value:   nan,	train accuracy: 91.67,	validation ac

  return - (np.sum(T * np.log(S)) / n)
  return - (np.sum(T * np.log(S)) / n)
