In [46]:
#----------------------------
# import libraries
#----------------------------
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# scikit-learn
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [47]:
def softmax(phi, w):
    # exponent matrix --> (phi @ w)
    exp_matrix = phi @ w
    # terms of exponent matrix --> exp(phi @ w)
    numerator = np.exp(exp_matrix)
    # sum of terms in exponent matrix --> exp(phi@w0) + ... + exp(phi@wK)
    # denominator of softmax function
    sum_numerator = np.sum(numerator, axis=1).reshape(-1,1)
    # copy sum (Nx1) in (K+1) columns
    denominator = np.repeat(sum_numerator, C, axis=1)
    # calculate softmax
    ŷ = numerator/denominator
    return ŷ

In [48]:
def counts(y):
    max_y = np.argmax(y, axis=1)
    classes, size = np.unique(max_y, return_counts=True)
    for i in range(len(classes)):
        print('label ', classes[i], ': ', size[i], sep='')

In [49]:
def max_pooling(y):
    # add one because output must go from 1 to 6
    y = np.argmax(y, axis=1) + 1
    y = y.reshape(-1,1)
    y = one_hot_encoder(y)
    return y

In [50]:
def zero_one_loss(y_true, y_pred):
    y = one_hot_decoder(y_true)
    ŷ = one_hot_decoder(y_pred)
    error = (y - ŷ)
    zero, counts = np.unique(error, return_counts=True)
    index = np.where(zero == 0)[0][0]
    loss = counts[index] / len(y)
    return loss

In [51]:
def iterate(x, y, N, phi, w, alpha):
    # calculate estimated output
    ŷ = softmax(phi, w)
    # calculate gradient
    #gradient = (ŷ - y).T @ phi
    gradient = (y - ŷ).T @ phi
    # calculate RMSE
    #rmse = np.mean(error * error)
    loss = zero_one_loss(y, ŷ)
    w = w - (alpha * gradient.T)
    return w, loss

In [52]:
def train(x, y, alpha=1e-7, tolerance=1e-15):
    # alpha: step size
    # tolerance: stop criteria
    #-----------------------------
    N = X_test.shape[0]                        # size of dataset
    K = X_test.shape[1]                        # number of features
    C = len(enc.categories_[0])                # number of classes
    #-----------------------------
    w = np.random.rand(K+1, C)                 # create weights matrix
    phi = np.insert(X_train, 0, [1], axis=1)   # create phi matrix
    #-----------------------------    
    # error array initialization
    e = []       
    #-----------------------------
    # run algorithm first time
    w, loss = iterate(x, y, N, phi, w, alpha)
    diff = loss
    e.append(loss)
    #-----------------------------
    # iterate until convergence
    while (diff > tolerance):
        w, loss = iterate(x, y, N, phi, w, alpha)
        diff = e[-1] - loss
        e.append(loss)
    #-----------------------------        
    return w, e

In [53]:
# y: (C x N)
# ŷ: (C x N)
# phi: (N x K+1)

# GRAD = (y - ŷ).T @ phi
# GRAD: (C x K+1)

# GRAD = [ w_0(0) w_1(0) ... w_K+1(0) ]
#        [ w_0(1) w_1(1) ... w_K+1(1) ]
#        [  ...    ...   ...   ...    ]
#        [ w_0(C) w_1(C) ... w_K+1(C) ]

# GRAD = [ ---------- wk(0) --------- ]
#        [ ---------- wk(1) --------- ]
#        [ ----------  ...  --------- ]
#        [ ---------- wk(C) --------- ]

In [54]:
# w = [ w0(0) w0(1) ... w0(C)]
#     [ w1(0) w1(1) ... w1(C)]
#     [ ...    ...  ...  ... ]
#     [ wK(0) wK(1) ... wK(C)]


# phi: (N x K+1)
#   w: (K+1 x C)

# (phi @ w): (N x C) --> exponents of terms in softmax

In [55]:
# STANDARDIZATION IN INPUT FEATURES!!!

# ... to be done ...

In [56]:
# path to save figures
path = 'figures/ex2/'
# read training data
X_train_df = pd.read_fwf('X_train.txt', header=None)
y_train_df = pd.read_csv('y_train.txt', header=None)
# read test data
X_test_df = pd.read_fwf('X_test.txt', header=None)
y_test_df = pd.read_csv('y_test.txt', header=None)

In [13]:
# convert DataFrame to NumPy array
X_train = X_train_df.to_numpy()
X_test = X_test_df.to_numpy()
y_train_raw = y_train_df.to_numpy()
y_test_raw = y_test_df.to_numpy()

In [14]:
enc = OneHotEncoder(categories='auto')
enc.fit(y_train_raw)
def one_hot_encoder(y):
    return enc.transform(y).toarray()
def one_hot_decoder(y):
    return enc.inverse_transform(y)

**output** <br>
1 – caminhada <br>
2 – subindo escadas <br>
3 – descendo escadas <br>
4 – sentado <br>
5 – em pé <br>
6 – deitado <br>
<br>
**one-hot enconding** <br>
[1 0 0 0 0 0]$^T$: walking <br>
[0 1 0 0 0 0]$^T$: climbing stairs <br>
[0 0 1 0 0 0]$^T$: going down stairs <br>
[0 0 0 1 0 0]$^T$: seated <br>
[0 0 0 0 1 0]$^T$: standing <br>
[0 0 0 0 0 1]$^T$: lying <br>


In [15]:
# one-hot encoding
y_train = one_hot_encoder(y_train_df)
y_test = one_hot_encoder(y_test_df)

In [16]:
# size of dataset
N = X_train.shape[0]
# number of features
K = X_train.shape[1]
# number of classes
C = len(enc.categories_[0])

In [17]:
# create weights matrix
#w = np.random.rand(K+1, C)
w = np.random.uniform(-1,1, (K+1, C))
# create phi matrix
phi = np.insert(X_train, 0, [1], axis=1)
x = X_train
y = y_train
alpha=1e-6

In [18]:
# for i in range(100):
#     ŷ = softmax(phi,w)
#     gradient = (y - ŷ).T @ phi
#     w = w - (alpha * gradient.T)
#     #counts(ŷ)

In [21]:
counts(y)

label 0: 1226
label 1: 1073
label 2: 986
label 3: 1286
label 4: 1374
label 5: 1407


In [22]:
counts(ŷ)

label 3: 7344
label 5: 8


In [23]:
w, e = train(x, y, alpha=1e-6, tolerance=1e-4)
ŷ = softmax(phi,w)
counts(ŷ)

label 0: 1527
label 1: 5736
label 3: 72
label 4: 9
label 5: 8


In [44]:
w = iterate(x, y, N, phi, w, alpha)[0]
ŷ = softmax(phi, w)
counts(ŷ)
zero_one_loss(y, max_pooling(ŷ))

label 0: 2
label 1: 7338
label 3: 4
label 5: 8


0.1459466811751904

In [260]:
np.set_printoptions(suppress=True)