In [6]:
import math
import sys
import pickle
from math import log10 as log

import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt

from pandas.plotting import scatter_matrix
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder


In [31]:
# Load data
df_train_in = pd.read_csv('X_train.txt', header=None, delim_whitespace=True)
df_train_out = pd.read_csv('y_train.txt', header=None, delim_whitespace=True)

df_test_in = pd.read_csv('X_test.txt', header=None, delim_whitespace=True)
df_test_out = pd.read_csv('y_test.txt', header=None, delim_whitespace=True)

In [32]:
# Pre processing
'''
OneHotEncode    0–caminhada; 1 –subindoescadas; 2 –descendo escadas; 3 –sentado; 4 –em pé; 5 –deitado.
'''
encoder = OneHotEncoder(categories='auto')
encoder.fit(df_train_out)

df_train_out_scaled = encoder.transform(df_train_out).toarray()
df_test_out_scaled = encoder.transform(df_test_out).toarray()

# Normalization
scaler = StandardScaler() 
scaler.fit(df_train_in)

df_train_in_scaled = scaler.transform(df_train_in)
df_test_in_scaled = scaler.transform(df_test_in)

In [9]:
def sigmoid(x):
    return 1. / (1. + math.exp(-1*x))

def estimate(w, fi):
    return np.apply_along_axis(sigmoid, 1, fi.T.dot(w)).reshape(-1, 1)

def predict(yest, boundry):
    return 1 if yest > boundry else 0

In [12]:
# Training parameters
eta = 1e-4
tol = 1e-3
loss = 1

out_num = len(df_train_out_scaled[0])
ones = np.ones([len(df_train_in_scaled),1])
fi = np.concatenate((ones, df_train_in_scaled),axis=1).T
w = np.array([ np.random.uniform(-1,1,(len(fi[:,0]))) for i in range(out_num) ])

In [13]:
# Estimate
def estimate(out_num, fi, w):
    '''
    :param out_num: output columns qty
    :param fi: 1 + input matrix
    '''
    N = len(fi[0,:])
    eft_w = fi.T.dot(w.T)
    yest = np.zeros([N, out_num])
    for i in range(N):
        total = np.sum(np.exp(eft_w[i]))
        for j in range(out_num):
            yest[i][j] = math.exp(eft_w[i][j]) / total 
    return yest

In [14]:
# Training
ones = np.ones([len(df_train_in_scaled),1])
fi = np.concatenate((ones, df_train_in_scaled),axis=1).T
tol = 1e-8
while True:
    yest = estimate(out_num, fi, w)
    e = df_train_out_scaled - yest
    delt = np.array([e[:,i].dot(fi.T) for i in range(out_num)])
    w = w + delt*eta
    loss = np.array([(0 if t_==y_ else 1) for t_, y_ in zip(np.argmax(df_train_out_scaled, axis=1), np.argmax(yest, axis=1))])
    sys.stdout.write("\r{:09.8f} {:04d}".format(loss.mean(), loss.sum()))
    sys.stdout.flush()


0.01101741 0081

KeyboardInterrupt: 

In [26]:
# Predict
# labels = ['caminhada', 'subindoescadas', 'descendo escadas', 'sentado', 'em pé', 'Deitado']
# Training dataset CM 
ones = np.ones([len(df_train_in_scaled),1])
fi = np.concatenate((ones, df_train_in_scaled),axis=1).T
yest = estimate(out_num, fi, w)

cm = confusion_matrix(
    np.array([m for m in np.argmax(df_train_out_scaled, axis=1)]),
    np.array([m for m in np.argmax(yest, axis=1)]))
print('Train')
print(cm)
print('\n\n')
# Test dataset CM 
ones = np.ones([len(df_test_in_scaled),1])
fi = np.concatenate((ones, df_test_in_scaled),axis=1).T
yest = estimate(out_num, fi, w)

cm = confusion_matrix(
    np.array([m for m in np.argmax(df_test_out_scaled, axis=1)]),
    np.array([m for m in np.argmax(yest, axis=1)]))
print('Test')
print(cm)


Train
[[1226    0    0    0    0    0]
 [   0 1073    0    0    0    0]
 [   0    0  986    0    0    0]
 [   0    0    0 1245   41    0]
 [   0    0    0   39 1335    0]
 [   0    0    0    0    0 1407]]



Test
[[479   8   9   0   0   0]
 [  8 460   3   0   0   0]
 [ 11  33 376   0   0   0]
 [  0   2   0 428  58   3]
 [  0   0   0  16 516   0]
 [  0   0   0   0  24 513]]


In [33]:
# 0–caminhada; 1 –subindoescadas; 2 –descendo escadas; 3 –sentado; 4 –em pé; 5 –deitado.
print(classification_report(\
    np.array([m for m in np.argmax(df_test_out_scaled, axis=1)]),
    np.array([m for m in np.argmax(yest, axis=1)]), digits=6,\
    labels=range(6), target_names=['Caminhada','Subindo Escadas','Descendo Escadas','Sentado','em pé', 'Deitado']))

                  precision    recall  f1-score   support

       Caminhada   0.961847  0.965726  0.963783       496
 Subindo Escadas   0.914513  0.976645  0.944559       471
Descendo Escadas   0.969072  0.895238  0.930693       420
         Sentado   0.963964  0.871690  0.915508       491
           em pé   0.862876  0.969925  0.913274       532
         Deitado   0.994186  0.955307  0.974359       537

        accuracy                       0.940618      2947
       macro avg   0.944410  0.939089  0.940363      2947
    weighted avg   0.943691  0.940618  0.940761      2947



In [35]:
[m for m in np.argmax(df_test_out_scaled, axis=1)]

[4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
