In [1]:
# Python MLTool - Classification Algorithms
# Unit test using Object-Oriented Programming

# Last Update: 2020/03/23

In [2]:
# General Definitions
#
# Experiment-wide options gathered in a single dictionary. The data-loading
# cell reads the data set, labeling, hold-out and normalization choices from it.

OPT = dict(
    prob1='iris',      # Which data set will be used
    prob2=1,           # More details about a specific data set
    norm='zscore',     # Normalization definition
    lbl='bipolar',     # Labeling type
    Nr=5,              # Number of repetitions of the algorithm (to generate statistics)
    hold='aleatory',   # Hold-out method
    ptrn=0.8,          # Fraction of samples used for training
    file='f1.txt',     # File where all the variables will be saved
)

print(OPT)

{'prob1': 'iris', 'prob2': 1, 'norm': 'zscore', 'lbl': 'bipolar', 'Nr': 5, 'hold': 'aleatory', 'ptrn': 0.8, 'file': 'f1.txt'}


In [3]:
# Data loading and pre-processing
#
# Builds the `data` object that the later cells read from. The steps run in
# this order: load -> encode labels -> hold out -> normalize.

# NOTE(review): star import hides which names this cell relies on; only DATA is
# used here. Later cells may depend on other names it exports -- confirm before
# narrowing it to `from data_object import DATA`.
from data_object import *

data = DATA()

# Load DataSet (selected through OPT['prob1'] and OPT['prob2'])

data.class_loading(problem = OPT['prob1'], problem_detail = OPT['prob2'])

# Encode Labels (scheme selected through OPT['lbl'])

data.label_encode(label_type = OPT['lbl'])

# Apply hold out (from data class)
# NOTE(review): no random seed is set in the visible cells; if the 'aleatory'
# method shuffles the samples, the split will differ between runs -- confirm.

data.hold_out(hold_method = OPT['hold'], train_size = OPT['ptrn'])

# Apply hold out (from scikit learn) -- alternative, kept disabled.
# NOTE(review): test_size=0.25 does not match OPT['ptrn'] = 0.8 used above.

#from sklearn.model_selection import train_test_split
#X_tr, X_ts, y_tr, y_ts = train_test_split(data.input,data.output,test_size=0.25,random_state=42)

# Normalize Inputs (normalize only after the hold out)
# NOTE(review): presumably the normalization statistics come from the training
# split only -- confirm in data_object.

data.normalize(norm_type = OPT['norm'])

In [4]:
# Information about Dataset

def _show_head(heading, inputs, labels):
    """Print a heading, then display the input shape and the first five rows of inputs and labels."""
    print(heading)
    display(inputs.shape)
    display(inputs[0:5,:])
    display(labels[0:5,:])
    print()

# Overall dimensions of the loaded problem.
print(f"Number of Samples: {data.N!s}")
print(f"Number of Attributes: {data.p!s}")
print(f"Number of Classes: {data.Nc!s}")
print()

# Same preview for the full data set and for each hold-out partition.
_show_head("First five samples of DataSet:", data.input, data.output)
_show_head("First five samples of Training:", data.X_tr, data.y_tr)
_show_head("First five samples of Test:", data.X_ts, data.y_ts)

Number of Samples: 150
Number of Attributes: 4
Number of Classes: 3

First five samples of DataSet:


(150, 4)

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

array([[ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1]])


First five samples of Training:


(120, 4)

array([[-0.56416501,  1.41597974, -1.30452574, -1.35043321],
       [-1.05121394, -1.71408073, -0.28396232, -0.29299774],
       [ 1.01874401,  0.52167674,  1.07678891,  1.16097603],
       [ 1.14050624, -0.1490505 ,  0.96339297,  1.16097603],
       [-0.56416501,  1.86313123, -1.41792168, -1.08607435]])

array([[ 1, -1, -1],
       [-1,  1, -1],
       [-1, -1,  1],
       [-1, -1,  1],
       [ 1, -1, -1]])


First five samples of Test:


(30, 4)

array([[-1.41650063,  0.298101  , -1.24782778, -1.35043321],
       [ 0.16640838, -0.1490505 ,  0.5665072 ,  0.76443773],
       [ 0.16640838, -0.37262625,  0.39641329,  0.36789942],
       [ 0.53169508, -1.71408073,  0.33971532,  0.10354056],
       [ 0.77521954, -0.1490505 ,  0.79329907,  1.02879659]])

array([[ 1, -1, -1],
       [-1, -1,  1],
       [-1,  1, -1],
       [-1,  1, -1],
       [-1, -1,  1]])




In [5]:
# Training and Test (OOP)
#
# Fits each classifier on the training split (data.X_tr, data.y_tr) and
# predicts the test inputs (data.X_ts). The predictions y_h1 (OLS) and
# y_h2 (LSSVM) are read by the statistics cells further down.

# NOTE(review): star imports -- only OLS and LSSVM are used in this cell.
from ols_classifier import *
from lssvm_classifier import *

# OLS
# NOTE(review): the meaning of aprox=1 is not visible here -- see ols_classifier.

ols = OLS(aprox=1)

ols.fit(data.X_tr,data.y_tr)

y_h1 = ols.predict(data.X_ts)

# LSSVM (constructed without arguments)

lssvm = LSSVM()

lssvm.fit(data.X_tr,data.y_tr)

y_h2 = lssvm.predict(data.X_ts)

In [6]:
# Information About Parameters and Outputs

# One shape per line: the OLS test predictions first, then ols.W
# (presumably the fitted OLS weight matrix -- confirm in ols_classifier).
print(y_h1.shape, ols.W.shape, sep="\n")


(30, 3)
(3, 5)


In [7]:
# Display Results (Statistics) -- OLS predictions (y_h1)

from statistics_object import *

# Compare the OLS predictions with the test labels.
stats = STATSCLASS()
stats.calculate(data.y_ts, y_h1)

# Confusion matrix first, accuracy second.
display(stats.confusion_matrix, stats.accuracy)

array([[11.,  0.,  0.],
       [ 0.,  7.,  3.],
       [ 0.,  1.,  8.]])

0.8666666666666667

In [8]:
# Display Results (Statistics) -- LSSVM predictions (y_h2)

from statistics_object import *

# Compare the LSSVM predictions with the test labels.
# Note: this rebinds `stats` to a fresh STATSCLASS instance.
stats = STATSCLASS()
stats.calculate(data.y_ts, y_h2)

# Confusion matrix first, accuracy second.
display(stats.confusion_matrix, stats.accuracy)

array([[11.,  0.,  0.],
       [ 0., 10.,  0.],
       [ 0.,  1.,  8.]])

0.9666666666666667

In [24]:
# Save Objects 

# See: https://stackoverflow.com/questions/4529815/saving-an-object-data-persistence