In [17]:
# Python MLTool - OLS Classifier
# Unit test of the OLS classifier (the optional Scikit-Learn hold-out is left commented out)

# Last Update: 2020/03/23

In [None]:
# General Definitions
#
# OPT collects every tunable setting of the experiment in one dictionary.
# The options are added one by one, in the order they are printed below.

OPT = {}

OPT['prob1'] = 1             # Which data set will be used (passed as `problem` to class_loading)
OPT['prob2'] = 2             # More details about a specific data set (passed as `problem_detail`)
OPT['norm'] = 3              # Normalization definition (passed as `norm_type` to normalize)
OPT['lbl'] = 0               # Labeling definition (passed as `label_type` to label_encode)
OPT['Nr'] = 5                # Number of repetitions of the algorithm
OPT['hold'] = 1              # Hold-out method (passed as `hold_method` to hold_out)
OPT['ptrn'] = 0.7            # Percentage of samples for training (passed as `train_size`)
OPT['file'] = 'fileX.txt'    # File where all the variables will be saved

# Echo the configuration so that it is recorded next to the results.
print(OPT)

In [19]:
# Data loading and pre-processing
#
# Builds the `data` object used by the following cells. The steps are
# order-dependent: load -> encode labels -> hold out -> normalize.

# Import only the class this cell needs instead of `import *`, so it is
# clear where DATA comes from and no unrelated names leak into the kernel.
from data_object import DATA

data = DATA()

# Load DataSet (selected by OPT['prob1'], refined by OPT['prob2'])

data.class_loading(problem = OPT['prob1'], problem_detail = OPT['prob2'])

# Encode Labels

data.label_encode(label_type = OPT['lbl'])

# Apply hold out (from data class)

data.hold_out(hold_method = OPT['hold'], train_size = OPT['ptrn'])

# Apply hold out (from scikit learn)
# Disabled alternative to data.hold_out above. If enabled, it takes the
# inputs and the outputs as two separate arrays and reuses OPT['ptrn'] so
# that both hold-out variants split with the same training fraction.

#from sklearn.model_selection import train_test_split
#X_tr, X_ts, y_tr, y_ts = train_test_split(data.input,data.output,train_size=OPT['ptrn'],random_state=42)

# Normalize Inputs (just normalize after hold out)

data.normalize(norm_type = OPT['norm'])

In [20]:
# Information about Dataset
#
# Prints the global counts stored on `data`, then previews the first five
# rows of the full data set, of the training split and of the test split.

for caption, value in (
    ("Number of Samples: ", data.N),
    ("Number of Attributes: ", data.p),
    ("Number of Classes: ", data.Nc),
):
    print(caption + str(value))
print()


def _show_first_rows(title, inputs, targets):
    """Print `title`, then display the shape of `inputs` and the first five
    rows of `inputs` and of `targets`, followed by a blank line."""
    print(title)
    display(inputs.shape)
    display(inputs[0:5,:])
    display(targets[0:5,:])
    print()


_show_first_rows("First five samples of DataSet:", data.input, data.output)
_show_first_rows("First five samples of Training:", data.X_tr, data.y_tr)
_show_first_rows("First five samples of Test:", data.X_ts, data.y_ts)

Number of Samples: 150
Number of Attributes: 4
Number of Classes: 3

First five samples of DataSet:


(150, 4)

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2]])

array([[ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1],
       [ 1, -1, -1]])


First five samples of Training:


(120, 4)

array([[ 0.95446328, -0.35217197,  0.4945318 ,  0.14129329],
       [ 1.69739685,  1.22977699,  1.35625228,  1.71852071],
       [ 0.45917422, -1.93412093,  0.43708377,  0.40416453],
       [ 0.70681875,  0.09981345,  1.01156409,  0.79847138],
       [-1.52198198,  0.77779157, -1.3438052 , -1.1730629 ]])

array([[-1,  1, -1],
       [-1, -1,  1],
       [-1,  1, -1],
       [-1, -1,  1],
       [ 1, -1, -1]])


First five samples of Test:


(30, 4)

array([[ 0.2115297 , -0.12617926,  0.60942787,  0.79847138],
       [ 0.83064101,  0.32580615,  0.78177196,  1.06134262],
       [-1.02669293,  0.77779157, -1.28635717, -1.30449851],
       [-0.15993709,  1.6817624 , -1.17146111, -1.1730629 ],
       [ 0.33535196, -0.35217197,  0.55197984,  0.27272891]])

array([[-1, -1,  1],
       [-1, -1,  1],
       [ 1, -1, -1],
       [ 1, -1, -1],
       [-1,  1, -1]])




In [21]:
# Training and Test (OOP)
#
# Fits the OLS classifier on the training split and predicts the test split.
# `y_h` (the predictions) and `ols` are reused by the following cells.

# Explicit import instead of `import *`: OLS is the only name used here.
from ols_classifier import OLS

# NOTE(review): the meaning of aprox=1 is defined in ols_classifier -- confirm there.
ols = OLS(aprox=1)

ols.fit(data.X_tr,data.y_tr)

y_h = ols.predict(data.X_ts)

In [22]:
# Information About Parameters and Outputs
#
# Prints the shape of the predictions first, then the shape of the fitted
# weight matrix. In the recorded run these were (30, 3) and (3, 5); the
# weights have one more column than the 4 input attributes -- presumably a
# bias term, to be confirmed in ols_classifier.

for dims in (y_h.shape, ols.W.shape):
    print(dims)


(30, 3)
(3, 5)


In [23]:
# Display Results (Statistics)
#
# Compares the test labels with the predictions `y_h`, then shows the
# confusion matrix and the accuracy computed by the statistics object.

# Explicit import instead of `import *`: STATSCLASS is the only name used here.
from statistics_object import STATSCLASS

stats = STATSCLASS()

# Argument order is (true labels, predicted labels), as in the original call.
stats.calculate(data.y_ts,y_h)

display(stats.confusion_matrix)
display(stats.accuracy)

array([[ 10.,   0.,   0.],
       [  0.,   5.,   3.],
       [  0.,   3.,   9.]])

0.80000000000000004

In [24]:
# Save Objects 

# See: https://stackoverflow.com/questions/4529815/saving-an-object-data-persistence