# Objective

Here, we'll run through our training data to gather information and try to achieve the best possible model

In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import csv
import pandas as pd
import time
import sklearn.metrics as metrics

In [8]:
# Functions

def addColumnThetaZero (array):
    return np.c_[np.ones(array.shape[0]), array]

def formatArray (dataFrame, columnToExtract) :
    array = dataFrame.values
    target = array[:,columnToExtract]
    params = np.delete(array, columnToExtract, axis = 1)
    return params, target

def loadFashionTrainData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_train.csv")

def loadFashionTestData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_test.csv")

def split_train_test(data, test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

def regressionLogisticCostFunction (results, model, X):
    agaTheta = model.predict_proba(X)
    n = X.shape[0]
    diference = results - agaTheta
    squareDiference = diference * diference
    return (np.sum(squareDiference)/(2*n))

def createTarget (target):
    results = np.zeros((target.size, 10), dtype=int)
    for i in range(10):
        for j in range(target.size):
            if (target[j] != i):
                results[j][i - 1] = 0
            else:
                results[j][i - 1] = 1
    return results


# The dataset

First and foremost, we'll open train and test data. The training data is split to obtain validation items and the the target values are also separated from the original data. Also, added a column for Bias.


In [9]:
fashionTrainDataset = loadFashionTrainData()
fashionTestDataset = loadFashionTestData()

trainSet, validationSet = split_train_test(fashionTrainDataset, 0.2)

fashionTrainParams, fashionTrainTarget = formatArray(trainSet, 0)
fashionValidationSetParams, fashionValidationSetTarget = formatArray(validationSet, 0)
fashionTestParams, fashionTestTarget = formatArray (fashionTestDataset)
trainTarget = createTarget(fashionTrainTarget)

fashionTrainParams = addColumnThetaZero(fashionTrainParams)
fashionValidationSetParams = addColumnThetaZero(fashionValidationSetParams)
fashionTestParams = addColumnThetaZero(fashionTestParams)

# Logistic Regression 1
- Multi-class choice: Multinomial
- Solver: Newton-CG
- Max Iteration: 10

In [12]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 10
multiClass = 'multinomial'

logisticModel = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode, penalty = penalt)

start = time.clock()
logisticModel.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel.predict(fashionValidationSetParams), fashionValidationSetTarget))
costFunction = regressionLogisticCostFunction(trainTarget, logisticModel, fashionTrainParams)
print (costFunction)


24.050830000000005
Score : 0.85175
0.85175
(array([0.80844156, 0.95741056, 0.76583333, 0.8784219 , 0.77552743,
       0.90184049, 0.59292035, 0.94689542, 0.95213229, 0.95289256]), array([0.81639344, 0.96315338, 0.76328904, 0.84639255, 0.72476341,
       0.9440367 , 0.65921288, 0.91838352, 0.94391717, 0.9427637 ]), array([0.81239804, 0.96027339, 0.76455907, 0.86210984, 0.74928659,
       0.9224563 , 0.62431173, 0.93242156, 0.94800693, 0.94780107]), array([1220, 1167, 1204, 1289, 1268, 1090, 1118, 1262, 1159, 1223]))
0.8858244965330608




# Logistic Regression 2
- Multi-class choice: Multinomial
- Solver: Newton-CG
- Max Iteration: 100

In [13]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 100
multiClass = 'multinomial'

logisticModel2 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel2.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel2.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel2.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel2.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction2 = regressionLogisticCostFunction(trainTarget, logisticModel2, fashionTrainParams)
print (costFunction2)

1835.302801
Score : 0.8318333333333333
0.8318333333333333
(array([0.79788961, 0.95655877, 0.75166667, 0.84782609, 0.76371308,
       0.89833479, 0.5695897 , 0.91666667, 0.91122715, 0.92066116]), array([0.78202068, 0.92276089, 0.75734677, 0.83704293, 0.73160873,
       0.90788308, 0.63554758, 0.90048154, 0.90727903, 0.93065998]), array([0.78987545, 0.93935592, 0.75449603, 0.8424    , 0.74731627,
       0.9030837 , 0.60076368, 0.90850202, 0.90924881, 0.92563357]), array([1257, 1217, 1191, 1258, 1237, 1129, 1114, 1246, 1154, 1197]))
0.910184608984921




# Logistic Regression 3
- Multi-class choice: Multinomial
- Solver: Newton-CG
- Max Iteration: 1000

In [None]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 500
multiClass = 'multinomial'

logisticModel3 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel3.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel3.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel3.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel3.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction3 = regressionLogisticCostFunction(trainTarget, logisticModel3, fashionTrainParams)
print (costFunction3)

# Logistic Regression 4
- Multi-class choice: Multinomial
- Solver: SAG
- Max Iteration: 10

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 10
multiClass = 'multinomial'

logisticModel4 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel4.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel4.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel4.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel4.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction4 = regressionLogisticCostFunction(trainTarget, logisticModel4, fashionTrainParams)
print (costFunction4)

# Logistic Regression 5
- Multi-class choice: Multinomial
- Solver: SAG
- Max Iteration: 100

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 100
multiClass = 'multinomial'

logisticModel5 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel5.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel5.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel5.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel5.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction5 = regressionLogisticCostFunction(trainTarget, logisticModel5, fashionTrainParams)
print (costFunction6)

# Logistic Regression 6
- Multi-class choice: Multinomial
- Solver: SAG
- Max Iteration: 500

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 500
multiClass = 'multinomial'

logisticModel6 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel6.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel6.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel6.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel6.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction6 = regressionLogisticCostFunction(trainTarget, logisticModel6, fashionTrainParams)
print (costFunction6)

# Logistic Regression 7
- Multi-class choice: One vs All
- Solver: Newton-CG
- Max Iteration: 10

In [None]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 10
multiClass = 'ovr'

logisticModel7 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel7.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel7.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel7.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel7.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction7 = regressionLogisticCostFunction(trainTarget, logisticModel7, fashionTrainParams)
print (costFunction7)

# Logistic Regression 8
- Multi-class choice: One vs All
- Solver: Newton-CG
- Max Iteration: 100

In [None]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 100
multiClass = 'ovr'

logisticModel8 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel8.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel8.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel8.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel8.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction8 = regressionLogisticCostFunction(trainTarget, logisticModel8, fashionTrainParams)
print (costFunction8)

# Logistic Regression 9
- Multi-class choice: One vs All
- Solver: Newton-CG
- Max Iteration: 500

In [None]:
penalt = 'l2'
solverMode = 'newton-cg'
maxIter = 500
multiClass = 'ovr'

logisticModel9 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel11.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel9.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel9.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel9.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction9 = regressionLogisticCostFunction(trainTarget, logisticModel9, fashionTrainParams)
print (costFunction9)

# Logistic Regression 10
- Multi-class choice: One vs All
- Solver: SAG
- Max Iteration: 10

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 10
multiClass = 'ovr'

logisticModel10 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel10.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel10.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel10.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel10.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction10 = regressionLogisticCostFunction(trainTarget, logisticModel10, fashionTrainParams)
print (costFunction10)

# Logistic Regression 11
- Multi-class choice: One vs All
- Solver: SAG
- Max Iteration: 100

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 100
multiClass = 'ovr'

logisticModel11 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel11.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel11.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel11.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel11.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction11 = regressionLogisticCostFunction(trainTarget, logisticModel11, fashionTrainParams)
print (costFunction11)

# Logistic Regression 12
- Multi-class choice: One vs All
- Solver: SAG
- Max Iteration: 500

In [None]:
penalt = 'l2'
solverMode = 'sag'
maxIter = 500
multiClass = 'ovr'

logisticModel12 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel12.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel12.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel12.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel12.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction12 = regressionLogisticCostFunction(trainTarget, logisticModel12, fashionTrainParams)
print (costFunction12)

# Logistic Regression 13
- Multi-class choice: One vs All
- Solver: Liblinear
- Max Iteration: 10

In [None]:
penalt = 'l1'
solverMode = 'liblinear'
maxIter = 10
multiClass = 'ovr'

logisticModel13 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel13.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel13.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel13.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel13.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction13 = regressionLogisticCostFunction(trainTarget, logisticModel13, fashionTrainParams)
print (costFunction13)

# Logistic Regression 14
- Multi-class choice: One vs All
- Solver: Liblinear
- Max Iteration: 100

In [None]:
penalt = 'l1'
solverMode = 'liblinear'
maxIter = 100
multiClass = 'ovr'

logisticModel14 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel14.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel14.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel14.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel14.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction14 = regressionLogisticCostFunction(trainTarget, logisticModel14, fashionTrainParams)
print (costFunction14)

# Logistic Regression 15
- Multi-class choice: One vs All
- Solver: Liblinear
- Max Iteration: 500

In [None]:
penalt = 'l1'
solverMode = 'liblinear'
maxIter = 500
multiClass = 'ovr'

logisticModel15 = LogisticRegression(max_iter = maxIter, multi_class = multiClass, solver = solverMode)

start = time.clock()
logisticModel15.fit(fashionTrainParams, fashionTrainTarget)
print (time.clock() - start)

print("Score : "+ str(logisticModel15.score(fashionValidationSetParams, fashionValidationSetTarget)))
print(metrics.accuracy_score(logisticModel15.predict(fashionValidationSetParams), fashionValidationSetTarget))
print(metrics.precision_recall_fscore_support(logisticModel15.predict(fashionValidationSetParams), fashionValidationSetTarget))

costFunction15 = regressionLogisticCostFunction(trainTarget, logisticModel15, fashionTrainParams)
print (costFunction15)

In [None]:

fashionTestParams, fashionTestTarget = formatArray (fashionTestDataset)
fashionTestParams = addColumnThetaZero(fashionTestParams)

testCostFunction1 = regressionLogisticCostFunction(fashionTestTarget, logisticModel1, fashionTestParams)
testCostFunction2 = regressionLogisticCostFunction(fashionTestTarget, logisticModel2, fashionTestParams)
testCostFunction3 = regressionLogisticCostFunction(fashionTestTarget, logisticModel3, fashionTestParams)
testCostFunction4 = regressionLogisticCostFunction(fashionTestTarget, logisticModel4, fashionTestParams)
testCostFunction5 = regressionLogisticCostFunction(fashionTestTarget, logisticModel5, fashionTestParams)
testCostFunction6 = regressionLogisticCostFunction(fashionTestTarget, logisticModel6, fashionTestParams)
testCostFunction7 = regressionLogisticCostFunction(fashionTestTarget, logisticModel7, fashionTestParams)
testCostFunction8 = regressionLogisticCostFunction(fashionTestTarget, logisticModel8, fashionTestParams)
testCostFunction9 = regressionLogisticCostFunction(fashionTestTarget, logisticModel9, fashionTestParams)
testCostFunction10 = regressionLogisticCostFunction(fashionTestTarget, logisticModel10, fashionTestParams)
testCostFunction11 = regressionLogisticCostFunction(fashionTestTarget, logisticModel11, fashionTestParams)
testCostFunction12 = regressionLogisticCostFunction(fashionTestTarget, logisticModel12, fashionTestParams)
testCostFunction13 = regressionLogisticCostFunction(fashionTestTarget, logisticModel13, fashionTestParams)
testCostFunction14 = regressionLogisticCostFunction(fashionTestTarget, logisticModel14, fashionTestParams)
testCostFunction15 = regressionLogisticCostFunction(fashionTestTarget, logisticModel15, fashionTestParams)
print ("Custo e acurácia 1":)
print (testCostFunction1)
print(metrics.accuracy_score(logisticModel1.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 2":)
print (testCostFunction2)
print(metrics.accuracy_score(logisticModel2.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 3":)
print (testCostFunction3)
print(metrics.accuracy_score(logisticModel3.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 4":)
print (testCostFunction4)
print(metrics.accuracy_score(logisticModel4.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 5":)
print (testCostFunction5)
print(metrics.accuracy_score(logisticModel5.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 6":)
print (testCostFunction6)
print(metrics.accuracy_score(logisticModel6.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 7":)
print (testCostFunction7)
print(metrics.accuracy_score(logisticModel7.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 8":)
print (testCostFunction8)
print(metrics.accuracy_score(logisticModel8.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 9":)
print (testCostFunction9)
print(metrics.accuracy_score(logisticModel9.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 10":)
print (testCostFunction10)
print(metrics.accuracy_score(logisticModel10.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 11":)
print (testCostFunction11)
print(metrics.accuracy_score(logisticModel11.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 12":)
print (testCostFunction12)
print(metrics.accuracy_score(logisticModel12.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 13":)
print (testCostFunction13)
print(metrics.accuracy_score(logisticModel13.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 14":)
print (testCostFunction14)
print(metrics.accuracy_score(logisticModel14.predict(fashionTestParams), fashionTestTarget))
print ("Custo e acurácia 15":)
print (testCostFunction15)
print(metrics.accuracy_score(logisticModel15.predict(fashionTestParams), fashionTestTarget))