In [103]:
from black import out
import numpy as np
import random
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# local library
import PUJ.Model.Logistic

In [104]:
# constants

# File name for serialized model weights; the only visible reference is the
# commented-out saveModel(models, MODEL_WEIGHTS) call in the training cell.
MODEL_WEIGHTS = "mnists_weights.txt"

In [105]:
# Download the MNIST dataset. Keras caches the archive under the given file
# name, so one call both downloads (first run) and loads the arrays; the
# previous extra call whose result was discarded has been removed.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path="mnist.npz")

# Merge the official train/test splits into a single sample/label set.
X = np.concatenate((x_train, x_test), axis=0)
Y = np.concatenate((y_train, y_test), axis=0)

# Flatten every 2-D image into one row vector (one column per pixel).
XFlattened = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))

# Sorted unique class labels as plain Python ints (np.unique sorts its output;
# tolist() avoids carrying NumPy scalar types into later cells).
labels = np.unique(Y).tolist()

print("X.shape", XFlattened.shape)
print("Labels", labels)

X.shape (70000, 784)
Labels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [106]:
## draw number
def drawNumber(numberArray, label):
    """Display a digit image in grayscale with its label in the title.

    :param numberArray: 2-D array-like of pixel intensities
    :param label: value shown in the figure title after "Number "
    """
    image = np.asarray(numberArray)
    plt.title("Number " + str(label))
    # Scale the colormap from 0 up to the brightest pixel of this image.
    plt.imshow(image, cmap="gray", vmin=0, vmax=np.max(image))
    # Actually call show(); the original only referenced the function object.
    plt.show()

In [107]:
def initializeModel(labels, paramsSize, seed=0):
    """Build one randomly initialised logistic model per label.

    :param labels: iterable of class labels; each is stored on its model as a string
    :param paramsSize: number of input features; each model receives
        paramsSize + 1 parameters
    :param seed: when greater than 0, seeds Python's `random` module first
    :return: list of models, in the same order as `labels`
    """
    if seed > 0:
        random.seed(seed)

    def _newModel(labelItem):
        # One classifier per label, with parameters drawn uniformly from [-1, 1].
        classifier = PUJ.Model.Logistic()
        classifier.label = str(labelItem)
        classifier.setParameters(
            [random.uniform(-1, 1) for _ in range(paramsSize + 1)]
        )
        return classifier

    return [_newModel(labelItem) for labelItem in labels]

In [108]:
def saveModel(models, modelFileName):
    """Write the models to a text file.

    The first line holds the number of models; each following line holds a
    model's label, a space, and the model's `str()` representation.

    :param models: iterable of models exposing a string `label` and `__str__`
    :param modelFileName: path of the file to (over)write
    """
    models = list(models)
    # Count the models actually being saved instead of relying on the
    # notebook-global `labels` list.
    lines = [str(len(models))]
    lines.extend(model.label + " " + str(model) for model in models)
    # The context manager closes the file even if the write fails.
    with open(modelFileName, "w") as modelFile:
        modelFile.write("\n".join(lines))

In [109]:
def trainModels(models, x, y, learningRate=1e-3):
    """Fit each model as a one-vs-rest classifier for its own label.

    :param models: list of models whose `label` is an integer in string form
    :param x: training samples passed to the cost function
    :param y: iterable of true labels, one per sample
    :param learningRate: step size handed to the gradient-descent optimizer
    :return: the same `models` list after fitting
    """
    for position, classifier in enumerate(models):

        print("\ntraining model: ", classifier.label)

        # Binary target: 1 where the sample belongs to this model's class.
        ownLabel = int(classifier.label)
        binaryTargets = [1 if item == ownLabel else 0 for item in y]
        yTemp = np.matrix(np.array(binaryTargets))
        modelCost = PUJ.Model.Logistic.Cost(classifier, x, yTemp.T)

        # Debugger used to report progress during the fit.
        debugger = PUJ.Optimizer.Debug.Simple
        # debugger = PUJ.Optimizer.Debug.PlotPolynomialCost(x, yTemp.T)

        # Fit with gradient descent: 200 iterations, debug output every 10.
        optimizer = PUJ.Optimizer.GradientDescent(modelCost)
        optimizer.setDebugFunction(debugger)
        optimizer.setLearningRate(learningRate)
        optimizer.setNumberOfIterations(200)
        optimizer.setNumberOfDebugIterations(10)
        optimizer.Fit()

        models[position] = classifier

    return models


def loadModels(filename):
    """Read models from a text file in the layout written by `saveModel`.

    The first line (the model count) is skipped. Every other non-blank line is
    split on whitespace: field 0 is the label and fields 2 onward are the
    parameters.

    :param filename: path of the weights file
    :return: list of logistic models in file order
    """
    # The context manager closes the file even if reading fails.
    with open(filename, "r") as modelsFile:
        modelLines = modelsFile.readlines()

    models = []
    for line in modelLines[1:]:
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing with IndexError.
            continue
        model = PUJ.Model.Logistic()
        model.label = str(fields[0])
        # Field 1 is skipped; presumably it is a parameter count emitted by
        # str(model) -- TODO confirm against PUJ.Model.Logistic.__str__.
        model.setParameters([float(value) for value in fields[2:]])
        models.append(model)

    return models


def evaluateAll(models, X):
    """Run `evaluate` on every sample of X and collect the results.

    :param models: list of models forwarded to `evaluate`
    :param X: iterable of samples
    :return: NumPy array with one `evaluate` result per sample
    """
    return np.array([evaluate(models, sample) for sample in X])


def evaluate(models, image):
    """Return the position of the model that scores highest for an image.

    :param models: list of models exposing `evaluate(vector)`; element [0, 0]
        of each result is used as that model's score
    :param image: 2-D array, flattened to a vector before scoring
    :return: index into `models` of the first model with the maximum score
    """
    pixels = image.reshape((image.shape[0] * image.shape[1]))
    scores = [candidate.evaluate(pixels)[0, 0] for candidate in models]
    return scores.index(max(scores))


def generateConfusionMatrix(realY, estimatedY, size=2):
    """
    Count (actual, predicted) label pairs into a square matrix.

    :param realY: array of actual labels; selects the row
    :param estimatedY: estimated labels, indexed in step with realY; selects the column
    :param size: the number of classes, defaults to 2 (optional)
    :return: size x size float array of counts
    """
    counts = np.zeros((size, size))
    for idx, actual in enumerate(realY):
        counts[int(actual), int(estimatedY[idx])] += 1

    return counts


def metrics(y_real, y_estimated, labels):
    """Compute the scikit-learn confusion matrix and classification report.

    :param y_real: true labels
    :param y_estimated: predicted labels
    :param labels: label list forwarded to both scikit-learn functions
    :return: tuple (confusion matrix, classification report)
    """
    return (
        confusion_matrix(y_real, y_estimated, labels=labels),
        classification_report(y_real, y_estimated, labels=labels),
    )

In [110]:
# Training pipeline (currently disabled): create one model per label, save the
# initial weights, split the flattened data 70/30, train, and save the result.
# models = initializeModel(labels, XFlattened.shape[1], seed=12)
# saveModel(models, MODEL_WEIGHTS)
# X_train, X_test, y_train, y_test = train_test_split(XFlattened, Y, train_size=0.7, shuffle=True)

# trainedModels = trainModels(models, X_train, y_train)
# saveModel(trainedModels, "mnists_weights2.txt")


# Load previously trained weights and score them on the MNIST test images.
# x_test holds 2-D images; evaluate() flattens each one before scoring.
# NOTE(review): the disabled training code splits XFlattened, which is the
# train and test sets concatenated, so x_test may contain samples seen during
# training -- confirm how the loaded weights were produced.
models = loadModels("./trained_weights/mnists_weights_trained_10000.txt")
estimatedY = evaluateAll(models, x_test)
cm, cr = metrics(y_test, estimatedY, labels)

# Confusion matrix followed by the per-class classification report.
print(cm)
print(cr)

  return 1.0 / ( 1.0 + numpy.exp( -super( ).evaluate( x ) ) )


[[ 950    0    3    6    0    6    6    2    5    2]
 [   2 1099    5    3    1    2    5    1   16    1]
 [  13    9  906   21    9    7   10   11   44    2]
 [   4    3   43  897    3   25    4    5   15   11]
 [   2    3    8    5  900    1    8    2   14   39]
 [  16    3    6   60   19  703   17    7   52    9]
 [  11    1   15    7   12   25  877    2    8    0]
 [   3   13   25   21   12    3    0  913   15   23]
 [  12   12   29   37   18   69   10   10  760   17]
 [   6    8    9   20   64   16    2   42   38  804]]
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       980
           1       0.95      0.97      0.96      1135
           2       0.86      0.88      0.87      1032
           3       0.83      0.89      0.86      1010
           4       0.87      0.92      0.89       982
           5       0.82      0.79      0.80       892
           6       0.93      0.92      0.92       958
           7       0.92      0.89   

In [111]:
from black import out
import numpy as np
import random
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# local library
import PUJ.Model.Logistic

In [112]:
# constants

# File name for serialized model weights; the only visible reference is a
# commented-out saveModel(models, MODEL_WEIGHTS) call elsewhere in the notebook.
MODEL_WEIGHTS = "mnists_weights.txt"

In [113]:
# Download the MNIST dataset. Keras caches the archive under the given file
# name, so one call both downloads (first run) and loads the arrays; the
# previous extra call whose result was discarded has been removed.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path="mnist.npz")

# Merge the official train/test splits into a single sample/label set.
X = np.concatenate((x_train, x_test), axis=0)
Y = np.concatenate((y_train, y_test), axis=0)

# Flatten every 2-D image into one row vector (one column per pixel).
XFlattened = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))

# Sorted unique class labels as plain Python ints (np.unique sorts its output;
# tolist() avoids carrying NumPy scalar types into later cells).
labels = np.unique(Y).tolist()

print("X.shape", XFlattened.shape)
print("Labels", labels)

X.shape (70000, 784)
Labels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [114]:
## draw number
def drawNumber(numberArray, label):
    """Display a digit image in grayscale with its label in the title.

    :param numberArray: 2-D array-like of pixel intensities
    :param label: value shown in the figure title after "Number "
    """
    image = np.asarray(numberArray)
    plt.title("Number " + str(label))
    # Scale the colormap from 0 up to the brightest pixel of this image.
    plt.imshow(image, cmap="gray", vmin=0, vmax=np.max(image))
    # Actually call show(); the original only referenced the function object.
    plt.show()

In [115]:
def initializeModel(labels, paramsSize, seed=0):
    """Build one randomly initialised logistic model per label.

    :param labels: iterable of class labels; each is stored on its model as a string
    :param paramsSize: number of input features; each model receives
        paramsSize + 1 parameters
    :param seed: when greater than 0, seeds Python's `random` module first
    :return: list of models, in the same order as `labels`
    """
    if seed > 0:
        random.seed(seed)

    def _newModel(labelItem):
        # One classifier per label, with parameters drawn uniformly from [-1, 1].
        classifier = PUJ.Model.Logistic()
        classifier.label = str(labelItem)
        classifier.setParameters(
            [random.uniform(-1, 1) for _ in range(paramsSize + 1)]
        )
        return classifier

    return [_newModel(labelItem) for labelItem in labels]

In [116]:
def saveModel(models, modelFileName):
    """Write the models to a text file.

    The first line holds the number of models; each following line holds a
    model's label, a space, and the model's `str()` representation.

    :param models: iterable of models exposing a string `label` and `__str__`
    :param modelFileName: path of the file to (over)write
    """
    models = list(models)
    # Count the models actually being saved instead of relying on the
    # notebook-global `labels` list.
    lines = [str(len(models))]
    lines.extend(model.label + " " + str(model) for model in models)
    # The context manager closes the file even if the write fails.
    with open(modelFileName, "w") as modelFile:
        modelFile.write("\n".join(lines))