In [13]:
import json
import pandas as pd
import numpy as np

from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

In [14]:
def load_data(data_path, is_train=True):
    """Load one split (train or test) of features and labels from a JSON file.

    The file must contain a JSON object holding the keys ``x_train`` /
    ``y_train`` and ``x_test`` / ``y_test``; only the pair selected by
    ``is_train`` is read.

    :param data_path: Path to the JSON data file. When ``None``, the path
        returned by ``_get_data_path()`` is used instead.
    :type data_path: str or None
    :param is_train: Truthy to return the training split, falsy for the test split.
    :type is_train: bool
    :return: Tuple ``(X, y)`` exactly as stored in the file (no normalization).
    :rtype: tuple
    :raises KeyError: If the selected split's keys are missing from the file.
    """
    if data_path is None:
        # NOTE(review): _get_data_path is not defined in the cells visible here;
        # confirm it exists before relying on the None default.
        data_path = _get_data_path()

    # JSON interchange text is UTF-8; do not depend on the platform default encoding.
    with open(data_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    split = 'train' if is_train else 'test'
    x_key, y_key = f'x_{split}', f'y_{split}'
    try:
        X = data[x_key]
        y = data[y_key]
    except KeyError as exc:
        # Same exception type as a plain lookup, but says which file and split failed.
        raise KeyError(
            f"{data_path!r} has no {exc.args[0]!r} entry (expected {x_key!r} and {y_key!r})"
        ) from exc

    # TODO: features are returned unscaled; decide whether normalization belongs here.
    return X, y

In [15]:
# Both splits are read from the same JSON file; `is_train` picks which pair of keys is returned.
x_train, y_train = load_data("iris_data/iris.json", is_train=True)
x_test, y_test = load_data("iris_data/iris.json", is_train=False)

In [16]:
# Logistic-loss SGD classifier with a constant step size of 0.01.
# max_iter=1 caps every fit() call at a single pass over the data it is given.
# random_state pins the estimator's internal shuffling so that a fresh
# Restart & Run All produces the same coefficients and metrics.
model = SGDClassifier(
    loss='log_loss',
    max_iter=1,
    learning_rate='constant',
    eta0=0.01,
    random_state=42,
)
# Placeholder fit: three all-zero rows, one per label 0/1/2, so the fitted
# attributes (e.g. coef_) exist before any real data has been seen.
model.fit([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [0, 1, 2])



In [17]:
# Inspect the coefficient matrix produced by the placeholder fit on all-zero rows.
model.coef_

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [18]:
# Fit on the loaded training split; the estimator returned by fit() is what this cell displays.
model.fit(X=x_train, y=y_train)



In [19]:
# Spot-check a single four-feature row; predict() takes a batch, hence the wrapping list.
# NOTE(review): the saved output for this cell is class 0, while the identical row
# in the x_test listing carries label 2 in y_test - worth investigating.
sample_row = [6.7, 3.3, 5.6, 2.1]
model.predict([sample_row])

array([0])

In [20]:
# NOTE(review): this echoes the entire test feature list into the saved output;
# a slice such as x_test[:5] would show the format without the bulk.
x_test

[[5.0, 3.7, 1.8, 0.3],
 [4.7, 3.1, 1.4, 0.2],
 [6.7, 3.3, 5.6, 2.1],
 [6.4, 3.1, 5.1, 1.9],
 [5.4, 3.5, 1.3, 0.3],
 [5.5, 3.6, 1.6, 0.5],
 [7.9, 3.8, 6.3, 2.1],
 [7.3, 2.9, 6.2, 1.8],
 [5.5, 3.2, 1.4, 0.5],
 [7.5, 2.8, 6.7, 2.0],
 [6.0, 2.1, 4.9, 1.6],
 [6.4, 2.7, 5.5, 2.2],
 [5.9, 2.2, 5.0, 1.6],
 [7.6, 2.6, 6.7, 2.2],
 [5.1, 3.5, 1.3, 0.3],
 [5.5, 3.8, 1.8, 0.5],
 [6.6, 2.6, 5.7, 1.9],
 [5.2, 3.7, 1.6, 0.1],
 [6.7, 3.1, 5.5, 2.3],
 [6.3, 2.8, 5.6, 1.8],
 [4.7, 2.9, 1.2, 0.1],
 [4.9, 3.6, 1.4, 0.2],
 [5.0, 3.0, 1.5, 0.2],
 [5.4, 3.4, 1.3, 0.2],
 [4.4, 3.2, 1.3, 0.1],
 [5.3, 3.6, 1.5, 0.3],
 [7.3, 2.9, 6.3, 2.0],
 [6.1, 2.0, 3.9, 1.1],
 [6.8, 3.1, 5.1, 2.4],
 [6.9, 3.0, 5.4, 2.1],
 [5.0, 3.2, 1.5, 0.3],
 [5.2, 3.8, 1.2, 0.3],
 [4.5, 3.1, 1.5, 0.3],
 [6.5, 3.1, 5.2, 2.0],
 [6.3, 3.1, 5.4, 1.8],
 [4.6, 3.1, 1.5, 0.1],
 [7.5, 3.1, 6.5, 2.2],
 [7.0, 2.8, 5.8, 2.0],
 [4.8, 3.4, 1.5, 0.5],
 [5.0, 3.7, 1.5, 0.5],
 [6.2, 3.1, 5.6, 1.8],
 [5.1, 4.0, 1.5, 0.1],
 [6.7, 3.3, 5.5, 2.6],
 [7.1, 3.1,

In [21]:
# NOTE(review): this echoes every test label, one per output line; a slice or a
# per-class count would summarize the labels far more compactly.
y_test

[0,
 0,
 2,
 2,
 0,
 0,
 2,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 0,
 0,
 2,
 0,
 2,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 1,
 2,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 2,
 2,
 0,
 0,
 2,
 0,
 2,
 2,
 0,
 1,
 2,
 0,
 2,
 2,
 2,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 2,
 2,
 0,
 2,
 0,
 0,
 0,
 2,
 0,
 0,
 2,
 0,
 2,
 2,
 0,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 1,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 0,
 2,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 0,
 2,
 0,
 1,
 1,
 0,
 2,
 0,
 0,
 2,
 1,
 2,
 0,
 2,
 0,
 0,
 0,
 2,
 0,
 2,
 1,
 0,
 0,
 1,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 1,
 2,
 2,
 2,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 0,
 2,
 2,
 1,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 2,
 1,
 0,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 0,
 0,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 1,
 2,
 0,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 0,
 0,
 2,
 0,
 2,
 0,
 2,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 1,
 0,
 1,


In [22]:
# Loss and accuracy of the fitted model on both splits.
# labels=model.classes_ tells log_loss which class each predict_proba column
# belongs to, so the score stays valid even if a split lacks one of the classes
# (without it, the label set is inferred from y_true alone).
report = {
    "training_loss": log_loss(y_train, model.predict_proba(x_train), labels=model.classes_),
    "training_accuracy": accuracy_score(y_train, model.predict(x_train)),
    "test_loss": log_loss(y_test, model.predict_proba(x_test), labels=model.classes_),
    "test_accuracy": accuracy_score(y_test, model.predict(x_test)),
}

In [23]:
# Display the metrics dictionary.
# NOTE(review): the output saved with this cell shows accuracy below 0.1% and a
# log-loss of about 36 on both splits, i.e. the model is confidently wrong on
# nearly every row. Verify the labels and the training setup before using these numbers.
report

{'training_loss': 36.020504477821895,
 'training_accuracy': 0.0003846153846153846,
 'test_loss': 36.00404167650338,
 'test_accuracy': 0.0007692307692307692}