In [265]:
import json
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

In [266]:
# Logistic-loss linear classifier with a fixed SGD step size. In the cells
# shown here it is never fitted; its weights are assigned by hand further down.
model = SGDClassifier(
    loss='log_loss',
    max_iter=1000,
    learning_rate='constant',
    eta0=0.1,
)

In [267]:
# Two hard-coded weight sets that are later assigned to model.coef_ /
# model.intercept_ and evaluated one after the other.
# Each coefficient matrix is 3 x 4 and each intercept vector has 3 entries
# (presumably one row/entry per class and one column per feature -- confirm).
# NOTE(review): the origin of these numbers is not shown in this notebook.

# Weight set 1: coefficients.
c1 = np.array([
    [-6.32847312e-03, -1.22919032e-03,  9.10103308e-02,  9.63641294e-02],
    [-1.52684649e-02, -8.40774794e-03,  9.09436171e-02,  9.68655199e-02],
    [ 3.78315990e-03,  5.79172347e-05, -6.72514367e-02, -7.13816335e-02]
])

# Weight set 1: intercepts.
i1 = np.array([-0.00066241, -0.00265853,  0.00022719])

# Weight set 2: coefficients.
c2 = np.array([
    [-0.00825016, -0.00214151,  0.08803414,  0.09432094],
    [-0.0174228,  -0.00943632,  0.08930871,  0.0962702 ],
    [ 0.00309108, -0.00028205, -0.09333679, -0.09713136]
])

# Weight set 2: intercepts.
i2 = np.array([-0.00097097, -0.00300411,  0.00011484])

In [268]:
# Inject the first weight set directly into the (unfitted) estimator.
model.coef_, model.intercept_ = c1, i1

In [269]:
# scikit-learn documents ``classes_`` as an ndarray, and ``predict`` maps the
# array of winning class indices back through it. A plain list only works on
# releases that happen to go through ``take``; an array works everywhere.
model.classes_ = np.array([0, 1, 2])

In [270]:
# Sanity check: show the coefficients and intercepts currently set on the model.
[model.coef_, model.intercept_]

[array([[-6.32847312e-03, -1.22919032e-03,  9.10103308e-02,
          9.63641294e-02],
        [-1.52684649e-02, -8.40774794e-03,  9.09436171e-02,
          9.68655199e-02],
        [ 3.78315990e-03,  5.79172347e-05, -6.72514367e-02,
         -7.13816335e-02]]),
 array([-0.00066241, -0.00265853,  0.00022719])]

In [271]:
def load_data(data_path, is_train=True):
    """Read one split of a JSON dataset from disk.

    The file must contain a JSON object; the ``x_train``/``y_train`` keys are
    read for the training split and ``x_test``/``y_test`` for the test split.

    :param data_path: Location of the JSON file. When ``None``, the path
        returned by ``_get_data_path()`` is opened instead.
    :type data_path: str
    :param is_train: Truthy selects the training split, falsy the test split.
    :type is_train: bool
    :return: ``(X, y)`` exactly as stored in the file; nothing is normalized.
    :rtype: tuple
    """
    # Resolve the file to read once, then share a single open/parse path.
    source = _get_data_path() if data_path is None else data_path
    with open(source, 'r') as handle:
        payload = json.load(handle)

    # Choose the key pair that belongs to the requested split.
    feature_key, label_key = ('x_train', 'y_train') if is_train else ('x_test', 'y_test')

    # Open question kept from the original author: should X be normalized here?
    return payload[feature_key], payload[label_key]

In [272]:
# File that is actually evaluated below.
# Alternative input (previously assigned, then overwritten before any use):
# "data/clients/2/iris.json"
data_path = "iris_data/iris.json"

# Load each split once and wrap features and labels in DataFrames.
x_train, y_train = (pd.DataFrame(part) for part in load_data(data_path))
x_test, y_test = (pd.DataFrame(part) for part in load_data(data_path, is_train=False))

In [273]:
# Row counts of the four frames, in the order train X, train y, test X, test y.
[len(x_train), len(y_train), len(x_test), len(y_test)]

[5200, 5200, 1300, 1300]

In [274]:
# Log-loss and accuracy of the weights currently set on the model, for both splits.
report = dict(
    training_loss=log_loss(y_train, model.predict_proba(x_train)),
    training_accuracy=accuracy_score(y_train, model.predict(x_train)),
    test_loss=log_loss(y_test, model.predict_proba(x_test)),
    test_accuracy=accuracy_score(y_test, model.predict(x_test)),
)

In [275]:
# Show the metrics for the first weight set (c1 / i1).
report

{'training_loss': 1.2560450485715298,
 'training_accuracy': 0.4601923076923077,
 'test_loss': 1.3271029442469098,
 'test_accuracy': 0.4653846153846154}

In [276]:
# Swap in the second weight set; the same estimator object is reused.
model.coef_, model.intercept_ = c2, i2

In [277]:
# Same metrics as before, now computed with the second weight set (c2 / i2).
report = dict(
    training_loss=log_loss(y_train, model.predict_proba(x_train)),
    training_accuracy=accuracy_score(y_train, model.predict(x_train)),
    test_loss=log_loss(y_test, model.predict_proba(x_test)),
    test_accuracy=accuracy_score(y_test, model.predict(x_test)),
)

report

{'training_loss': 1.2954578431605002,
 'training_accuracy': 0.4601923076923077,
 'test_loss': 1.365947199831817,
 'test_accuracy': 0.4653846153846154}