In [7]:
import numpy as np

dataset = "CSpace2"

# CSV locations of the training and test splits for the selected dataset.
training_file = "./data/%s_training.csv" % dataset
test_file = "./data/%s_test.csv" % dataset

# Parse both files as comma-separated numeric tables, skipping the first
# three rows of each file.
training_data = np.loadtxt(training_file, delimiter=',', skiprows=3)
test_data = np.loadtxt(test_file, delimiter=',', skiprows=3)

# Columns 0-1 are used as the features, column 2 as the label.
training_X, training_y = training_data[:, :2], training_data[:, 2]
test_X, test_y = test_data[:, :2], test_data[:, 2]

# Disabled: remap the label -1 to 0.
# training_y[training_y == -1] = 0
# test_y[test_y == -1] = 0

# Sanity check: print the shape of every array, one per line.
for split_array in (training_X, training_y, test_X, test_y):
    print(split_array.shape)

# Disabled: class-balance report for the training labels.
# print("Number of positive vs negative samples: %s vs %s" % (np.count_nonzero(training_y == 1), np.count_nonzero(training_y == -1)))

(800, 2)
(800,)
(200, 2)
(200,)


In [8]:
from sklearn.linear_model import LogisticRegression
import json

# Destination of the exported logistic-regression parameters and predictions.
lr_output_file = "./logRegression_%s.json" % dataset

# Fit on the training split (fit() returns the estimator itself, so the
# constructor and the fit call can be written as separate statements).
lr_clf = LogisticRegression(random_state=0, solver='lbfgs', verbose=1)
lr_clf.fit(training_X, training_y)

# Predicted labels plus per-class probabilities for the test split.
# predict_proba is transposed so that its first axis lines up with y_lr
# when the two are stacked below.
y_lr = lr_clf.predict(test_X)
p_lr = lr_clf.predict_proba(test_X).T

# Column names of the exported table: the prediction, then one column per class.
header_lr = ['y_pred', *lr_clf.classes_.tolist()]
print(header_lr)

# One row per test sample: [y_pred, p(class_0), p(class_1), ...].
output_lr_array = np.vstack((y_lr, p_lr)).T

# Everything is converted to plain lists so it can be serialised as JSON.
output_lr = dict(
    coef=lr_clf.coef_.tolist(),
    intercept=lr_clf.intercept_.tolist(),
    test_output_header=header_lr,
    test_output=output_lr_array.tolist(),
)

with open(lr_output_file, 'w') as outfile:
    json.dump(output_lr, outfile)

['y_pred', -1.0, 1.0]


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [9]:
from sklearn.linear_model import SGDClassifier
import json

# Destination of the exported SGD-classifier parameters and predictions.
sgd_output_file = "./sgd_%s.json" % dataset

# Logistic regression trained with stochastic gradient descent.
# NOTE: newer scikit-learn releases renamed this loss to 'log_loss'
# ('log' was deprecated in 1.1 and removed in 1.3); switch the string when
# upgrading the environment.
sgd_clf = SGDClassifier(loss='log', alpha=1e-4, random_state=0, verbose=1).fit(training_X, training_y)

# Use SGD-specific names here: the previous version stored these results in
# y_lr / p_lr and silently overwrote the logistic-regression outputs.
y_sgd = sgd_clf.predict(test_X)
p_sgd = sgd_clf.predict_proba(test_X).T  # transposed so the first axis aligns with y_sgd

# Column names of the exported table: the prediction, then one column per class.
header_sgd = ['y_pred'] + sgd_clf.classes_.tolist()
print(header_sgd)  # was print(header_lr): printed the header of the wrong model

# One row per test sample: [y_pred, p(class_0), p(class_1), ...].
output_sgd_array = np.vstack((y_sgd, p_sgd)).T
output_sgd = {
    'coef': sgd_clf.coef_.tolist(),
    'intercept': sgd_clf.intercept_.tolist(),
    'test_output_header': header_sgd,
    'test_output': output_sgd_array.tolist()
}

with open(sgd_output_file, 'w') as outfile:
    json.dump(output_sgd, outfile)

-- Epoch 1
Norm: 8.73, NNZs: 2, Bias: 61.115077, T: 800, Avg. loss: 0.555963
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 16.08, NNZs: 2, Bias: 58.040573, T: 1600, Avg. loss: 0.321125
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 8.96, NNZs: 2, Bias: 59.717566, T: 2400, Avg. loss: 0.243762
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 18.94, NNZs: 2, Bias: 55.956834, T: 3200, Avg. loss: 0.156149
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 11.36, NNZs: 2, Bias: 58.217413, T: 4000, Avg. loss: 0.162834
Total training time: 0.01 seconds.
-- Epoch 6
Norm: 12.67, NNZs: 2, Bias: 57.987640, T: 4800, Avg. loss: 0.092928
Total training time: 0.01 seconds.
-- Epoch 7
Norm: 17.90, NNZs: 2, Bias: 55.545151, T: 5600, Avg. loss: 0.130886
Total training time: 0.01 seconds.
-- Epoch 8
Norm: 15.29, NNZs: 2, Bias: 55.964399, T: 6400, Avg. loss: 0.123278
Total training time: 0.01 seconds.
-- Epoch 9
Norm: 17.33, NNZs: 2, Bias: 54.979448, T: 7200, Avg. loss: 0.093483
Total traini

In [10]:
from sklearn.neural_network import MLPClassifier

# Destination of the exported MLP parameters and predictions.
mlp_output_file = "./mlp_%s.json" % dataset

# TODO: Do we need to transform the output from {-1, 1} to {0, 1}?
# Two hidden layers of 256 ReLU units each, trained with Adam.
mlp_clf = MLPClassifier(
    solver='adam',
    alpha=1e-4,
    activation='relu',
    hidden_layer_sizes=(256, 256),
    random_state=1,
    max_iter=1000,
    verbose=True,
)
mlp_clf.fit(training_X, training_y)

# Predicted labels plus per-class probabilities for the test split; the
# probabilities are transposed so their first axis lines up with y_mlp.
y_mlp = mlp_clf.predict(test_X)
p_mlp = mlp_clf.predict_proba(test_X).T

# Column names of the exported table: the prediction, then one column per class.
header_mlp = ['y_pred', *mlp_clf.classes_.tolist()]
print(header_mlp)

# One row per test sample: [y_pred, p(class_0), p(class_1), ...].
output_mlp_array = np.vstack((y_mlp, p_mlp)).T

# Per-layer weight matrices and bias vectors as nested plain lists (JSON-friendly).
coefs = [layer_weights.tolist() for layer_weights in mlp_clf.coefs_]
intercepts = [layer_bias.tolist() for layer_bias in mlp_clf.intercepts_]

output_mlp = dict(
    coef=coefs,
    intercept=intercepts,
    test_output_header=header_mlp,
    test_output=output_mlp_array.tolist(),
)

with open(mlp_output_file, 'w') as outfile:
    json.dump(output_mlp, outfile)

Iteration 1, loss = 0.48246091
Iteration 2, loss = 0.21618065
Iteration 3, loss = 0.11123294
Iteration 4, loss = 0.07108934
Iteration 5, loss = 0.05971234
Iteration 6, loss = 0.05599833
Iteration 7, loss = 0.05379363
Iteration 8, loss = 0.05168343
Iteration 9, loss = 0.04920830
Iteration 10, loss = 0.04587292
Iteration 11, loss = 0.04389534
Iteration 12, loss = 0.04180975
Iteration 13, loss = 0.03877963
Iteration 14, loss = 0.03719557
Iteration 15, loss = 0.03542967
Iteration 16, loss = 0.03414566
Iteration 17, loss = 0.03297715
Iteration 18, loss = 0.03158088
Iteration 19, loss = 0.03038861
Iteration 20, loss = 0.02946444
Iteration 21, loss = 0.02814407
Iteration 22, loss = 0.02705257
Iteration 23, loss = 0.02602492
Iteration 24, loss = 0.02502616
Iteration 25, loss = 0.02420860
Iteration 26, loss = 0.02325380
Iteration 27, loss = 0.02245297
Iteration 28, loss = 0.02180310
Iteration 29, loss = 0.02125769
Iteration 30, loss = 0.02030619
Iteration 31, loss = 0.01968929
Iteration 32, los

In [11]:
# Print the shape of every layer's weight matrix, then of every layer's
# bias vector, one shape per line.
for layer_arrays in (mlp_clf.coefs_, mlp_clf.intercepts_):
    for layer_array in layer_arrays:
        print(layer_array.shape)

(2, 256)
(256, 256)
(256, 1)
(256,)
(256,)
(1,)
