In [1]:
import numpy as np

# Dataset name; it selects which training/test CSV pair under ./data is read.
dataset = "CSpace1"

training_file = "./data/%s_training.csv" % dataset
test_file = "./data/%s_test.csv" % dataset

# Both files are comma-separated; the first three rows of each are skipped.
training_data = np.loadtxt(training_file, delimiter=',', skiprows=3)
test_data = np.loadtxt(test_file, delimiter=',', skiprows=3)

# Columns 0-1 are used as the features and column 2 as the label.
training_X, training_y = training_data[:, :2], training_data[:, 2]
test_X, test_y = test_data[:, :2], test_data[:, 2]

# Optional remapping of the -1 label to 0 (currently disabled):
# training_y[training_y == -1] = 0
# test_y[test_y == -1] = 0

# Sanity check: one shape per line, training split first, then test split.
print(training_X.shape, training_y.shape, test_X.shape, test_y.shape, sep="\n")

# Optional class-balance report (currently disabled):
# print("Number of positive vs negative samples: %s vs %s" % (np.count_nonzero(training_y == 1), np.count_nonzero(training_y == -1)))

(1600, 2)
(1600,)
(400, 2)
(400,)


In [2]:
from sklearn.linear_model import LogisticRegression
import json

# Destination for the fitted parameters and the per-sample test predictions.
lr_output_file = "./logRegression_%s.json" % dataset

# Fit a logistic-regression classifier on the training split.
lr_clf = LogisticRegression(random_state=0, solver='lbfgs', verbose=1)
lr_clf.fit(training_X, training_y)

y_lr = lr_clf.predict(test_X)
# Transposed so that its first axis lines up with y_lr when the two are stacked.
p_lr = lr_clf.predict_proba(test_X).T

# Column names of the exported table: predicted label, then one column per class.
header_lr = ['y_pred', *lr_clf.classes_.tolist()]
print(header_lr)

# One row per test sample: predicted label followed by the class probabilities.
output_lr_array = np.column_stack((y_lr, p_lr.T))

output_lr = dict(
    coef=lr_clf.coef_.tolist(),
    intercept=lr_clf.intercept_.tolist(),
    test_output_header=header_lr,
    test_output=output_lr_array.tolist(),
)

# Persist everything as a single JSON document.
with open(lr_output_file, 'w') as outfile:
    json.dump(output_lr, outfile)

['y_pred', -1.0, 1.0]


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [4]:
from sklearn.linear_model import SGDClassifier
import json

# Destination for the fitted parameters and the per-sample test predictions.
sgd_output_file = "./sgd_%s.json" % dataset

# Linear classifier trained with SGD on the logistic loss.
# NOTE(review): scikit-learn 1.1 deprecated loss='log' in favour of
# loss='log_loss' (the old name was removed in 1.3); rename it when upgrading.
sgd_clf = SGDClassifier(loss='log', alpha=1e-4, random_state=0, verbose=1).fit(training_X, training_y)

# Use SGD-specific names so this cell does not overwrite the logistic-regression
# predictions (y_lr / p_lr) held in the kernel by the previous cell.
y_sgd = sgd_clf.predict(test_X)
p_sgd = sgd_clf.predict_proba(test_X).T  # Need to align the first axis with y_sgd

# Column names of the exported table: predicted label, then one column per class.
header_sgd = ['y_pred'] + sgd_clf.classes_.tolist()
print(header_sgd)  # previously printed header_lr (copy-paste slip)

# One row per test sample: predicted label followed by the class probabilities.
output_sgd_array = np.vstack((y_sgd, p_sgd)).T
output_sgd = {
    'coef': sgd_clf.coef_.tolist(),
    'intercept': sgd_clf.intercept_.tolist(),
    'test_output_header': header_sgd,
    'test_output': output_sgd_array.tolist()
}

# Persist everything as a single JSON document.
with open(sgd_output_file, 'w') as outfile:
    json.dump(output_sgd, outfile)

-- Epoch 1
Norm: 3.33, NNZs: 2, Bias: 1.849983, T: 1600, Avg. loss: 3.040701
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 5.91, NNZs: 2, Bias: -0.896838, T: 3200, Avg. loss: 1.411780
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 1.46, NNZs: 2, Bias: -1.705348, T: 4800, Avg. loss: 1.001597
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 0.50, NNZs: 2, Bias: -2.449151, T: 6400, Avg. loss: 0.817565
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 1.18, NNZs: 2, Bias: -1.863430, T: 8000, Avg. loss: 0.703656
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 0.85, NNZs: 2, Bias: -0.978572, T: 9600, Avg. loss: 0.656215
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 1.07, NNZs: 2, Bias: -0.901788, T: 11200, Avg. loss: 0.620853
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 0.22, NNZs: 2, Bias: -2.761558, T: 12800, Avg. loss: 0.606187
Total training time: 0.00 seconds.
-- Epoch 9
Norm: 0.13, NNZs: 2, Bias: -1.815312, T: 14400, Avg. loss: 0.580233
Total training t

In [3]:
from sklearn.neural_network import MLPClassifier

# Destination for the fitted parameters and the per-sample test predictions.
mlp_output_file = "./mlp_%s.json" % dataset

# TODO: Do we need to transform the output from {-1, 1} to {0, 1}?
# Two hidden layers of 256 ReLU units each, optimised with Adam.
mlp_clf = MLPClassifier(
    solver='adam',
    alpha=1e-4,
    activation='relu',
    hidden_layer_sizes=(256, 256),
    random_state=1,
    max_iter=1000,
    verbose=True,
)
mlp_clf.fit(training_X, training_y)

y_mlp = mlp_clf.predict(test_X)
# Transposed so that its first axis lines up with y_mlp when the two are stacked.
p_mlp = mlp_clf.predict_proba(test_X).T

# Column names of the exported table: predicted label, then one column per class.
header_mlp = ['y_pred', *mlp_clf.classes_.tolist()]
print(header_mlp)

# One row per test sample: predicted label followed by the class probabilities.
output_mlp_array = np.column_stack((y_mlp, p_mlp.T))

# Per-layer weight matrices and bias vectors converted to nested lists for JSON.
coefs = [weights.tolist() for weights in mlp_clf.coefs_]
intercepts = [bias.tolist() for bias in mlp_clf.intercepts_]

output_mlp = dict(
    coef=coefs,
    intercept=intercepts,
    test_output_header=header_mlp,
    test_output=output_mlp_array.tolist(),
)

# Persist everything as a single JSON document.
with open(mlp_output_file, 'w') as outfile:
    json.dump(output_mlp, outfile)

Iteration 1, loss = 0.54661467
Iteration 2, loss = 0.39732597
Iteration 3, loss = 0.36984004
Iteration 4, loss = 0.32599589
Iteration 5, loss = 0.28761793
Iteration 6, loss = 0.24836735
Iteration 7, loss = 0.21164702
Iteration 8, loss = 0.17738655
Iteration 9, loss = 0.14763241
Iteration 10, loss = 0.12418803
Iteration 11, loss = 0.10416211
Iteration 12, loss = 0.08914514
Iteration 13, loss = 0.07674189
Iteration 14, loss = 0.06754846
Iteration 15, loss = 0.06019104
Iteration 16, loss = 0.05450723
Iteration 17, loss = 0.04995987
Iteration 18, loss = 0.04628980
Iteration 19, loss = 0.04283084
Iteration 20, loss = 0.04036279
Iteration 21, loss = 0.03796774
Iteration 22, loss = 0.03612794
Iteration 23, loss = 0.03455014
Iteration 24, loss = 0.03287674
Iteration 25, loss = 0.03078394
Iteration 26, loss = 0.02942503
Iteration 27, loss = 0.02843009
Iteration 28, loss = 0.02823579
Iteration 29, loss = 0.02639043
Iteration 30, loss = 0.02565600
Iteration 31, loss = 0.02528919
Iteration 32, los

In [6]:
# Print the shape of every weight matrix, then of every bias vector,
# one shape per line, to inspect the fitted network's layer layout.
for param_group in (mlp_clf.coefs_, mlp_clf.intercepts_):
    for layer_array in param_group:
        print(layer_array.shape)

(2, 256)
(256, 256)
(256, 1)
(256,)
(256,)
(1,)
