In [1]:
% matplotlib inline

import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
H1_data = np.genfromtxt('H1_times.csv')
L1_data = np.genfromtxt('L1_times.csv')

In [3]:
print(len(H1_data[np.where(H1_data[:,8] == 1)]))
print(len(H1_data[np.where(H1_data[:,9] == 1)]))
print(len(H1_data[np.where(H1_data[:,10] == 1)]))

print(len(L1_data[np.where(L1_data[:,8] == 1)]))
print(len(L1_data[np.where(L1_data[:,9] == 1)]))
print(len(L1_data[np.where(L1_data[:,10] == 1)]))

10
1
0
2
2
0


In [4]:
# using multilabels, with i = 0 = cbc,
#                           = 1 = burst,
#                           = 2 = stoch,
#                           = 3 = terrestrial
training = np.array([])
target = np.array([])
n_terr = 0
for event in H1_data:
    if (event[8] == 1 or event[9] == 1 or event[10] == 1):
        if (len(training) == 0):
            training = np.array([event[2],event[3],event[4],event[5]])
            target = np.array([int(event[8]),int(event[9]),int(event[10]),0])
        else:
            training = np.vstack((training,np.array([event[2],event[3],event[4],event[5]])))
            target = np.vstack((target,np.array([int(event[8]),int(event[9]),int(event[10]),0])))
        
        continue # not counting hardware injections as terrestrial

    
    b = event[0] - 0.1
    e = event[1] + 0.1
    
    # find H1 events whose beginings and ends do not overlap with L1 events
    b_novr = len(np.where((b > L1_data[:,0]) & (b < L1_data[:,1]))[0]) == 0
    e_novr = len(np.where((e > L1_data[:,0]) & (e < L1_data[:,1]))[0]) == 0
    
    if (b_novr and e_novr and n_terr < 200):
        if (len(training) == 0):
            training = np.array([event[2],event[3],event[4],event[5]])
            target = np.array([0,0,0,1])
        else:
            training = np.vstack((training,np.array([event[2],event[3],event[4],event[5]])))
            target = np.vstack((target, np.array([0,0,0,1])))
        n_terr += 1
        
n_terr = 0            
for event in L1_data:
    if (event[8] == 1 or event[9] == 1 or event[10] == 1):
        if (len(training) == 0):
            training = np.array([event[2],event[3],event[4],event[5]])
            target = np.array([int(event[8]),int(event[9]),int(event[10]),0])
        else:
            training = np.vstack((training,np.array([event[2],event[3],event[4],event[5]])))
            target = np.vstack((target,np.array([int(event[8]),int(event[9]),int(event[10]),0])))
        
        continue # not counting hardware injections as terrestrial

    
    b = event[0]
    e = event[1]
    
    # find H1 events whose beginings and ends do not overlap with L1 events
    b_novr = len(np.where((b > H1_data[:,0]) & (b < H1_data[:,1]))[0]) == 0
    e_novr = len(np.where((e > H1_data[:,0]) & (e < H1_data[:,1]))[0]) == 0
    
    if (b_novr and e_novr and n_terr < 200):
        if (len(training) == 0):
            training = np.array([event[2],event[3],event[4],event[5]])
            target = np.array([0,0,0,1])
        else:
            training = np.vstack((training,np.array([event[2],event[3],event[4],event[5]])))
            target = np.vstack((target, np.array([0,0,0,1])))
        n_terr += 1

In [5]:
print(training)

[[  1.20149414e+002   8.29552217e+001   2.60493827e+000   1.79769313e+308]
 [  1.78695312e+002   5.18846095e+001   2.46258503e+000   1.79769313e+308]
 [  2.41859375e+002   1.59048969e+002   2.37500000e+000   1.79769313e+308]
 ..., 
 [  7.98876953e+001   7.73583333e+001   1.74285714e+000   1.79769313e+308]
 [  1.86601562e+002   6.19886416e+001   1.91666667e+000   1.79769313e+308]
 [  2.66638184e+002   9.48213705e+001   2.00000000e+000   5.61957735e+002]]


In [6]:
# scale data for mlp
scaler = StandardScaler()  

scaler.fit(training)
train_scale = scaler.transform(training)

  X /= self.scale_


In [7]:
# split for training and testing
dat_train, dat_test, tar_train, tar_test = train_test_split(training, target, test_size=0.4)

In [8]:
# MLP params
hidden = (3,)
activ = 'logistic'
solver = 'lbfgs'
lrn = 'adaptive'

In [9]:
print(train_scale)

[[-0.79212657 -0.04926436  0.83270873         nan]
 [ 0.14548469 -0.82309472  0.51603968         nan]
 [ 1.15705575  1.84589148  0.32120413         nan]
 ..., 
 [-1.43691711 -0.18865789 -1.08501666         nan]
 [ 0.27210311 -0.57144835 -0.69837215         nan]
 [ 1.55388781  0.24626848 -0.51299464         nan]]


In [10]:
print(dat_train)

[[  1.46943848e+002   5.49563183e+001   2.63492063e+000   3.77994744e+002]
 [  1.75476074e+002   4.42561761e+001   2.19819820e+000   6.63663907e+002]
 [  2.54182129e+002   1.44645463e+002   2.24873096e+000   1.79769313e+308]
 [  1.47952148e+002   8.13437040e+001   2.26415094e+000   4.36381446e+002]
 [  8.11484375e+001   5.25291259e+001   1.85000000e+000   4.68616829e+002]
 [  1.33264160e+002   7.49643937e+001   1.92156863e+000   2.57449243e+002]
 [  1.04651367e+002   4.97667085e+001   1.97080292e+000   3.77866131e+002]
 [  3.51703125e+002   1.64031574e+002   2.27551020e+000   1.79769313e+308]
 [  9.98837891e+001   5.71482368e+001   2.05555556e+000   3.44521740e+002]
 [  4.35360840e+002   2.42393822e+002   2.52258065e+000   1.79769313e+308]
 [  1.85748047e+002   1.17384479e+002   2.44626168e+000   1.79769313e+308]
 [  1.30851562e+002   5.50796888e+001   2.40506329e+000   1.79769313e+308]
 [  1.42822754e+002   8.59715430e+001   2.00000000e+000   1.35247389e+002]
 [  2.33796875e+002   1.1

In [11]:
mlp = MLPClassifier()
parameters = {'hidden_layer_sizes':[(3,)],'activation':['logistic'],'solver':['lbfgs'],'learning_rate':['adaptive'],'alpha':(10.0 ** -np.arange(1, 7))}
clf = GridSearchCV(mlp, parameters)
clf.fit(dat_train,tar_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'alpha': array([  1.00000e-01,   1.00000e-02,   1.00000e-03,   1.00000e-04,
         1.00000e-05,   1.00000e-06]), 'activation': ['logistic'], 'solver': ['lbfgs'], 'learning_rate': ['adaptive'], 'hidden_layer_sizes': [(3,)]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [12]:
alpha = clf.best_params_['alpha']

mlp = MLPClassifier(hidden_layer_sizes=hidden,activation=activ,solver=solver,learning_rate=lrn,alpha=alpha)
mlp.fit(dat_train,tar_train)

MLPClassifier(activation='logistic', alpha=0.10000000000000001,
       batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
       epsilon=1e-08, hidden_layer_sizes=(3,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [13]:
mlp.score(dat_test,tar_test)

0.96987951807228912

In [31]:
scores = cross_val_score(mlp, training, target, cv=5)

In [32]:
print("Accuracy: %0.2f +/- %0.2f" % (scores.mean(), scores.std() * 2))
print(scores)

Accuracy: 0.96 +/- 0.09
[ 0.98795181  1.          0.87951807  1.          0.95180723]


In [33]:
print(np.sum(target, axis=0))

[ 12   3   0 400]


In [34]:
H1 = np.array([H1_data[:,2],H1_data[:,3],H1_data[:,4],H1_data[:,5]]).T
L1 = np.array([L1_data[:,2],L1_data[:,3],L1_data[:,4],L1_data[:,5]]).T
print(H1.shape)
print(L1.shape)
print(training.shape)

(2345, 4)
(2294, 4)
(415, 4)


In [35]:
H1_labels = mlp.predict(H1)
L1_labels = mlp.predict(L1)

In [36]:
print(np.sum(H1_labels,axis=0))
print(np.sum(L1_labels,axis=0))

[   0    0    0 2345]
[   0    0    0 2294]
