In [52]:
import numpy as np
import math
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, auc

from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from scipy.ndimage.filters import uniform_filter1d
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, BatchNormalization, Dropout, Flatten, Reshape
from tensorflow.keras.models import Sequential
import matplotlib as plt


%matplotlib inline

In [53]:
# data
x_train = pd.read_csv(r'C:/Users/rg654th/Downloads/other/Exoplanet/Data/final_xtrain.csv')
x_test = pd.read_csv(r'C:/Users/rg654th/Downloads/other/Exoplanet/Data/final_xtest.csv')
y_train = pd.read_csv(r'C:/Users/rg654th/Downloads/other/Exoplanet/Data/final_ytrain.csv')
y_test = pd.read_csv(r'C:/Users/rg654th/Downloads/other/Exoplanet/Data/final_ytest.csv')

In [54]:
# turn pandas to numpy array
x_train = x_train.values
y_train = y_train.values
x_test = x_test.values
y_test = y_test.values

In [55]:
x_train = np.stack([x_train, uniform_filter1d(x_train, axis=1, size=200)], axis=2)
x_test = np.stack([x_test, uniform_filter1d(x_test, axis=1, size=200)], axis=2)

In [56]:
model = Sequential()
model.add(Conv1D(filters=8, kernel_size=11, activation='relu', input_shape=x_train.shape[1:]))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=16, kernel_size=11, activation='relu'))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=32, kernel_size=11, activation='relu'))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
model.add(Conv1D(filters=64, kernel_size=11, activation='relu'))
model.add(MaxPool1D(strides=4))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [57]:
# equal number of positive and negative samples seen by the network

def batch_generator(x_train, y_train, batch_size=32):
    
    half_batch = batch_size // 2
    x_batch = np.empty((batch_size, x_train.shape[1], x_train.shape[2]), dtype='float32')
    y_batch = np.empty((batch_size, y_train.shape[1]), dtype='float32')
    
    yes_idx = np.where(y_train[:,0] == 1.)[0]
    non_idx = np.where(y_train[:,0] == 0.)[0]
    
    while True:
        np.random.shuffle(yes_idx)
        np.random.shuffle(non_idx)
    
        x_batch[:half_batch] = x_train[yes_idx[:half_batch]]
        x_batch[half_batch:] = x_train[non_idx[half_batch:batch_size]]
        y_batch[:half_batch] = y_train[yes_idx[:half_batch]]
        y_batch[half_batch:] = y_train[non_idx[half_batch:batch_size]]
    
        for i in range(batch_size):
            sz = np.random.randint(x_batch.shape[1])
            x_batch[i] = np.roll(x_batch[i], sz, axis = 0)
     
        yield x_batch, y_batch

In [68]:
model.compile(optimizer=Adam(1e-5), loss = 'binary_crossentropy', metrics=['accuracy'])
hist = model.fit_generator(batch_generator(x_train, y_train, 32), 
                           validation_data=(x_test, y_test), 
                           verbose=1, epochs=5,
                           steps_per_epoch=x_train.shape[1]//32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [69]:
# faster learning
model.compile(optimizer=Adam(3e-5), loss = 'binary_crossentropy', metrics=['accuracy'])
hist = model.fit_generator(batch_generator(x_train, y_train, 32), 
                           validation_data=(x_test, y_test), 
                           verbose=1, epochs=5,
                           steps_per_epoch=x_train.shape[1]//32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [70]:
# Show confusion matrix
y_predi = model.predict_classes(x_test)

results = confusion_matrix(y_test, y_predi, labels=None, sample_weight=None)
print(results)

[[1095   29]
 [   6    2]]


In [71]:
# metrics
score2 = accuracy_score(y_test, y_predi)
score3 = precision_score(y_test, y_predi)
score4 = recall_score(y_test, y_predi)

print("Precision", "", score3)
print("Accuracy", "", score2)
print("Recall", "", score4)

Precision  0.06451612903225806
Accuracy  0.9690812720848057
Recall  0.25
