In [None]:
#Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
#from sklearn.metrics import plot_confusion_matrix
from scipy.ndimage import gaussian_filter1d
from scipy.fftpack import fft
import seaborn as sns
import os
import zipfile

import cv2

import Models 

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D

from imblearn.over_sampling import RandomOverSampler

In [None]:
# Get Data
# Read the labelled light curves out of the zip archive that sits in the
# current working directory. `data` has to be created here: without it the
# split below fails with a NameError on a fresh kernel.
archive_path = os.path.join(os.getcwd(), "kepler-labelled-time-series-data.zip")

with zipfile.ZipFile(archive_path) as z:
    with z.open("exoTrain.csv") as f:
        data = pd.read_csv(f)

X = data.iloc[:, 1:]  # all columns except the first
y = data.iloc[:, 0]   # the first column is the target

# Split into 80% train, 20% test
# Stratifying on y helps with imbalanced datasets because it keeps the same
# class distribution in both the training and the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
# Convert data to np array
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Rescale the labels linearly so the smallest label becomes 0 and the largest 1
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

# Both splits are scaled with the training-set label range, so the encoding is
# guaranteed to be the same for train and test.
label_min = y_train.min()
label_max = y_train.max()
y_train = (y_train - label_min) / (label_max - label_min)
y_test = (y_test - label_min) / (label_max - label_min)

# No column is deleted from X here: the label column was already left out when
# X was built with data.iloc[:, 1:]. Deleting column 1 at this point would drop
# a genuine flux sample (and one more every time the cell is re-run).


In [None]:
# Graph Data
# Plot the raw light curve of one training star
star_idx = 10  # change the number to plot what you want
time = np.arange(len(X_train[0])) * (36/60)  # time in hours (one sample every 36 minutes)

# The rows come from a shuffled split, so the star at a given index is
# arbitrary: derive the title from its actual label (1 = confirmed planet)
# instead of hard-coding it.
planet_status = 'with confirmed planet' if y_train[star_idx] == 1 else 'without confirmed planet'

plt.figure(figsize=(20,5))
plt.title(f'Flux of star {star_idx} {planet_status}')
plt.ylabel('Flux')
plt.xlabel('Hours')
plt.plot(time, X_train[star_idx])
plt.show()


In [None]:
# Normalize: divide every row (one light curve) by its own Euclidean norm
train_row_norms = np.linalg.norm(X_train, axis=1, keepdims=True)
test_row_norms = np.linalg.norm(X_test, axis=1, keepdims=True)
norm_X_train = X_train / train_row_norms
norm_X_test = X_test / test_row_norms

# Smooth each row with a 1-D Gaussian filter (sigma = 5 samples) along the
# sample axis; the borders are handled by reflecting the signal
smooth_X_train = gaussian_filter1d(norm_X_train, sigma=5, axis=1, mode='reflect')
smooth_X_test = gaussian_filter1d(norm_X_test, sigma=5, axis=1, mode='reflect')

In [None]:
# Graph smooth data
# Plot the smoothed light curve of one training star
star_idx = 1000  # change the number to plot what you want

# Build the title from the plotted index and that star's actual label
# (1 = confirmed planet) so the caption always matches the curve shown.
planet_status = 'with confirmed planet' if y_train[star_idx] == 1 else 'without confirmed planet'

plt.figure(figsize=(20,5))
plt.title(f'Flux of star {star_idx} {planet_status}, smoothed')
plt.ylabel('Flux')
plt.xlabel('Hours')
plt.plot(time, smooth_X_train[star_idx])
plt.show()

In [None]:
# Apply the FFT to each smoothed row (last axis) and keep only the magnitude
# of the complex coefficients
FFT_X_train = np.absolute(fft(smooth_X_train, axis=-1))
FFT_X_test = np.absolute(fft(smooth_X_test, axis=-1))

In [None]:
# Plot star frequency spectrum
star_idx = 1000  # change the number to plot what you want
len_seq = len(FFT_X_train[0])  # number of FFT bins per star; also passed to the model as its input length

# Bin k of an N-point FFT sampled every dt seconds sits at k / (N * dt).
# Using k / dt alone would overstate every frequency by a factor of N.
# For a real-valued input the bins above N/2 mirror the lower half.
sample_spacing_s = 36.0 * 60.0  # seconds between two samples (36 minutes)
frequency = np.arange(len_seq) / (len_seq * sample_spacing_s)

# Build the title from the plotted index and that star's actual label
# (1 = confirmed planet) so the caption always matches the curve shown.
planet_status = 'with confirmed planet' if y_train[star_idx] == 1 else 'without confirmed planet'

plt.figure(figsize=(20,5))
plt.title(f'Flux of star {star_idx} ({planet_status}) in the frequency domain')
plt.ylabel('Abs value of FFT result')
plt.xlabel('Frequency (Hz)')
plt.plot(frequency, FFT_X_train[star_idx])
plt.show()

In [None]:
# Add more samples of the under-represented class by random oversampling.
# sampling_strategy=0.5 asks for a minority/majority ratio of 0.5 after
# resampling. The sampler draws rows at random, so it is seeded (same seed as
# the train/test split) to make the resampled training set reproducible.
rm = RandomOverSampler(sampling_strategy=0.5, random_state=42)
overSamp_X_train, overSamp_y_train = rm.fit_resample(FFT_X_train, y_train)

# Recap of the dataset after oversampling
print("After oversampling, counts of label '1': {}".format(np.count_nonzero(overSamp_y_train == 1)))
print("After oversampling, counts of label '0': {}".format(np.count_nonzero(overSamp_y_train == 0)))


In [None]:
# Reshape data for CNN: make sure both sets are ndarrays, then append a
# trailing axis of length 1 -> (samples, sequence length, 1)
overSamp_X_train = np.asarray(overSamp_X_train)
FFT_X_test = np.asarray(FFT_X_test)

overSamp_X_train_cnn = np.expand_dims(overSamp_X_train, axis=-1)
FFT_X_test_cnn = np.expand_dims(FFT_X_test, axis=-1)


In [None]:
# Create the F.C.N model and train it
# Models.FCN_model is project code; it receives the per-star sequence length.
model = Models.FCN_model(len_seq)

model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would add a stray "None" line to the output).
model.summary()

# NOTE(review): the test split is also used as validation data here, so the
# validation metrics are not an independent estimate - confirm this is intended.
history = model.fit(
    overSamp_X_train_cnn,
    overSamp_y_train,
    epochs=10,
    batch_size=10,
    validation_data=(FFT_X_test_cnn, y_test),
)



In [None]:
# Plot the per-epoch training accuracy and training loss, one figure each
acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(1, len(acc)+1)

# (values, legend label, title, y-axis label) for each figure, in display order
training_curves = (
    (acc, 'accuracy_train', 'accuracy', 'value of accuracy'),
    (loss, 'loss_train', 'loss', 'value of loss'),
)

for curve_values, curve_label, curve_title, curve_ylabel in training_curves:
    plt.plot(epochs, curve_values, 'b', label=curve_label)
    plt.title(curve_title)
    plt.xlabel('epochs')
    plt.ylabel(curve_ylabel)
    plt.legend()
    plt.grid()
    plt.show()

In [None]:
# Predict the test set and report the results.
# The model output is turned into a boolean class prediction with a 0.5 cut-off.
y_test_pred = model.predict(FFT_X_test_cnn) > 0.5

accuracy = accuracy_score(y_test, y_test_pred)
print("accuracy : ", accuracy)

# Per-class precision / recall / F1; the names are given in label order (0, then 1)
class_names = ["NO exoplanet confirmed", "YES exoplanet confirmed"]
report = classification_report(y_test, y_test_pred, target_names=class_names)
print(report)