In [5]:
import numpy as np
import numpy.ma as ma
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from habitablePlanets import habitable_planets

exoplanets = pd.read_csv("exoplanets3.csv", sep=",")
training_data = np.array(exoplanets[:])

'''print(training_data.shape)
print(training_data)'''

habitable_x = np.array([np.array(training_data[0, :])])
habitable_y = np.array([0])
uninhabitable_x = np.array([np.array(training_data[0, :])])
uninhabitable_y = np.array([0])

for i in training_data:
    if i[0] in habitable_planets:
        habitable_x = np.append(habitable_x, np.array([np.array(i)]), axis=0)
        habitable_y = np.append(habitable_y, 1)

for i in training_data:
    if i[0] not in habitable_planets:
        uninhabitable_x = np.append(uninhabitable_x, np.array([np.array(i)]), axis=0)
        uninhabitable_y = np.append(uninhabitable_y, 0)

training_x = np.append(habitable_x[:, 1:], uninhabitable_x[:, 1:], axis=0)
training_y = np.append(habitable_y, uninhabitable_y, axis=0)

habitable_x = habitable_x[:, 1:]
uninhabitable_x = uninhabitable_x[:, 1:]

print(habitable_x.shape, uninhabitable_x.shape, training_x.shape)
print(habitable_y.shape, uninhabitable_y.shape, training_y.shape)

from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss

training_x = np.asarray(training_x).astype(np.float32)
training_x = np.where(np.isnan(training_x), ma.array(training_x, mask=np.isnan(training_x)).mean(axis=0), training_x)
habitable_x = np.asarray(habitable_x).astype(np.float32)
habitable_x = np.where(np.isnan(habitable_x), ma.array(habitable_x, mask=np.isnan(habitable_x)).mean(axis=0), habitable_x)
uninhabitable_x = np.asarray(uninhabitable_x).astype(np.float32)
uninhabitable_x = np.where(np.isnan(uninhabitable_x), ma.array(uninhabitable_x, mask=np.isnan(uninhabitable_x)).mean(axis=0), uninhabitable_x)

oversample = SMOTE()
x_res, y_res = oversample.fit_resample(training_x, training_y)
print(x_res.shape, y_res.shape)

from collections import Counter

print("OG dataset shape: {}".format(Counter(training_y)))
print("New dataset shape: {}".format(Counter(y_res)))

from imblearn.over_sampling import RandomOverSampler
os = SMOTE(sampling_strategy = 1)
x_train_res, y_train_res = os.fit_resample(training_x, training_y)
print(x_train_res.shape, y_train_res.shape)

from sklearn.svm import SVC

model = SVC()
model.fit(x_train_res, y_train_res)

testing_x = np.array([np.array(training_data[0, 1:])])
testing_y = np.array([np.array([0])])

testcounter = 0
for i in training_data:
    if testcounter >= 5:
        break
    if i[0] in habitable_planets:
        testcounter += 1
        testing_y = np.append(testing_y, 1)
    else:
        testing_y = np.append(testing_y, 0 )
    testing_x = np.append(testing_x, np.array([i[1:]]), axis = 0)

testing_x = np.asarray(testing_x).astype(np.float32)
testing_x = np.where(np.isnan(testing_x), ma.array(testing_x, mask=np.isnan(testing_x)).mean(axis=0), testing_x)

print(testing_x.shape, testing_y.shape)
print(testing_x, testing_y)

print(model.score(x_train_res, y_train_res))



(60, 25) (4366, 25) (4426, 25)
(60,) (4366,) (4426,)
(8734, 25) (8734,)
OG dataset shape: Counter({0: 4367, 1: 59})
New dataset shape: Counter({0: 4367, 1: 4367})
(8734, 25) (8734,)
(175, 25) (175,)
[[  2.           1.           0.         ...   1.20000005  64.01462027
   43.36999893]
 [  2.           1.           0.         ...   1.20000005  64.01462027
   43.36999893]
 [  1.           1.           0.         ...   1.5         64.01462027
  -17.52000046]
 ...
 [  1.           3.           0.         ...   4.94950375  64.01462027
  -15.06999969]
 [  1.           1.           0.         ...   4.94950375  64.01462027
   41.61999893]
 [  2.           2.           0.         ...   1.          64.01462027
    4.01999998]] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [4]:
test_x_2 = np.array([np.array(training_data[0, 1:])])
for i in training_data:
    if i[0] == "Teegarden's Star b":
        test_x_2 = np.append(test_x_2, np.array([i[1:]]), axis = 0)
        break

test_x_2 = np.asarray(test_x_2).astype(np.float32)
test_x_2 = np.where(np.isnan(test_x_2), ma.array(test_x_2, mask=np.isnan(test_x_2)).mean(axis=0), test_x_2)

test_y_2 = np.array([0, 1])

print(model.score(test_x_2, test_y_2))

1.0
