In [118]:
from imblearn.ensemble import BalancedRandomForestClassifier
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from imblearn.keras import balanced_batch_generator
from imblearn.over_sampling import RandomOverSampler
import seaborn as sns
import tensorflow as tf
%matplotlib inline

In [98]:
# Read the training feature table and its label file, then attach the label
# columns to the features by a left join on the shared "challenge_oid" index.
# NOTE(review): unpickling can execute arbitrary code -- only load
# train_features.pkl if its source is trusted.
index_key = "challenge_oid"
train_table = pd.read_pickle("train_features.pkl")
labels = pd.read_csv("train_labels.csv", index_col=index_key)

# The pickled frame's index is given the same name as the label index
# before the two are joined.
train_table.index.name = index_key
features = train_table.join(labels)

In [99]:
# Pull the target column out of the joined table: `labels` keeps the class
# names, and the column is then removed from `features` in place so that only
# model inputs remain.
target_column = "classALeRCE"
labels = features[target_column]
del features[target_column]

In [100]:
# Class names in the order of their integer codes (position == code).
colums = [
    "SNIa",
    "SNII",
    "SNIbc",
    "RRL",
    "EB",
    "DSCT",
    "Ceph",
    "Periodic-Other",
    "AGN",
    "QSO",
    "YSO",
    "CV/Nova",
    "LPV",
    "Blazar",
]

# Class name -> integer code, derived from the list above so the two can
# never drift apart.
maps = {class_name: code for code, class_name in enumerate(colums)}

In [101]:
# Translate every class-name label into its integer code via `maps`.
# An unknown class name raises KeyError.
new_classes = [maps[class_name] for class_name in labels.values]

In [150]:
# Multi-layer perceptron with a shared trunk (128 -> 64 -> 32 units) that fans
# out into three 16-unit heads. The heads emit 3, 5 and 6 scores, which are
# concatenated into 14 values and normalised by a single softmax.
# NOTE(review): the 3/5/6 split presumably follows the label order in `maps`
# (codes 0-2, 3-7 and 8-13) -- confirm.

# `shape` is passed as an explicit tuple: Keras documents it as a shape tuple,
# and a bare int is not accepted by every Keras version.
inputs = tf.keras.layers.Input(shape=(len(features.columns),))

# Shared trunk.
layer1 = tf.keras.layers.Dense(128, activation="relu")(inputs)
layer2 = tf.keras.layers.Dense(64, activation="relu")(layer1)
layer3 = tf.keras.layers.Dense(32, activation="relu")(layer2)

# One hidden layer per head, all fed from the end of the trunk.
layer_transient_1 = tf.keras.layers.Dense(16, activation="relu")(layer3)
layer_periodic_1 = tf.keras.layers.Dense(16, activation="relu")(layer3)
layer_stochastic_1 = tf.keras.layers.Dense(16, activation="relu")(layer3)

# The head outputs are left linear (no activation) so the final softmax
# receives unbounded logits. With sigmoid heads every softmax input lies in
# (0, 1), which caps any class probability at e / (e + 13) ~= 0.17 and keeps
# the cross-entropy loss above ~1.75 no matter how well the network fits.
layer_transient_2 = tf.keras.layers.Dense(3)(layer_transient_1)
layer_periodic_2 = tf.keras.layers.Dense(5)(layer_periodic_1)
layer_stochastic_2 = tf.keras.layers.Dense(6)(layer_stochastic_1)

# Concatenate the head logits (3 + 5 + 6 = 14) and normalise them jointly.
outputs = tf.keras.layers.Concatenate()([layer_transient_2, layer_periodic_2, layer_stochastic_2])
outputs = tf.keras.layers.Softmax()(outputs)

model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

In [151]:
# Fit the min-max scaler on the training features; `minmax` is reused later
# to transform the test features with the same parameters.
minmax = MinMaxScaler().fit(features)

# Missing-value fill: np.nan_to_num turns NaN into 0, so shifting the scaled
# values up by 1 beforehand and back down by 1 afterwards leaves every NaN
# entry at -1, while non-NaN values are shifted up and back down again.
scaled_features = np.nan_to_num(minmax.transform(features) + 1) - 1

In [152]:
# One-hot encode the integer class codes for the categorical cross-entropy loss.
y = np.array(new_classes)
# The width is pinned to the number of known classes. Left to infer it,
# to_categorical uses (largest label present + 1), which would yield fewer
# columns than the model's outputs whenever the highest-coded class is absent
# from the training labels.
y = tf.keras.utils.to_categorical(y, num_classes=len(maps))

In [153]:
# Build a batch generator that draws class-balanced batches of 50 rows by
# randomly over-sampling the training set. It also returns the number of
# batches that make up one pass over the resampled data.
# The sampler is seeded explicitly so the resampling is reproducible even if
# the generator does not forward its own `random_state` to a user-supplied
# sampler (behaviour differs between imbalanced-learn releases).
training_generator, steps_per_epoch = balanced_batch_generator(
    scaled_features,
    y,
    sampler=RandomOverSampler(random_state=42),
    batch_size=50,
    random_state=42,
)

In [None]:
# Train with Adam on categorical cross-entropy, reporting accuracy.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# `Model.fit` consumes generators directly; `fit_generator` is deprecated in
# TensorFlow 2.x and absent from newer Keras releases.
# NOTE(review): the `steps_per_epoch` value returned by
# balanced_batch_generator is not used here -- each epoch runs a fixed 100
# batches instead. Confirm this shortened epoch is intentional.
history = model.fit(training_generator, steps_per_epoch=100, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50

In [None]:
# Load the test-set feature table that is scaled and scored in the following cells.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if its source is trusted.
test_features = pd.read_pickle("test_features.pkl")

In [None]:
# Apply the scaler fitted on the training features (no re-fitting), then use
# the same missing-value fill as for training: shift by +1, let np.nan_to_num
# turn NaN into 0, shift back by -1, so NaN entries end up as -1.
scaled_test_features = np.nan_to_num(minmax.transform(test_features) + 1) - 1

In [None]:
# Score the scaled test features with the trained network; the model ends in a
# softmax over 14 concatenated outputs, so each row holds one value per class.
probs = model.predict(scaled_test_features)

In [None]:
# Wrap the prediction array in a DataFrame whose columns are the class names
# from `colums` (listed in the same order as the integer codes in `maps`).
probs = pd.DataFrame(probs, columns=colums)

In [None]:
# Add a constant "Outlier" column set to 0 for every row.
# NOTE(review): presumably required by the expected output format, since the
# model has no outlier class -- confirm.
probs["Outlier"] = 0

In [None]:
# Key each prediction row by the index of the test feature row it came from
# (assumes the prediction rows are in the same order as test_features).
probs.index = test_features.index

In [None]:
# Display the resulting prediction table as the cell output.
probs