In [None]:
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras_tuner.tuners import RandomSearch

In [None]:
# Read the labelled training set and the unlabelled test set from the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [None]:
# Frequency of each Stage value in the test set (displayed as the cell output).
test['Stage'].value_counts()

In [None]:
# Name of the label column in the training data.
TARGET = 'Status'

# Detach the identifier column from each frame (removing it in place) and keep
# it aside; the test ids are needed again when the submission is assembled.
train_id = train.pop('id')
test_id = test.pop('id')

In [None]:
# Target classes are mapped to integer codes; numeric Stage values are mapped to
# word labels so that Stage is treated as a categorical (string) feature.
_status_codes = {'C': 0, 'CL': 1, 'D': 2}
_stage_labels = {1.0: 'one', 2.0: 'two', 3.0: 'three', 4.0: 'four'}

# Single lookup table used for both the Status and the Stage columns.
names_map = {**_status_codes, **_stage_labels}

In [None]:
# Recode the target through names_map, then relabel Stage in both frames.
train[TARGET] = train[TARGET].replace(names_map)
for frame in (train, test):
    frame['Stage'] = frame['Stage'].replace(names_map)

In [None]:
# Numeric feature columns; the target is excluded first so it is never scaled.
numeric_cols = (
    train.drop(columns=[TARGET])
    .select_dtypes(include=['int64', 'float64'])
    .columns
)

# Fit the min-max scaler on the training data only, then apply the same
# transformation to both the training and the test features.
scaler = MinMaxScaler().fit(train[numeric_cols])

for frame in (train, test):
    frame[numeric_cols] = scaler.transform(frame[numeric_cols])

In [None]:
# Set the target aside so it is not one-hot encoded with the features.
Status = train[TARGET]

# One-hot encode the categorical features, dropping the first level of each.
train = pd.get_dummies(train.drop(columns=TARGET), drop_first=True)
feature_cols = train.columns  # captured before the target is re-attached
train[TARGET] = Status

# Encode the test set with every level kept and then align it to the training
# feature columns. Encoding test independently with drop_first=True can drop a
# different baseline level, or produce extra/missing columns, whenever the two
# frames do not contain exactly the same categories, which would feed the model
# a differently laid-out matrix. Columns absent from test are filled with 0;
# columns the training data never produced are discarded.
test = pd.get_dummies(test).reindex(columns=feature_cols, fill_value=0)

In [None]:
# Display the training frame: encoded features plus the re-attached target column.
train

In [None]:
# Feature matrix (everything except the target) and the label vector.
X, y = train.drop(columns=[TARGET]), train[TARGET]

In [None]:
# Hold out 10% of the rows, stratified on the label so the class proportions
# are kept in both parts; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

In [None]:
# One-hot encode the labels for categorical cross-entropy.
# The encoder is left at its default (sparse) output and densified explicitly:
# the `sparse=` keyword was renamed to `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4, so neither spelling works across versions, whereas
# `.toarray()` on the default output does.
encoder = OneHotEncoder(categories='auto')
encoder.fit(y.values.reshape(-1, 1))

y_encoded = encoder.transform(y.values.reshape(-1, 1)).toarray()
y_train_encoded = encoder.transform(y_train.values.reshape(-1, 1)).toarray()
y_test_encoded = encoder.transform(y_test.values.reshape(-1, 1)).toarray()

In [None]:
# Stop once val_loss has not improved for 30 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=30,
    restore_best_weights=True,
    verbose=2,
)

# Multiply the learning rate by 0.8 after 3 epochs without val_loss improvement,
# never going below 1e-5.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.8,
    patience=3,
    min_lr=0.00001,
)

# Callbacks shared by the hyperparameter search and the final fit.
callbacks_list = [early_stopping, reduce_lr, tf.keras.callbacks.TerminateOnNaN()]

In [None]:
def create_model():
    """Search for a dense-network architecture and return the best one, unfitted.

    Runs a Keras Tuner random search (10 trials, seed 42, objective
    ``val_loss``) on the notebook-level ``X_train`` / ``y_train_encoded`` using
    ``callbacks_list``, then builds a fresh model from the best hyperparameters.

    Validation during the search comes only from ``validation_split=0.1`` of
    the training data. Previously ``validation_data=(X_test, ...)`` was passed
    as well; Keras ignores ``validation_split`` when explicit validation data
    is supplied, so the hyperparameters were being selected on the very
    hold-out set that is later used to score the model.

    Returns:
        A compiled ``tf.keras`` Sequential model built from the best
        hyperparameter set. It is newly built, so the caller must still fit it.
    """
    def build_model(hp):
        """Build and compile one candidate network from the hyperparameters ``hp``."""
        model = tf.keras.models.Sequential()

        # Between 2 and 20 hidden blocks, each Dense -> BatchNormalization -> Dropout,
        # with per-block width, activation and dropout rate chosen by the tuner.
        for i in range(hp.Int('num_layers', 2, 20)):
            model.add(tf.keras.layers.Dense(
                units=hp.Int('units_' + str(i), min_value=8, max_value=128, step=8),
                activation=hp.Choice('activation_' + str(i), values=['relu', 'tanh', 'sigmoid', 'softmax'])))
            model.add(tf.keras.layers.BatchNormalization())
            model.add(tf.keras.layers.Dropout(
                rate=hp.Float('dropout_' + str(i), min_value=0.0, max_value=0.5, step=0.05)))

        # Three-way softmax output, one unit per target class.
        model.add(tf.keras.layers.Dense(3, activation='softmax'))

        # Learning rate is sampled on a log scale between 1e-5 and 1e-1.
        optimizer = tf.keras.optimizers.Adam(
            learning_rate=hp.Float('learning_rate', min_value=0.00001, max_value=0.1, sampling='log')
        )

        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        return model

    # NOTE(review): with the default overwrite=False, Keras Tuner is documented to
    # reload an existing project found under directory/project_name instead of
    # searching again - confirm this caching is intended when re-running.
    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        seed=42,
        max_trials=10,
        directory='tuner_dir',
        project_name='my_tuner')

    # Validate on a slice of the training data so the hold-out set stays unseen.
    tuner.search(X_train.astype('float32'), y_train_encoded.astype('float32'),
                 epochs=100, batch_size=128, callbacks=callbacks_list, validation_split=0.1)

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    model = tuner.hypermodel.build(best_hps)

    return model

In [None]:
# Build the best architecture found by the tuner (returned unfitted).
nn_model = create_model()

# Train it on the training split, validating on the last 10% of those rows.
# batch_size matches the value used during the hyperparameter search (128);
# leaving it at the Keras default of 32 would train under a different regime
# than the one the learning rate and other hyperparameters were selected for.
history = nn_model.fit(X_train.astype('float32'), y_train_encoded.astype('float32'),
            epochs=100,
            batch_size=128,
            callbacks=callbacks_list,
            validation_split=0.1)

In [None]:
# Class-probability predictions for the held-out rows.
holdout_features = X_test.astype('float32')
y_pred = nn_model.predict(holdout_features)

In [None]:
# Multi-class log loss of the predicted probabilities against the one-hot hold-out labels.
log_loss(y_true=y_test_encoded, y_pred=y_pred)

In [None]:
# Start from the sample submission so the column names and order are kept.
submission = pd.read_csv('sample_submission.csv')

# First column holds the ids, the remaining columns hold the class probabilities.
id_col, *class_cols = submission.columns

submission[id_col] = test_id
submission[class_cols] = nn_model.predict(test.astype('float32'))

In [None]:
# Write the predictions to disk without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)