In [1]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv',index_col=0)

In [3]:
train.fillna(train.mean(), inplace=True)

In [4]:
train = shuffle(train, random_state=5001)

In [5]:
train.reset_index(inplace=True,drop=True)

In [6]:
y_train = train.pop("label")
x_train = train

In [7]:
y_train.value_counts()

0    58
1    29
Name: label, dtype: int64

In [8]:
# configurations
lr = 0.01
batch_size = 8
epochs = 11

In [9]:
x_train.shape

(87, 11)

In [10]:
normalizer = tf.keras.layers.experimental.preprocessing.Normalization(axis=-1)

In [11]:
normalizer.adapt(np.array(x_train))

In [12]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.Input(shape=(11,)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(1)
])
checkpoint_filepath = "model_weights_2.hdf5"
optimizer = Adam(learning_rate=lr)
checkpointer = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor="loss",
    verbose=0,
    save_best_only=False)
criterion = tf.losses.BinaryCrossentropy(from_logits=True)

model.compile(loss = criterion,
             optimizer = optimizer,
             metrics=['accuracy'])

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization (None, 11)                23        
_________________________________________________________________
input_1 (InputLayer)         multiple                  0         
_________________________________________________________________
dense (Dense)                (None, 64)                768       
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 856
Trainable params: 833
Non-trainable params: 23
_________________________________________________________________


In [14]:
history = model.fit(x_train,y_train,epochs=epochs,batch_size=batch_size,callbacks=[checkpointer])

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


In [15]:
model.load_weights(checkpoint_filepath)

In [17]:
pred = model.predict(x_train)

In [18]:
pred_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Activation('sigmoid')
])
pred_model.compile()

In [26]:
y_sigmoid = pred_model(x_train)

In [27]:
threshold = 0.5

In [28]:
output = tf.squeeze(tf.where(y_sigmoid > threshold, 1, 0),-1)
output

<tf.Tensor: shape=(87,), dtype=int32, numpy=
array([0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])>

In [32]:
np.mean(np.equal(y_train,output))

0.9310344827586207

In [33]:
y_pred = pred_model(test)

In [34]:
pred_output = tf.squeeze(tf.where(y_pred > threshold, 1, 0),-1)
pred_output

<tf.Tensor: shape=(59,), dtype=int32, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])>

In [43]:
pred_output = pred_output.numpy()

In [44]:
pred_output

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [39]:
df = pd.read_csv('sample_submission.csv',index_col=0)

In [46]:
for i in range(len(pred_output)):
    df.iat[i,0] = pred_output[i]

In [48]:
df.to_csv('submission.csv')