# Import Packages
Let's load all the packages needed for this notebook:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [None]:
import tensorflow as tf
# Display the installed TensorFlow version as the cell output
tf.__version__

# The Dataset
For this notebook we will use Tabular Playground Series - Mar 2021.

Let's load the training set from its CSV file:

In [None]:
# Location of the training CSV, kept in a variable so it is easy to spot and change
train_path = '../input/tabular-playground-series-mar-2021/train.csv'
data = pd.read_csv(train_path)

In [None]:
# Column names, dtypes and non-null counts of the training frame
data.info()

# Quick Look at the Data
Let’s take a look at the top five rows:

In [None]:
# First five rows of the raw training frame (still includes the 'id' column here)
data.head()

In [None]:
# Remove the row identifier so it is not used as a feature.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the
# cell safe to re-run: a second execution no longer raises KeyError for 'id'.
data = data.drop(columns='id', errors='ignore')

# Check if there are null values

In [None]:
# Number of missing values per column
data.isna().sum()

# Checking whether the target is balanced or not

In [None]:
# Bar chart of how many rows fall into each target class.
# The column is passed through the x= keyword: newer seaborn releases only
# accept `data` positionally, so a bare positional series is no longer
# interpreted as the x variable.
sns.countplot(x='target', data=data, palette="bwr")
plt.show()

# Class proportions. No trailing ';' here, so the numbers are actually shown
# as the cell output and back up the statement about class imbalance.
data['target'].value_counts(normalize=True)

As we can see, the target classes are not balanced!

# Split Data

In [None]:
# Label vector as a NumPy array, feature frame as everything except the label
y = data['target'].to_numpy()
X = data.drop(columns=['target'])

X.shape, y.shape

In [None]:
# First five feature rows
X.head(5)

In [None]:
# Names of the categorical columns: cat0 ... cat18
cat_columns = [f'cat{idx}' for idx in range(19)]

# Cardinality of each categorical column, smallest first
unique_counts = X[cat_columns].nunique()
unique_counts.sort_values()

# Label encode:

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns that get label-encoded; the order below is the order used when
# these columns are selected for display.
le_cols = [
    'cat0', 'cat11', 'cat12', 'cat13', 'cat14',
    'cat1', 'cat2', 'cat3', 'cat4', 'cat6', 'cat9',
    'cat5', 'cat7', 'cat8',
    'cat10', 'cat15', 'cat16', 'cat17', 'cat18',
]

def convert2num(X_new):
    """Label-encode the categorical columns listed in ``le_cols``.

    Parameters
    ----------
    X_new : pandas.DataFrame
        Frame that contains every column named in the module-level ``le_cols``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``X_new`` in which each ``le_cols`` column holds the integer
        codes produced by ``LabelEncoder.fit_transform``; other columns are
        unchanged.

    Notes
    -----
    The frame passed in is not modified; use the return value.

    Encoders are fitted on the frame given to *this* call. Codes obtained from
    two separate calls therefore only agree when both frames contain the same
    set of values in a column.
    """
    # Work on a copy so the caller's frame keeps its original values.
    encoded = X_new.copy()

    for col in le_cols:
        # One fresh encoder per column: nothing fitted on a previous column
        # is carried over.
        encoded[col] = LabelEncoder().fit_transform(encoded[col])

    return encoded

In [None]:
# Replace the categorical strings in the feature frame with integer codes
X = convert2num(X)

# Preview only the encoded columns
X.loc[:, le_cols].head()

In [None]:
# Full feature frame after label encoding
X.head()

In [None]:
# Remove three features from the training frame.
# NOTE(review): the reason for dropping exactly these columns is not shown in
# the notebook - confirm and document.
X = X.drop(columns=['cat13', 'cont10', 'cont2'])

# Feature scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every feature. The fitted scaler is kept in `sc`.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/validation/test split performed later in the notebook.
sc = StandardScaler().fit(X)
X = sc.transform(X)

In [None]:
# First row of the scaled feature array
X[:1]

# Splitting the training set

In [None]:
from sklearn.model_selection import train_test_split

tf.random.set_seed(42)

# Hold out 1% of the rows as a final test split; the rest is divided into
# train/validation in the next step.
X_train_vaild, X_test, y_train_vaild, y_test = train_test_split(
    X,
    y,
    test_size=0.01,
    random_state=42,
)

X_train_vaild.shape, X_test.shape, y_train_vaild.shape, y_test.shape

In [None]:
tf.random.set_seed(42)

# 75% of the remaining rows are used for fitting, 25% for validation
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_vaild,
    y_train_vaild,
    test_size=0.25,
    random_state=42,
)

X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

# Building and Training our model

In [None]:
# Build, compile and train a small fully connected classifier.

# Set random seed so weight initialisation and shuffling are repeatable
tf.random.set_seed(42)

# Take the input width from the data instead of hard-coding 27, so the network
# keeps matching the features if the set of dropped columns changes.
n_features = X_train.shape[1]
n_classes = 2

# 1. Create a model
model_1 = tf.keras.Sequential([
    # Explicit Input replaces the deprecated `input_dim` layer argument
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(15, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    # One softmax output per class
    tf.keras.layers.Dense(n_classes, activation='softmax'),
])

# 2. Compile the model
# A softmax output trained against one-hot labels is paired with categorical
# cross-entropy (the loss meant for mutually exclusive classes).
model_1.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])

# 3. Fit the model
# Labels are one-hot encoded to match the two-unit output layer.
history = model_1.fit(X_train,
                      tf.one_hot(y_train, depth=n_classes),
                      epochs=20,
                      verbose=1,
                      validation_data=(X_valid, tf.one_hot(y_valid, depth=n_classes)))

In [None]:
# Training vs. validation accuracy per epoch
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[key], label=key)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([0.0, 1.0])
ax.legend(loc='lower right');

In [None]:
# Training vs. validation loss per epoch
fig, ax = plt.subplots()
train_loss = history.history['loss']
valid_loss = history.history['val_loss']
ax.plot(train_loss, label='loss')
ax.plot(valid_loss, label='val_loss')
ax.set(xlabel='Epoch', ylabel='Loss', ylim=(0.0, 1))
ax.legend(loc='upper right');

# Testing the model

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix on the held-out test split, drawn as an annotated heatmap
plt.figure(figsize=(10, 8))

# Predicted class = index of the larger of the two softmax outputs
test_pred_labels = np.argmax(model_1.predict(X_test), axis=1)
cm = confusion_matrix(y_true=y_test, y_pred=test_pred_labels)

sns.heatmap(cm, annot=True, fmt="d");

In [None]:
# evaluate() returns [loss, accuracy] for this model; show accuracy in percent
test_loss, test_accuracy = model_1.evaluate(X_test, tf.one_hot(y_test, depth=2))
test_accuracy * 100

In [None]:
# Layer-by-layer overview of the trained network
model_1.summary()

In [None]:
# Draw the network as a diagram.
# NOTE(review): plot_model usually needs pydot and Graphviz installed -
# confirm they are available in the target environment.
from tensorflow.keras.utils import plot_model

# show_shapes=True is passed so the diagram includes each layer's shapes
plot_model(model_1, show_shapes=True)

# Test Data

In [None]:
# Read the competition test set.
# The path is written relative to the working directory, in the same way as
# the training file, so both files are resolved from the same input folder.
test_path = "../input/tabular-playground-series-mar-2021/test.csv"
df_test = pd.read_csv(test_path)

In [None]:
# Keep the ids for the submission file, then remove the column so the test
# frame contains features only.
# The guard makes the cell safe to re-run: once 'id' has been removed, a second
# execution keeps the ids captured earlier instead of raising KeyError.
if 'id' in df_test.columns:
    test_passengerIds = df_test['id'].values
    df_test = df_test.drop(columns='id')
df_test.head()

In [None]:
# Encode the test categoricals with the SAME integer codes the model was
# trained on. Fitting new encoders on the test frame would renumber the
# categories whenever a column's set of values differs between train and test.
X_new = df_test.copy()
for col in le_cols:
    # LabelEncoder numbers the sorted unique values 0..n-1, so the training
    # codes can be rebuilt from the raw training column (`data` still holds
    # the original strings; only the separate feature frame was encoded).
    train_classes = np.unique(data[col].to_numpy())
    # Values that never occurred in the training column are encoded as -1.
    X_new[col] = pd.Categorical(df_test[col], categories=train_classes).codes

X_new[le_cols].head()

In [None]:
# Remove the same three columns that were removed from the training features
X_new = X_new.drop(columns=['cat13', 'cont10', 'cont2'])

In [None]:
from sklearn.preprocessing import StandardScaler
# Scale the test features with the scaler that was fitted on the training
# features (`sc`). Fitting a new scaler here would standardise the test set
# with its own mean/variance, so the model would receive inputs on a different
# scale from the one it was trained on.
X_new = sc.transform(X_new)

In [None]:
# First row of the scaled test features
X_new[:1]

In [None]:
# Class probabilities for every test row, reduced to the index of the larger one
test_probabilities = model_1.predict(X_new)
y_pred = np.argmax(test_probabilities, axis=1)

In [None]:
# First five predicted class labels
y_pred[:5]

In [None]:
# Number of predictions; compare with the number of test ids in the next cell
y_pred.shape

In [None]:
# Number of test ids; should equal the number of predictions above
test_passengerIds.shape

In [None]:
# Assemble the submission table (one row per test id) and write it to disk
submission_columns = {
    'id': test_passengerIds,
    'target': y_pred,
}
output = pd.DataFrame(submission_columns)
output.to_csv('submission.csv', index=False)

In [None]:
# Show the submission table as the cell output
output