In [None]:
import  numpy as np 
import tensorflow as tf 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the competition's train.csv into a DataFrame and preview its first rows.
df_train = pd.read_csv("../input/tabular-playground-series-may-2022/train.csv")
df_train.head()

In [None]:
# Bottom row of the pairwise correlation matrix, i.e. the correlation of every
# numeric column with the last numeric column (presumably `target` - verify).
# The frame also holds the string column `f_27`; DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0, so restrict to numeric columns explicitly
# (older pandas dropped them silently, so the result is unchanged there).
df_train.select_dtypes(['number']).corr().tail(1)

In [None]:
# Heat-map of the pairwise correlations between the numeric training columns.
# The matrix is computed once, on numeric columns only: the frame contains the
# string column `f_27`, on which DataFrame.corr() raises in pandas >= 2.0.
# Using the matrix's own columns for the tick labels guarantees that labels
# and cells stay aligned.
corr_matrix = df_train.select_dtypes(['number']).corr()
tick_positions = range(len(corr_matrix.columns))

f = plt.figure(figsize=(19, 15))
plt.matshow(corr_matrix, fignum=f.number)
plt.xticks(tick_positions, corr_matrix.columns, fontsize=14, rotation=45)
plt.yticks(tick_positions, corr_matrix.columns, fontsize=14)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
plt.title('Correlation Matrix', fontsize=16);

In [None]:
# Print the training frame's column summary (dtypes and non-null counts).
df_train.info()

In [None]:
# Load the competition's test.csv; it lives in the same input directory as train.csv.
df_test = pd.read_csv("../input/tabular-playground-series-may-2022/test.csv")

In [None]:
# Derive extra columns from the string column `f_27`, applied identically to
# the train and test frames (both frames are modified in place).
for frame in (df_train, df_test):
    f27 = frame.f_27
    # One integer column per character position 0..9: the character's offset
    # from 'A'.
    for pos in range(10):
        frame[f'ch{pos}'] = f27.str.get(pos).map(ord) - ord('A')
    # Number of distinct characters appearing in the string.
    frame["unique_characters"] = f27.map(lambda text: len(set(text)))

# Model inputs: every column of the test frame except the row id and the raw
# string column.
features = [col for col in df_test.columns if col not in ('id', 'f_27')]

In [None]:
# Labels as a NumPy array; inputs restricted to the engineered feature list.
# The test inputs are copied so later changes do not touch `df_test`.
Y_train = df_train['target'].to_numpy()
X_train = df_train.drop(columns=['target'])[features]
X_test = df_test[features].copy()

In [None]:
# Preview the first rows of the test inputs to check the selected feature columns.
X_test.head()

In [None]:
# Standardize the inputs. (One could instead apply a normalization, or inspect
# the features and process them so that they follow a normal distribution.)
# The scaler is fitted on the training inputs only and then reused unchanged
# for the test inputs.
StSc = StandardScaler().fit(X_train)
X_train = StSc.transform(X_train)
X_test = StSc.transform(X_test)

In [None]:
# Spot-check the first five scaled input rows and their labels.
print(X_train[:5])
print(Y_train[:5])

In [None]:
# Shape of the training inputs; the second entry is the number of model input features.
X_train.shape

In [None]:
# Shape of the test inputs; the column count should match the training inputs.
X_test.shape

# Fully connected binary classifier: six swish-activated hidden layers
# (31-64-128-64-32-16 units), each with L2 weight regularization (30e-6),
# followed by a single sigmoid output unit.
# NOTE(review): `model_class` is reassigned by the next model definition in
# this notebook before it is compiled, so this architecture is effectively
# unused as the notebook stands.
#
# The input width is taken from the data instead of a hard-coded 41, and the
# shape is passed as a tuple via `shape=`: a bare integer positional argument
# is rejected by newer Keras releases.
model_class = tf.keras.models.Sequential(
    [tf.keras.layers.Input(shape=(X_train.shape[1],))]
    + [
        tf.keras.layers.Dense(
            units,
            kernel_regularizer=tf.keras.regularizers.l2(30e-6),
            activation='swish',
        )
        for units in (31, 64, 128, 64, 32, 16)
    ]
    + [tf.keras.layers.Dense(1, activation='sigmoid')]
)

In [None]:
# Original author's note: use tanh activation since there are negative values;
# otherwise normalize with min-max. (The layers below actually use swish.)
#
# Fully connected binary classifier: three swish hidden layers of 82 units and
# one of 41 units, each with L2 weight regularization, followed by a single
# sigmoid output unit.
L2 = 0.000003

# The input width is taken from the data instead of a hard-coded 41, and the
# shape is passed as a tuple via `shape=`: a bare integer positional argument
# is rejected by newer Keras releases.
model_class = tf.keras.models.Sequential(
    [tf.keras.layers.Input(shape=(X_train.shape[1],))]
    + [
        tf.keras.layers.Dense(
            units,
            kernel_regularizer=tf.keras.regularizers.l2(L2),
            activation='swish',
        )
        for units in (82, 82, 82, 41)
    ]
    + [tf.keras.layers.Dense(1, activation='sigmoid')]
)



In [None]:
# Compile for binary classification: binary cross-entropy loss, Adam with its
# default settings, and accuracy / precision / recall tracked during training.
# Alternative optimizer (disabled):
# opt = tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.3, nesterov=False, name="SGD")
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam()
model_class.compile(
    optimizer=opt,
    loss=loss,
    metrics=[
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)


In [None]:
# Halve the learning rate when the validation loss has not improved for
# 2 epochs.
LR = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    verbose=0,
    mode='auto',
)

# Stop training when the validation loss has not improved for 6 epochs and
# roll the model back to the best weights seen.
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)

In [None]:
# Train the classifier, holding out 10% of the training arrays for validation
# (Keras takes the validation samples from the end of the arrays). Training
# runs for up to 300 epochs; the callbacks lower the learning rate and stop
# early based on the validation loss.
#
# Changes from the previous call:
# * `steps_per_epoch` is no longer hard-coded (was 1800). With in-memory
#   arrays Keras derives the step count from the data and `batch_size`, so
#   every epoch is exactly one pass over the 90% training subset.
# * `max_queue_size`, `workers` and `use_multiprocessing` are dropped: they
#   were left at their defaults and are no longer accepted by `Model.fit` in
#   Keras 3.
# * Arguments that merely restated Keras defaults are omitted.
#
# NOTE(review): `shuffle=False` is kept as the author set it, so batches are
# presented in the same order every epoch - confirm this is intentional.
history = model_class.fit(
    x=X_train,
    y=Y_train,
    batch_size=500,
    epochs=300,
    verbose=1,
    callbacks=[LR, earlystopping],
    validation_split=0.1,
    shuffle=False,
)

In [None]:
# Plot per-epoch training vs. validation binary accuracy recorded by fit().
acc_train = history.history['binary_accuracy']
acc_val = history.history['val_binary_accuracy']

epochs = range(len(acc_train))

# Open the figure BEFORE drawing. Previously plt.figure() was called after the
# curves were plotted, which only created an additional empty figure.
plt.figure()
plt.plot(epochs, acc_train, 'r', label='Training accuracy')
plt.plot(epochs, acc_val, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Binary accuracy')
plt.legend(loc=0)
plt.show()

In [None]:
# Predict the positive-class probability for each test row and write the
# submission file with columns `id` and `target`.
pred = model_class.predict(X_test)

# Build the submission as its own frame instead of adding a `target` column to
# `df_test`: the `features` list is derived from `df_test.columns`, so mutating
# `df_test` here would make a re-run of the feature cell pick up `target` as a
# model input. The single sigmoid unit yields an (n, 1) array, flattened here
# to a 1-D column.
submit = pd.DataFrame({'id': df_test['id'], 'target': pred.ravel()})

# NOTE(review): the file name looks like a typo for "submission.csv"; it is
# left unchanged so the output path stays the same - confirm and rename.
submit.to_csv("subsission.csv", index=False)