### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import r2_score

### Reading the Data

In [None]:
# Read the competition CSVs. `data` keeps the training file exactly as it was
# read; all later preprocessing is done on the independent copy `train`.
test = pd.read_csv(r'/kaggle/input/playground-series-s4e5/test.csv')
data = pd.read_csv(r'/kaggle/input/playground-series-s4e5/train.csv')
train = data.copy()

### EDA and Preprocessing

In [None]:
# Preview the first rows of the training frame.
train.head()

##### Set the index with the ID column

In [None]:
# Move the `id` column into the row index so it is not treated as a feature.
# The guard makes the cell safe to re-run: once `id` is already the index the
# column no longer exists, and an unguarded set_index would raise KeyError.
if 'id' in train.columns:
    train = train.set_index('id')
train.head()

In [None]:
# Apply the same indexing to the test frame: `id` becomes the row index
# instead of a feature column. Guarded so re-running the cell is a no-op
# rather than a KeyError.
if 'id' in test.columns:
    test = test.set_index('id')
test.head()

In [None]:
# Column dtypes and non-null counts for the training frame.
train.info()

In [None]:
# Column dtypes and non-null counts for the test frame.
test.info()

* ##### The data has no null values

#### X, y definition

In [None]:
# Separate features from the target by column name rather than by position,
# so a change in column order cannot silently swap the target for a feature.
# NOTE(review): assumes the target column is `FloodProbability` (the name used
# for the submission file) and that it was previously the last column.
TARGET = 'FloodProbability'
X = train.drop(columns=[TARGET])
y = train[TARGET]
X.head()

#### Train Val Splitting

In [None]:
# Hold out 10% of the rows for validation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Report the shape of each partition, one per line.
for part in (X_train, y_train, X_val, y_val):
    print(part.shape)

### Building Model Architecture 

In [None]:
# Model 1: three hidden Dense layers (64 -> 32 -> 16, ReLU) with 10% dropout
# after the first two, followed by a single linear output unit for regression.
# The input width is declared with an explicit Input object instead of the
# legacy `input_dim=` argument on the first Dense layer.
model = Sequential([
    Input(shape=(X_train.shape[1],)),

    Dense(64, activation='relu'),
    Dropout(0.1),

    Dense(32, activation='relu'),
    Dropout(0.1),

    Dense(16, activation='relu'),
    Dense(1, activation='linear'),
])

# Train on mean squared error and track R^2 as a metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=['r2_score'],
)
model.summary()

In [None]:
# Early stopping for model 1: watch validation loss, allow 7 epochs without
# improvement, and ask for the best weights to be restored afterwards.
call_back = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=0,
)

In [None]:
# Train model 1 for at most 100 epochs, validating on the held-out split.
# `callbacks` is documented as a list of callbacks, so the single
# EarlyStopping instance is wrapped in a list instead of being passed bare.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    callbacks=[call_back],
    validation_data=(X_val, y_val),
    verbose=1,
)

### Plotting Model Loss Through the Epochs

In [None]:
# Plot model 1's training and validation loss per epoch and mark the epoch
# with the lowest validation loss.
tr_loss = history.history['loss']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)   # 0-based position of the lowest validation loss
val_lowest = val_loss[index_loss]

Epochs = list(range(1, len(tr_loss) + 1))   # 1-based epoch numbers for the x-axis
loss_label = f'best epoch= {str(index_loss + 1)}'

# Select the style before creating the figure so it applies to the figure
# itself as well as to the axes drawn on it.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(20, 8))

plt.plot(Epochs, tr_loss, 'r', label='Training loss')
plt.plot(Epochs, val_loss, 'g', label='Validation loss')
plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Must be called: the bare attribute reference `plt.tight_layout` does nothing.
plt.tight_layout()
plt.show()


##### Printing the maximum validation r2_score recorded during training

In [None]:
# Largest value in model 1's recorded per-epoch validation r2_score list.
max(history.history['val_r2_score'])

In [None]:
# Model 2: two hidden Dense layers (32 -> 16, ReLU) with 10% dropout after the
# first, followed by a single linear output unit. The input width is declared
# with an explicit Input object instead of the legacy `input_dim=` argument.
model2 = Sequential([
    Input(shape=(X_train.shape[1],)),

    Dense(32, activation='relu'),
    Dropout(0.1),

    Dense(16, activation='relu'),
    Dense(1, activation='linear'),
])

# Same loss and metric as model 1, but with a 10x smaller Adam learning rate.
model2.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='mean_squared_error',
    metrics=['r2_score'],
)
model2.summary()

In [None]:
# Early stopping for model 2: watch validation loss, allow 7 epochs without
# improvement, and ask for the best weights to be restored afterwards.
call_back2 = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=0,
)

In [None]:
# Train model 2 for at most 100 epochs, validating on the held-out split.
# `callbacks` is documented as a list of callbacks, so the single
# EarlyStopping instance is wrapped in a list instead of being passed bare.
history2 = model2.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    callbacks=[call_back2],
    validation_data=(X_val, y_val),
    verbose=1,
)

In [None]:
# Plot model 2's training and validation loss per epoch and mark the epoch
# with the lowest validation loss.
tr_loss = history2.history['loss']
val_loss = history2.history['val_loss']
index_loss = np.argmin(val_loss)   # 0-based position of the lowest validation loss
val_lowest = val_loss[index_loss]

Epochs = list(range(1, len(tr_loss) + 1))   # 1-based epoch numbers for the x-axis
loss_label = f'best epoch= {str(index_loss + 1)}'

# Select the style before creating the figure so it applies to the figure
# itself as well as to the axes drawn on it.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(20, 8))

plt.plot(Epochs, tr_loss, 'r', label='Training loss')
plt.plot(Epochs, val_loss, 'g', label='Validation loss')
plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Must be called: the bare attribute reference `plt.tight_layout` does nothing.
plt.tight_layout()
plt.show()


In [None]:
# Largest value in model 2's recorded per-epoch validation r2_score list.
max(history2.history['val_r2_score'])

In [None]:
# Model 3: two hidden Dense layers (16 -> 8, ReLU) and a single linear output
# unit, with no dropout. The input width is declared with an explicit Input
# object instead of the legacy `input_dim=` argument.
model3 = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='linear'),
])

# Train on mean squared error and track R^2 as a metric.
model3.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=['r2_score'],
)
model3.summary()

In [None]:
# Early stopping for model 3: watch validation loss, allow 5 epochs without
# improvement, and ask for the best weights to be restored afterwards.
call_back3 = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=0,
)

In [None]:
# Train model 3 for at most 100 epochs, validating on the held-out split.
# `callbacks` is documented as a list of callbacks, so the single
# EarlyStopping instance is wrapped in a list instead of being passed bare.
history3 = model3.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    callbacks=[call_back3],
    validation_data=(X_val, y_val),
    verbose=1,
)

In [None]:
# Plot model 3's training and validation loss per epoch and mark the epoch
# with the lowest validation loss.
tr_loss = history3.history['loss']
val_loss = history3.history['val_loss']
index_loss = np.argmin(val_loss)   # 0-based position of the lowest validation loss
val_lowest = val_loss[index_loss]

Epochs = list(range(1, len(tr_loss) + 1))   # 1-based epoch numbers for the x-axis
loss_label = f'best epoch= {str(index_loss + 1)}'

# Select the style before creating the figure so it applies to the figure
# itself as well as to the axes drawn on it.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(20, 8))

plt.plot(Epochs, tr_loss, 'r', label='Training loss')
plt.plot(Epochs, val_loss, 'g', label='Validation loss')
plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Must be called: the bare attribute reference `plt.tight_layout` does nothing.
plt.tight_layout()
plt.show()


In [None]:
# Largest value in model 3's recorded per-epoch validation r2_score list.
max(history3.history['val_r2_score'])

In [None]:
# Model 4: one hidden Dense layer (8 units, ReLU) and a single linear output
# unit, trained with plain SGD. The input width is declared with an explicit
# Input object instead of the legacy `input_dim=` argument.
model4 = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(8, activation='relu'),
    Dense(1, activation='linear'),
])

# Train on mean squared error and track R^2 as a metric.
model4.compile(
    optimizer=SGD(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=['r2_score'],
)
model4.summary()

In [None]:
# Early stopping for model 4: watch validation loss, allow 5 epochs without
# improvement, and ask for the best weights to be restored afterwards.
call_back4 = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=0,
)

In [None]:
# Train model 4 for at most 100 epochs (batch size 32), validating on the
# held-out split. `callbacks` is documented as a list of callbacks, so the
# single EarlyStopping instance is wrapped in a list instead of passed bare.
history4 = model4.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    callbacks=[call_back4],
    validation_data=(X_val, y_val),
    verbose=1,
)

In [None]:
# Plot model 4's training and validation loss per epoch and mark the epoch
# with the lowest validation loss.
tr_loss = history4.history['loss']
val_loss = history4.history['val_loss']
index_loss = np.argmin(val_loss)   # 0-based position of the lowest validation loss
val_lowest = val_loss[index_loss]

Epochs = list(range(1, len(tr_loss) + 1))   # 1-based epoch numbers for the x-axis
loss_label = f'best epoch= {str(index_loss + 1)}'

# Select the style before creating the figure so it applies to the figure
# itself as well as to the axes drawn on it.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(20, 8))

plt.plot(Epochs, tr_loss, 'r', label='Training loss')
plt.plot(Epochs, val_loss, 'g', label='Validation loss')
plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Must be called: the bare attribute reference `plt.tight_layout` does nothing.
plt.tight_layout()
plt.show()


In [None]:
# Largest value in model 4's recorded per-epoch validation r2_score list.
max(history4.history['val_r2_score'])

**Models Hyperparameters Tuning:**

Model1: 

    - The architecture was 4 dense layers with 2 dropout layers (rate 0.1)
    - Used the Adam optimizer with learning rate 0.001
    -> training ran for 54 epochs with batch_size = 64 before the early-stopping callback triggered
    -> it has a maximum validation r2_score = -0.2252
    
Model2: 

    - The architecture was 3 dense layers with 1 dropout layer (rate 0.1)
    - Used the Adam optimizer with learning rate 0.0001
    -> training ran for 8 epochs with batch_size = 64 before the early-stopping callback triggered
    -> it has a maximum validation r2_score = 0.436
    
Model3: 

    - The architecture was 3 dense layers with no dropout layer
    - Used the Adam optimizer with learning rate 0.001
    -> training ran for 16 epochs with batch_size = 64 before the early-stopping callback triggered
    -> it has a maximum validation r2_score = 0.8448

Model4: 

    - The architecture was 2 dense layers with no dropout layer
    - Used the SGD optimizer with learning rate 0.001
    -> training ran for 46 epochs with batch_size = 32 before the early-stopping callback triggered
    -> it has a maximum validation r2_score = 0.8442
    
    
* After comparing the results, we use the third model to predict on the test data

In [None]:
# Run the selected model (model 3) on the test frame and display the raw
# prediction array returned by predict().
y_pred = model3.predict(test)
y_pred


**Prepare the submission File**

In [None]:
# Collapse the predictions to a 1-D array with one value per test row, so
# they can be used directly as a DataFrame column.
y_pred = y_pred.reshape(len(y_pred))

In [None]:
# Assemble the submission table: the test ids (held in the frame's index)
# alongside the predicted flood probabilities, in that column order.
submission_columns = ['id', 'FloodProbability']
submission_values = {
    'id': test.index.to_numpy(),
    'FloodProbability': y_pred,
}
submission = pd.DataFrame(submission_values, columns=submission_columns)

# Write without the DataFrame's own index, since the ids are already a column.
submission.to_csv('submission.csv', index=False)

**THANK YOU!**