# Becomexpert AI Course: HW4 -- Sajad Chelabi

# ++++++++++++++++++++++++
## Part 1: Customer Churn Prediction
# ++++++++++++++++++++++++
### Build a fully connected Neural Network to predict whether a customer will leave the company, based on a set of measured features.

In [1]:
%matplotlib notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop

## Import Data
### Read 'Churn_Modelling.csv' dataset with pandas

In [2]:
# Load the churn dataset; the CSV is expected in the notebook's working directory
df = pd.read_csv('Churn_Modelling.csv')
# Preview the first rows to confirm the columns were parsed as expected
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## ************************************************
## Task 1: Clean and Preprocess the Data
## ************************************************
### Handle duplicates: Drop any duplicated data from dataframe and reset index.

In [3]:
# Remove exact duplicate rows in place; ignore_index relabels the surviving
# rows 0..n-1 so no separate index reset is needed.
df.drop_duplicates(inplace=True, ignore_index=True)

### Statistics: Check data description such as mean, minimum, and maximum values of each feature.

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


### Data types: Check data types of each column to make sure about the correct types for any of them.

In [5]:
# List the dtype of every column to spot columns stored with an unexpected type
df.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

#### So, categorical values are object and numerical values are in int and float types which are correct.

### Handle null values: Check any missed data and null values in the dataset to remove it or change to some value such as mean of that column.

In [6]:
# info() prints the non-null count per column, which exposes missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


#### According to the above information, there are no null values in the dataset.

### Drop not related columns from the dataframe
#### 'RowNumber', 'CustomerId', and 'Surname' have no relation to the prediction and act like IDs. So, make the dataframe more effective and cleaner by dropping these columns.

In [7]:
# Identifier-like columns carry no predictive signal, so remove them in place
id_columns = ['RowNumber', 'CustomerId', 'Surname']
df.drop(columns=id_columns, inplace=True)
df.head(3)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


### Convert categorical features such as 'Geography' and 'Gender' into numerical values

In [8]:
# Count the rows per country to see how many distinct categories need encoding
df.Geography.value_counts().to_frame()

Unnamed: 0_level_0,count
Geography,Unnamed: 1_level_1
France,5014
Germany,2509
Spain,2477


#### There are only three geography classes, which allows us to use one-hot encoding without adding too many columns to the dataframe.

### One-hot encoding

In [9]:
# One indicator column per country, stored as integers rather than booleans
geography_dummies = pd.get_dummies(df['Geography']).astype(int)
# Replace the original text column with the indicator columns (appended last)
df = df.drop('Geography', axis=1).join(geography_dummies)

In [10]:
# Count the rows per gender to confirm the column holds exactly two categories
df.Gender.value_counts().to_frame()

Unnamed: 0_level_0,count
Gender,Unnamed: 1_level_1
Male,5457
Female,4543


#### Gender includes only two categories, which allows us to use binary encoding and make it numerical easily.

### Binary encoding

In [11]:
# Binary-encode gender with a vectorized comparison instead of a per-row
# lambda: 'Male' -> 1, anything else -> 0. int64 matches what apply() produced.
df['Gender'] = (df['Gender'] == 'Male').astype(np.int64)
df.head(3)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0


In [12]:
# Re-inspect the frame to confirm every remaining column is numeric
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Gender           10000 non-null  int64  
 2   Age              10000 non-null  int64  
 3   Tenure           10000 non-null  int64  
 4   Balance          10000 non-null  float64
 5   NumOfProducts    10000 non-null  int64  
 6   HasCrCard        10000 non-null  int64  
 7   IsActiveMember   10000 non-null  int64  
 8   EstimatedSalary  10000 non-null  float64
 9   Exited           10000 non-null  int64  
 10  France           10000 non-null  int32  
 11  Germany          10000 non-null  int32  
 12  Spain            10000 non-null  int32  
dtypes: float64(2), int32(3), int64(8)
memory usage: 898.6 KB


### Data type correction

In [13]:
# Downcast each column to a smaller type to shrink the frame's memory
# footprint. 'Balance' is intentionally left out and keeps its dtype.
compact_dtypes = {
    'CreditScore': np.int16,
    'Gender': np.int8,
    'Age': np.int8,
    'Tenure': np.int8,
    'NumOfProducts': np.int8,
    'HasCrCard': np.int8,
    'IsActiveMember': np.int8,
    'EstimatedSalary': np.float32,
    'Exited': np.int8,
    'France': np.int8,
    'Germany': np.int8,
    'Spain': np.int8,
}
df = df.astype(compact_dtypes)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int16  
 1   Gender           10000 non-null  int8   
 2   Age              10000 non-null  int8   
 3   Tenure           10000 non-null  int8   
 4   Balance          10000 non-null  float64
 5   NumOfProducts    10000 non-null  int8   
 6   HasCrCard        10000 non-null  int8   
 7   IsActiveMember   10000 non-null  int8   
 8   EstimatedSalary  10000 non-null  float32
 9   Exited           10000 non-null  int8   
 10  France           10000 non-null  int8   
 11  Germany          10000 non-null  int8   
 12  Spain            10000 non-null  int8   
dtypes: float32(1), float64(1), int16(1), int8(10)
memory usage: 234.5 KB


#### So, all values are non-null and numerical. The final dataset contains 12 cleaned and preprocessed features and one target.

## *******************************
## Task 2: Create the Model
## *******************************
### Construct and train a fully connected Neural Network for predicting the customer churn and optimize it.

### Split the data: Create input(X) and output(y) of the model, then using 20% of the data for the test set and 80% for the train set.

In [14]:
# Target: the churn flag; Features: every other column
y = df['Exited']
X = df.drop(columns='Exited')

In [15]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### Data normalization: Standardize data with StandardScaler instance before build the model.

In [16]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Build fully connected NN: A first dense layer with 64 neurons, a second hidden layer with 64 neurons, and an output layer with 1 neuron which classifies the output into the 0 and 1 classes of the target.

In [17]:
# Fully connected binary classifier: two 64-unit ReLU layers followed by a
# single sigmoid unit.
# The input width is declared with an explicit Input object, sized from the
# scaled training matrix, instead of passing `input_shape` to the first Dense
# layer (Keras 3 emits a warning for that usage in Sequential models).
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')    # Using sigmoid to produce a value between 0 and 1 for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Compile the model: Using adam optimizer, binary cross entropy loss function, and accuracy metrics for evaluation.

In [18]:
# Binary cross-entropy pairs with a single sigmoid output; accuracy is tracked for reporting
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Train the model: Create history object to visualize the model performance.

In [19]:
# Train for 200 epochs and keep the per-epoch metrics in `history` for plotting.
# NOTE(review): the test split is passed as validation data here, so the
# validation curves are not an independent estimate of generalization.
history = model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test))

Epoch 1/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7911 - loss: 0.4823 - val_accuracy: 0.8485 - val_loss: 0.3850
Epoch 2/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8391 - loss: 0.3834 - val_accuracy: 0.8620 - val_loss: 0.3489
Epoch 3/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8584 - loss: 0.3485 - val_accuracy: 0.8650 - val_loss: 0.3431
Epoch 4/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8634 - loss: 0.3378 - val_accuracy: 0.8515 - val_loss: 0.3475
Epoch 5/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8571 - loss: 0.3377 - val_accuracy: 0.8665 - val_loss: 0.3404
Epoch 6/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8621 - loss: 0.3381 - val_accuracy: 0.8530 - val_loss: 0.3414
Epoch 7/200
[1m250/25

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8891 - loss: 0.2672 - val_accuracy: 0.8545 - val_loss: 0.3788
Epoch 52/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8889 - loss: 0.2619 - val_accuracy: 0.8475 - val_loss: 0.3766
Epoch 53/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8912 - loss: 0.2602 - val_accuracy: 0.8575 - val_loss: 0.3726
Epoch 54/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8879 - loss: 0.2660 - val_accuracy: 0.8525 - val_loss: 0.3715
Epoch 55/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8947 - loss: 0.2578 - val_accuracy: 0.8445 - val_loss: 0.3779
Epoch 56/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8938 - loss: 0.2518 - val_accuracy: 0.8545 - val_loss: 0.3772
Epoch 57/200
[1m250/250[0m 

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9220 - loss: 0.1938 - val_accuracy: 0.8415 - val_loss: 0.4405
Epoch 102/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9180 - loss: 0.1979 - val_accuracy: 0.8380 - val_loss: 0.4473
Epoch 103/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9126 - loss: 0.2043 - val_accuracy: 0.8250 - val_loss: 0.4541
Epoch 104/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9166 - loss: 0.1996 - val_accuracy: 0.8345 - val_loss: 0.4502
Epoch 105/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9151 - loss: 0.2026 - val_accuracy: 0.8355 - val_loss: 0.4625
Epoch 106/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9157 - loss: 0.2021 - val_accuracy: 0.8320 - val_loss: 0.4563
Epoch 107/200
[1m250/25

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9376 - loss: 0.1581 - val_accuracy: 0.8260 - val_loss: 0.5397
Epoch 151/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9329 - loss: 0.1608 - val_accuracy: 0.8195 - val_loss: 0.5497
Epoch 152/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9327 - loss: 0.1555 - val_accuracy: 0.8280 - val_loss: 0.5532
Epoch 153/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9421 - loss: 0.1504 - val_accuracy: 0.8325 - val_loss: 0.5508
Epoch 154/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9304 - loss: 0.1618 - val_accuracy: 0.8285 - val_loss: 0.5590
Epoch 155/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9341 - loss: 0.1539 - val_accuracy: 0.8230 - val_loss: 0.5505
Epoch 156/200
[1m250/25

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9445 - loss: 0.1316 - val_accuracy: 0.8260 - val_loss: 0.6747
Epoch 200/200
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9480 - loss: 0.1281 - val_accuracy: 0.8005 - val_loss: 0.6986


## ********************************************
## Task 3: Model Evaluation & Results
## ********************************************
### Visualize train and validation accuracy to check the model's performance and ensure the model robustness to underfitting or overfitting.

In [20]:
# Plot train and validation accuracy per epoch
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Derive the x-axis from the recorded history rather than hard-coding the
# epoch count, so the plot stays correct if the number of epochs changes.
epochs = range(1, len(acc) + 1)

plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

### Obviously the model is overfitting! So, make some changes to get a more robust and generalized model. Also, change the accuracy metric and add a callback to stop the process after enough epochs.

In [27]:
# Smaller network to reduce overfitting: batch normalization on the inputs,
# one 12-unit hidden layer and a sigmoid output.
# The input width is declared with an explicit Input object instead of
# `input_shape` on the first layer (Keras 3 warns about the latter).
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(12, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')    # Using sigmoid to produce a value between 0 and 1 for binary classification
])

# Stop once the monitored quantity (val_loss by default) has not improved by
# at least min_delta for `patience` consecutive epochs, then roll back to the
# best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
# NOTE(review): early stopping monitors the test split here, so the reported
# validation score is optimistically biased; consider a separate validation split.
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/100


  super().__init__(**kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - binary_accuracy: 0.6386 - loss: 0.6473 - val_binary_accuracy: 0.7990 - val_loss: 0.4684
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - binary_accuracy: 0.7977 - loss: 0.4533 - val_binary_accuracy: 0.8130 - val_loss: 0.4282
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - binary_accuracy: 0.8146 - loss: 0.4298 - val_binary_accuracy: 0.8210 - val_loss: 0.4150
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - binary_accuracy: 0.8152 - loss: 0.4273 - val_binary_accuracy: 0.8290 - val_loss: 0.4025
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - binary_accuracy: 0.8267 - loss: 0.4032 - val_binary_accuracy: 0.8420 - val_loss: 0.3860
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - binary_accuracy: 0.8340 - loss: 0.3901

### Visualize the final results

In [28]:
# Put the per-epoch metrics in a DataFrame so each pair can be plotted by column name
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot(title="Cross-entropy")
history_df[['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<Axes: title={'center': 'Accuracy'}>

### It's done! With just 12 neurons in the hidden layer, the model's training and validation scores converged to each other smoothly. If more layers or neurons are added to the model, the output curves are no longer smooth and become noisy, and the probability of overfitting also gets higher.

### So, the final model in this part achieved as above results with these hyperparameters:
#### 1- Model structured with one batch normalization layer as input, one hidden layer with only 12 neurons, and one single neuron output with sigmoid activation function in order to binary classification.
#### 2- Compiled with the adam optimizer; binary cross-entropy was chosen as the loss function since this is a binary classification problem, and the accuracy metric was used to compare the model's performance.
#### 3- Used callback to stop the process after reaching acceptable values which guaranteed to avoid underfitting and overfitting problems.

# +++++++++++++++++++++++++++
# Part 2: Concrete Crack Detection
# +++++++++++++++++++++++++++
### The datasets contain images of various concrete surfaces with and without crack. The image data is divided into two as negative (without crack) and positive (with crack) in a separate folder for image classification. Each class has 20000 images with a total of 40000 images with 227 x 227 pixels with RGB channels.

In [29]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Import Data
### Hold out the first 10% of the images (4,000 in total) as the validation set. So, the train_set archive has 18k images and the validation_set archive has 2k images for each class.
### Extract zipfiles and make a directory for the images.

In [35]:
# Unzip the training and validation archives, each into a folder named after
# the archive. The context manager closes every archive as soon as it has
# been extracted, so no file handle is left open.
for archive_name in ('train_set', 'validation_set'):
    with zipfile.ZipFile(f'{archive_name}.zip') as zip_ref:
        zip_ref.extractall(archive_name)

## **********************************************************************************
## Task 1 + Bonus Task: Preprocess the images + Data Augmentation
## **********************************************************************************
### Data Normalization and Augmentation with ImageDataGenerator to apply rotation, shift, and flip the images to enhance the network's performance.

In [62]:
# Training generator: multiply pixel values by 1/255 and apply random
# rotation, shift, shear, zoom and horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation generator: rescaling only, no augmentation
valid_datagen = ImageDataGenerator(rescale=1./255)

# Both directory iterators share the same batch size (100), binary labels
# and 227x227 target size.
flow_options = {
    'batch_size': 100,
    'class_mode': 'binary',
    'target_size': (227, 227),
}
train_generator = train_datagen.flow_from_directory('train_set', **flow_options)
valid_generator = valid_datagen.flow_from_directory('validation_set', **flow_options)

Found 36000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.


## ************************
## Task 2: Create CNN
## ************************
### Construct and train a convolutional neural network for detecting the surface cracks and optimize it.

In [63]:
# Model Structure (Using AlexNet structure)
# AlexNet-inspired CNN with a reduced number of filters.
# The input shape is declared with an explicit Input object rather than
# `input_shape` on the first Conv2D (Keras 3 warns about the latter).
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(227, 227, 3)),

    # Block 1: 11x11 Conv(16) with stride 4, then 3x3 max pooling with stride 2
    tf.keras.layers.Conv2D(filters=16, kernel_size=(11,11), strides=(4,4), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),

    # Block 2: 5x5 Conv(32) with 'same' padding, then 3x3 max pooling with stride 2
    tf.keras.layers.Conv2D(filters=32, kernel_size=(5,5), strides=(1,1), activation='relu', padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),

    # Block 3: one 3x3 Conv(48) followed by two 1x1 convolutions (48 and 32
    # filters), all with 'same' padding, then 3x3 max pooling (the stride
    # defaults to the pool size)
    tf.keras.layers.Conv2D(filters=48, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(filters=48, kernel_size=(1,1), strides=(1,1), activation='relu', padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(1,1), strides=(1,1), activation='relu', padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(3,3)),

    # Flatten the feature maps to feed them into the dense classifier
    tf.keras.layers.Flatten(),

    # Two 64-neuron hidden layers, each followed by Dropout with rate 0.5
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),

    # Single-neuron output layer with sigmoid activation for binary classification
    tf.keras.layers.Dense(1, activation='sigmoid')
])

### Print the model summary

In [64]:
# Print the layer-by-layer output shapes and parameter counts
model.summary()

### Compile the model

In [65]:
# Binary cross-entropy pairs with the single sigmoid output; binary accuracy is tracked for reporting
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

### Train the model

In [66]:
# Train the CNN from the directory generators.
# 36000 training images / batch size 100 = 360 steps per epoch;
# 4000 validation images / batch size 100 = 40 validation steps.
# NOTE(review): in the recorded run every second epoch finishes instantly with
# zero metrics, which suggests the generator is exhausted after one pass.
# Omitting steps_per_epoch / validation_steps lets Keras infer them from the
# generator length; confirm before changing, because the plotting cells may
# depend on the current layout of `hist.history`.
hist = model.fit(train_generator,
         steps_per_epoch=360,
         epochs=10,
         validation_data=valid_generator,
         validation_steps=40,
         verbose=1)

Epoch 1/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m908s[0m 3s/step - binary_accuracy: 0.9098 - loss: 0.2425 - val_binary_accuracy: 0.8917 - val_loss: 0.3349
Epoch 2/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m722s[0m 2s/step - binary_accuracy: 0.9889 - loss: 0.0403 - val_binary_accuracy: 0.5005 - val_loss: 10.7884
Epoch 4/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m692s[0m 2s/step - binary_accuracy: 0.9914 - loss: 0.0316 - val_binary_accuracy: 0.9908 - val_loss: 0.0253
Epoch 6/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m653s[0m 2s/step - binary_accuracy: 0.9926 - loss: 0.0304 - val_binary_accuracy: 0.9803 - val_loss: 0.0754
Epoch 8/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

## ********************************************
## Task 3: Model Evaluation & Results
## ********************************************

### Visualize train and validation accuracy, and also cross entropy loss of the model to check the model's performance.

In [77]:
# Measure train & validation loss
loss = hist.history['loss']
val_loss = hist.history['val_loss']

# The training history can hold more entries than the validation history
# (epochs that produced no validation metrics). Subsample the training series
# so both line up, instead of hard-coding the stride and the epoch count.
# Assumes the extra training entries are evenly interleaved.
stride = max(1, -(-len(loss) // len(val_loss)))    # ceiling division
loss = loss[0::stride][:len(val_loss)]
epochs = range(1, len(val_loss) + 1)

# Plot train & validation loss
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.xticks(epochs)
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [75]:
# Measure train & validation binary accuracy
acc = hist.history['binary_accuracy']
val_acc = hist.history['val_binary_accuracy']

# Align the training series with the validation series and build the x-axis
# here, so this cell does not depend on `epochs` being defined by another cell.
# Assumes any extra training entries are evenly interleaved.
stride = max(1, -(-len(acc) // len(val_acc)))    # ceiling division
acc = acc[0::stride][:len(val_acc)]
epochs = range(1, len(val_acc) + 1)

# Plot train & validation binary accuracy
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.xticks(epochs)
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

### Completed! Because of the model's complexity, it takes about one hour to fit the model. Finally, after completing 5 epochs the results are:
#### Training: accuracy = 0.9934 & loss = 0.0277 
#### Validation: accuracy = 0.9975 & loss = 0.0071

### So, the final model in this part achieved as above results with these features:
#### 1- Model structured in the form of AlexNet with fewer neurons.
#### 2- Compiled with the adam optimizer; binary cross-entropy was chosen as the loss function since this is a binary classification problem, and the binary accuracy metric was used to compare the model's performance.
#### 3- Used 5 epochs that each takes around 15 minutes to feed all images into the model.

## Extra: Simple Model
### To get a better visualization and a significantly lower runtime, a simple model is applied to the data in this section.
### Using an input shape of 15x15 instead of the real image size makes the learning process easier and cuts the runtime. The model's structure uses just one convolutional layer and a single-neuron output, which is enough to compare the simple and complex models.

In [92]:
# Simple flow data from directory in batches of 100 - Change target size to 15 x 15
# Same directory iterators as before (batches of 100, binary labels), but the
# images are resized to 15 x 15 on load.
simple_flow_options = {
    'batch_size': 100,
    'class_mode': 'binary',
    'target_size': (15, 15),
}
simple_train_generator = train_datagen.flow_from_directory('train_set', **simple_flow_options)
simple_valid_generator = valid_datagen.flow_from_directory('validation_set', **simple_flow_options)

Found 36000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.


In [97]:
# Using just one simple convolutional with maxpooling in the hidden layer
# Minimal CNN: one 3x3 convolution with batch normalization and 2x2 max
# pooling, flattened into a single sigmoid output neuron.
# The input shape is declared with an explicit Input object rather than
# `input_shape` on the Conv2D layer (Keras 3 warns about the latter).
simple_model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(15, 15, 3)),
    tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [98]:
# Print the layer-by-layer output shapes and parameter counts of the simple model
simple_model.summary()

### The total number of trainable params is significantly reduced, from 74k to just 1k! So the model can now be run for more than 5 epochs.

In [99]:
# Same optimizer, loss and metric as the larger CNN so the two runs are comparable
simple_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

In [100]:
# Train the simple model from the 15x15 directory generators.
# 36000 training images / batch size 100 = 360 steps per epoch;
# 4000 validation images / batch size 100 = 40 validation steps.
# NOTE(review): in the recorded run every second epoch finishes instantly with
# zero metrics, which suggests the generator is exhausted after one pass.
# Omitting steps_per_epoch / validation_steps lets Keras infer them from the
# generator length; confirm before changing, because the plotting cell may
# depend on the current layout of `simple_hist.history`.
simple_hist = simple_model.fit(simple_train_generator,
         steps_per_epoch=360,
         epochs=30,
         validation_data=simple_valid_generator,
         validation_steps=40,
         verbose=1)

Epoch 1/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 215ms/step - binary_accuracy: 0.8786 - loss: 0.3026 - val_binary_accuracy: 0.9625 - val_loss: 0.1660
Epoch 2/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 217ms/step - binary_accuracy: 0.9610 - loss: 0.1065 - val_binary_accuracy: 0.9492 - val_loss: 0.1691
Epoch 4/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 219ms/step - binary_accuracy: 0.9671 - loss: 0.0925 - val_binary_accuracy: 0.9187 - val_loss: 0.3365
Epoch 6/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94us/step - binary_accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/30
[1m360/360[0m [32m━━━━━━━━━━━━━━━━━━━━[

### Visualize the simple model

In [110]:
# Measure train & validation loss and accuracy
val_loss2 = simple_hist.history['val_loss']
val_acc2 = simple_hist.history['val_binary_accuracy']
loss2 = simple_hist.history['loss']
acc2 = simple_hist.history['binary_accuracy']

# The training history can hold more entries than the validation history
# (epochs that produced no validation metrics). Subsample the training series
# so both line up, instead of hard-coding the stride and the epoch count.
# Assumes the extra training entries are evenly interleaved.
stride2 = max(1, -(-len(loss2) // len(val_loss2)))    # ceiling division
loss2 = loss2[0::stride2][:len(val_loss2)]
acc2 = acc2[0::stride2][:len(val_acc2)]
epochs2 = range(1, len(val_loss2) + 1)

# Plot the final results: loss on the left, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(9,4))

axes[0].plot(epochs2, loss2, 'b', label='Training loss')
axes[0].plot(epochs2, val_loss2, 'r', label='Validation loss')
axes[0].set_title('Binary Cross Entropy Loss')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Loss')
axes[0].legend()

axes[1].plot(epochs2, acc2, 'b', label='Training accuracy')
axes[1].plot(epochs2, val_acc2, 'r', label='Validation accuracy')
axes[1].set_title('Binary Accuracy')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Accuracy')
axes[1].legend()

plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

### Completed! Because of the model's simplicity, it takes about 15 minutes to fit the model. Finally, after completing 15 epochs the results are:
#### Training: accuracy = 0.9786 & loss = 0.0676 
#### Validation: accuracy = 0.9663 & loss = 0.1271

### So, the final model in this part achieved the above results, which show that its difference from the previous model is small: about 1.5% in training accuracy and about 3% in validation accuracy.
## The end.