[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aldomunaretto/immune_deep_learning/blob/main/notebooks/01_intro_DL/06_keras_classification.ipynb)

# Artificial Neural Networks - Classification

## Part 1 - Data Preprocessing

### Import Libraries


In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from keras import Input
from keras.models import Sequential
from keras.layers import Dense, Dropout

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix


### Download Dataset from Kaggle


In [8]:
# Install Kaggle library
!pip install kaggle

# Download and unzip the dataset
!kaggle datasets download -d shrutimechlearn/churn-modelling -p /content/drive/MyDrive/data
!unzip /content/drive/MyDrive/data/churn-modelling.zip -d /content/drive/MyDrive/data

print("Dataset downloaded and unzipped")

In [9]:
# Read the churn table, then slice it by position:
# columns 3..12 become the feature matrix, column 13 the target vector.
dataset = pd.read_csv('/content/drive/MyDrive/data/Churn_Modelling.csv')

X, y = dataset.iloc[:, 3:13].values, dataset.iloc[:, 13].values

### Encode categorical data


In [16]:
# Label-encode feature columns 1 and 2 in place.
# One encoder per column, so each fitted mapping stays available afterwards.
labelencoder_X_1, labelencoder_X_2 = LabelEncoder(), LabelEncoder()

X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

In [17]:
# One-hot encode feature column 1; every other column is passed through untouched.
# Tuple layout: (transformation name, transformer object, column(s) it applies to).
transformer = ColumnTransformer(
    transformers=[("Churn_Modelling", OneHotEncoder(categories='auto'), [1])],
    remainder='passthrough',
)

# Encode, then discard the first output column (one of the one-hot dummy columns,
# which would otherwise be redundant with the remaining dummies).
X = transformer.fit_transform(X)[:, 1:]

### Split into train and test

In [18]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Feature Scaling

In [19]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions.
sc_X = StandardScaler().fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)

## Part 2 - Building the NN




In [20]:
# Declare the whole network as one ordered layer list: 11 inputs -> 6 -> 6 -> 1.
classifier = Sequential([
    # Input layer
    Input(shape=(11,)),

    # First hidden layer with dropout
    Dense(units=6, kernel_initializer="uniform", activation="relu"),
    Dropout(0.1),

    # Second hidden layer with dropout
    Dense(units=6, kernel_initializer="uniform", activation="relu"),
    Dropout(0.1),

    # Output layer: a single sigmoid unit for the binary target
    Dense(units=1, kernel_initializer="uniform", activation="sigmoid"),
])

# Compile the NN
classifier.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Model summary
classifier.summary()

In [21]:
# Train on the training set: 100 epochs, mini-batches of 10 samples
classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
)

## Part 3 - Evaluating the model and making final predictions

### Predicting on test set

In [22]:
# Threshold the predicted probabilities at 0.5 to obtain boolean class labels
y_pred = classifier.predict(X_test) > 0.5

### Confusion Matrix

In [23]:
# Rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [24]:
# Test accuracy: correctly classified samples (matrix diagonal) over all samples
(cm[0, 0] + cm[1, 1]) / cm.sum()

## Part 4 - Evaluating, Improving and Tuning the NN

### Evaluating the NN

In [25]:
!pip install scikeras

from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

In [26]:
def build_classifier():
  """Build and compile the 11-6-6-1 binary classifier (no dropout layers).

  Returns:
    A compiled ``Sequential`` model using the Adam optimizer, binary
    cross-entropy loss and the accuracy metric.
  """
  layer_stack = [
    Input(shape=(11,)),
    # Two hidden layers of 6 ReLU units each
    Dense(units=6, kernel_initializer="uniform", activation="relu"),
    Dense(units=6, kernel_initializer="uniform", activation="relu"),
    # Single sigmoid output unit
    Dense(units=1, kernel_initializer="uniform", activation="sigmoid"),
  ]
  network = Sequential(layer_stack)
  network.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
  return network

In [28]:
# Wrap the builder as a scikit-learn estimator and score it with 10-fold
# cross-validation on the training set, using all available cores.
classifier = KerasClassifier(model=build_classifier, batch_size=10, epochs=100)
accuracies = cross_val_score(
    estimator=classifier,
    X=X_train,
    y=y_train,
    cv=10,
    n_jobs=-1,
    verbose=1,
)

In [29]:
# Display the score obtained on each of the cross-validation folds
accuracies

In [30]:
# Summarize the cross-validation scores.
# NOTE: despite its name, `variance` holds the standard deviation (.std()),
# not the variance, of the fold accuracies.
mean, variance = accuracies.mean(), accuracies.std()
print(mean, variance, sep="\n")

### Improving the NN

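#### Dropout Regularization to reduce overfitting

Dropout randomly deactivates a fraction of a layer's units at each training step, which discourages the network from relying too heavily on any single unit. The network built in Part 2 already applies `Dropout(0.1)` after each hidden layer.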

### Tuning the NN

In [36]:
from sklearn.model_selection import GridSearchCV # sklearn.grid_search

In [43]:
def build_classifier(optimizer="adam"):
  """Build and compile the 11-6-6-1 binary classifier with a configurable optimizer.

  Args:
    optimizer: Optimizer passed straight to ``compile`` (default ``"adam"``).

  Returns:
    A compiled ``Sequential`` model using binary cross-entropy loss and the
    accuracy metric.
  """
  net = Sequential()
  net.add(Input(shape=(11,)))

  # (units, activation) for the two hidden layers and the output layer, in order
  for width, activation_name in ((6, "relu"), (6, "relu"), (1, "sigmoid")):
    net.add(Dense(units=width, kernel_initializer="uniform", activation=activation_name))

  net.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
  return net

In [44]:
# Wrap the tunable builder as a scikit-learn estimator; no training
# hyperparameters are fixed here.
classifier = KerasClassifier(
    model=build_classifier,
)

In [46]:
# List the parameter names exposed by the wrapper
print(classifier.get_params().keys())

In [47]:
# Hyperparameter grid for the search.
# The optimizer must be routed with the `model__` prefix: scikeras forwards only
# `model__*` (and user-defined) parameters to the model-building function, and
# the wrapper's own `optimizer` argument is ignored when that function returns
# an already compiled model. With a plain 'optimizer' key every candidate would
# silently be trained with the builder's default optimizer.
parameters = {
    'batch_size' : [25,32],
    'epochs' : [10, 50],
    'model__optimizer' : ['adam', 'rmsprop']
}

In [48]:
# Exhaustive search over `parameters`, scored by accuracy with 10-fold CV
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
)

# fit() returns the fitted search object itself
grid_search = grid_search.fit(X_train, y_train)

# Best parameter combination and its mean cross-validated score
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_

In [None]:
# Report the winning parameter combination and its score, one per line
print(best_parameters, best_accuracy, sep="\n")