In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.datasets import load_digits

In [None]:
# NOTE(review): absolute, machine-specific location of the churn CSV; the
# notebook only runs as-is on a machine that has this exact path.
CHURN_CSV_PATH = '/Users/pawankumarkc/Documents/vscode_workspace/MLAlgo/datasets/Churn_Modelling.csv'

dataset = pd.read_csv(CHURN_CSV_PATH)
dataset.head()

In [None]:
# Count how many rows fall into each value of the 'Exited' column.
dataset['Exited'].value_counts()

# Author's note: the data is imbalanced.

In [None]:
# Keep every row but drop the first three columns (positions 0-2).
# NOTE(review): this rebinds `dataset`, so running the cell a second time
# removes three more columns; re-run from the load cell instead.
dataset = dataset.iloc[:,3:]
dataset.head()

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

In [None]:
# Column names, non-null counts and dtypes.
dataset.info()

In [None]:
# One-hot encode 'Geography' and 'Gender'; drop_first=True omits the first
# level of each column from the generated dummy columns.
# NOTE(review): rebinds `dataset`; re-running the cell fails because the two
# source columns no longer exist after the first run.
dataset = pd.get_dummies(dataset, columns=['Geography','Gender'], drop_first=True)

In [None]:
# Preview the frame after encoding.
dataset.head()

In [None]:
## Split the data into dependent (y) and independent (x) variables

# The target stays a one-column DataFrame (double brackets); the features are
# every remaining column.
y = dataset[['Exited']]
x = dataset.drop(columns=['Exited'])

In [None]:
# Balance the classes by oversampling with SMOTE.
# NOTE(review): resampling happens before the train/test split in the next
# cell, so synthetic rows derived from rows that later land in the test split
# can end up in the training split. Consider splitting first and resampling
# only the training portion.

from imblearn.over_sampling import SMOTE
# Fixed seed so the synthetic samples (and every downstream number) are the
# same on each Restart & Run All; 101 matches the random_state used for the
# train/test split.
smote = SMOTE(random_state=101)
x_smote, y_smote = smote.fit_resample(x, y)
# Class counts before and after resampling.
print(y.value_counts())
print(y_smote.value_counts())

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the resampled rows for evaluation; the fixed seed makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_smote,
    y_smote,
    test_size=0.25,
    random_state=101,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same
# transformation to both splits.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
# Hyperparameter tuning

# 1. How many hidden layers should we have?
# 2. How many neurons should each hidden layer have?
# 3. Which activation function should each hidden layer use?
# 4. What is the best learning rate?

from kerastuner.tuners import RandomSearch


In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Dropout, BatchNormalization

In [None]:
# Multi-layer perceptron: two hidden blocks of Dense(32, relu) -> Dropout(0.5)
# -> BatchNormalization, followed by a single sigmoid output unit.
# The first layer declares an input width of 11 features.
model = Sequential([
    Dense(32, activation='relu', input_dim=11),
    Dropout(0.5),
    BatchNormalization(),
    Dense(32, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
def build_model(hp):
    """Build and compile a binary classifier from sampled hyperparameters.

    Hyperparameters requested from ``hp`` (the names are the keys under which
    they are registered):
      * 'no_hidden_layer'     - number of hidden layers, 1 to 10.
      * 'no_nuerons<i>'       - units in hidden layer i, 8 to 128 in steps of 8.
      * 'activation_func<i>'  - 'relu' or 'leaky_relu' for hidden layer i.
      * 'Dropout<i>'          - dropout rate after hidden layer i (0.1 to 0.5).
      * 'Optimizer'           - 'adam', 'rmsprop' or 'sgd'.

    Args:
        hp: hyperparameter container; this function calls ``hp.Int`` and
            ``hp.Choice`` on it.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric, one sigmoid output unit).
    """
    model = Sequential()

    n_hidden = hp.Int('no_hidden_layer', min_value=1, max_value=10)
    for layer_idx in range(n_hidden):
        suffix = str(layer_idx)
        units = hp.Int('no_nuerons' + suffix, min_value=8, max_value=128, step=8)
        activation = hp.Choice('activation_func' + suffix, values=['relu', 'leaky_relu'])
        # Only the first layer declares the input width (11 features).
        first_layer_kwargs = {'input_dim': 11} if layer_idx == 0 else {}
        model.add(Dense(units, activation=activation, **first_layer_kwargs))

        dropout_rate = hp.Choice("Dropout" + suffix, values=[0.1, 0.2, 0.3, 0.4, 0.5])
        model.add(Dropout(dropout_rate))

    # Output layer: a single sigmoid unit for the binary target.
    model.add(Dense(1, activation='sigmoid'))

    optimizer_name = hp.Choice("Optimizer", values=['adam', 'rmsprop', 'sgd'])
    model.compile(optimizer=optimizer_name,
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model
    

In [None]:
from kerastuner.tuners import RandomSearch
import kerastuner as kt

# Random search over build_model's search space: at most 5 trials, ranked by
# validation accuracy. `directory` / `project_name` tell the tuner where to
# keep its results.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='my_own_dir',
    project_name='HyperParam',
)

In [None]:
# Run the search: every trial trains for 5 epochs and is scored on the
# (x_test, y_test) pair passed as validation data.
# NOTE(review): the test split doubles as the validation set here, so the
# selected hyperparameters have already been chosen against it.
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

In [None]:
# Hyperparameter values of the top-ranked entry returned by the tuner.
tuner.get_best_hyperparameters()[0].values

In [None]:
# Take the single best model from the tuner; this rebinds `model`.
model = tuner.get_best_models(num_models=1)[0]

In [None]:
# Continue training the selected model in batches of 32: the epoch counter
# starts at 5 (the search above used epochs=5) and stops at 100.
model.fit(x_train, y_train, batch_size=32, epochs=100, initial_epoch=5, validation_data=(x_test, y_test))

## PyTorch DNN

In [None]:
import torch
from torch import nn    #neural network
from torch.nn import functional as f    #functional api
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Breast cancer dataset - classification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

from tqdm import tqdm #Progress package

In [None]:
# Check whether the MPS backend can be used on this machine.
# `torch.has_mps` is deprecated; the supported query lives in
# torch.backends.mps.
torch.backends.mps.is_available()

In [None]:
import tensorflow as tf
# Device types are upper-case strings ("CPU", "GPU"); the lower-case "cpu"
# matches no device and yields an empty list.
tf.config.list_physical_devices("CPU")

In [None]:
# Load the breast-cancer dataset via scikit-learn's load_breast_cancer.

data = load_breast_cancer()
# NOTE(review): displaying the whole returned object prints everything it
# holds; a smaller preview (e.g. its keys) would keep the output readable.
data

In [None]:
# Feature matrix and label vector taken from the loaded bundle.
x, y = data['data'], data['target']

# Report both shapes, one per line.
print(f"Size x = {x.shape}", f"Size y =  {y.shape}", sep="\n")


In [None]:
# Preview the first rows of the feature matrix. `head` must be called:
# without the parentheses the cell only shows the bound method object.
pd.DataFrame(x).head()

In [None]:
# Number of samples per class label.
# Author's note: data is balanced. NOTE(review): confirm against the counts
# this cell displays.
pd.DataFrame(y).value_counts()

In [None]:
# 75/25 split with a fixed seed, stratified on the labels.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)
# Shapes of the four resulting arrays.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# Fit the scaler on the training features only, then apply that same
# transformation to both splits.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
class Hyperparameters():
    """Container for the training settings used by the PyTorch section.

    Every setting is a keyword argument whose default is the value the
    notebook used originally, so ``Hyperparameters()`` behaves as before while
    individual settings can be overridden for experiments.

    Args:
        number_of_epochs: number of passes over the training data.
        batch_size: number of samples per training batch.
        learning_rate: step size handed to the optimizer.
        shuffle: whether the training loader shuffles the samples.
    """
    def __init__(self, number_of_epochs=50, batch_size=8, learning_rate=0.01, shuffle=True):
        self.number_of_epochs = number_of_epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.shuffle = shuffle

In [None]:
#Train data

class TrainData(Dataset):
    """Dataset of (features, label) pairs stored as float32 tensors."""

    def __init__(self, x_train, y_train):
        # Both inputs are copied into float32 tensors once, up front.
        features = torch.tensor(x_train, dtype=torch.float32)
        labels = torch.tensor(y_train, dtype=torch.float32)
        self.x_train, self.y_train = features, labels

    def __len__(self):
        # One sample per row of the feature tensor.
        return len(self.x_train)

    def __getitem__(self, index):
        # Return the feature row together with its label.
        return (self.x_train[index], self.y_train[index])
    
# Wrap the training features and labels in a TrainData dataset.
train_data = TrainData(x_train, y_train)



In [None]:
#Test data

class TestData(Dataset):
    """Dataset of unlabeled feature rows stored as a float32 tensor."""

    def __init__(self, x_test):
        # Stored as `x_test` because __getitem__ and __len__ read that
        # attribute; storing it under another name makes both raise
        # AttributeError.
        self.x_test = torch.tensor(x_test, dtype=torch.float32)

    def __getitem__(self, index):
        # Return the feature row at `index` (there is no label).
        return self.x_test[index]

    def __len__(self):
        # One sample per row of the feature tensor.
        return len(self.x_test)
    
# Wrap the test features in a TestData dataset.
test_data = TestData(x_test)

In [None]:
# Create the settings object and the two data loaders.

hyperparameters = Hyperparameters()

# Training batches: batch size and shuffling come from the settings object.
train_loader = DataLoader(
    train_data,
    batch_size=hyperparameters.batch_size,
    shuffle=hyperparameters.shuffle,
)
# Test rows are served one sample per batch.
test_loader = DataLoader(test_data, batch_size=1)

In [None]:
#Define the model

class BrestCancerClassification(nn.Module):
    """Fully connected binary classifier with three linear layers.

    Layer widths are input_shape -> 64 -> 128 -> 1. ReLU follows the first two
    layers and a sigmoid follows the last, so forward() returns one value
    between 0 and 1 per input row.
    """

    def __init__(self, input_shape):
        super().__init__()

        hidden_1, hidden_2 = 64, 128
        self.fc1 = nn.Linear(input_shape, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, 1)

    def forward(self, x):
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden).sigmoid()

In [None]:
#Checking the GPU
!nvidia -smi

In [None]:
# Use the CUDA device when PyTorch reports one is available; otherwise fall
# back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Shape of the training features; its second entry is used as the model's
# input width in the next cell.
x_train.shape

In [None]:
# Build the model; the input width is the number of feature columns.
model = BrestCancerClassification(input_shape=x_train.shape[1])
# Adam optimizer over all model parameters, learning rate taken from the
# settings object.
optimizer = optim.Adam(model.parameters(), lr = hyperparameters.learning_rate)
# Binary cross-entropy loss (the model's forward ends in a sigmoid).
criterian = nn.BCELoss()
# Move the model to the selected device.
model.to(device)

In [None]:
#This function is responsible for calculating the accuracy for a given batch

def binary_acc(y_pred, y):
    """Return the accuracy of one batch as a whole-number percentage.

    Args:
        y_pred: tensor of predicted probabilities; each value is rounded to
            0 or 1 before comparison.
        y: tensor of true labels, same shape as ``y_pred``.

    Returns:
        A scalar tensor holding the share of matching predictions, multiplied
        by 100 and rounded to the nearest integer value.
    """
    results = torch.round(y_pred)
    correct_results_sum = (results == y).sum().float()
    acc = correct_results_sum / y.shape[0]
    # The result has to be returned: the training loop calls `.item()` on it.
    return torch.round(acc * 100)


In [None]:
#Training the model 
# Per-report history of the mean batch loss and mean batch accuracy.
losses = []
accuracies = []
# Put the model in train mode.
model.train()

for epoch in range(1, hyperparameters.number_of_epochs + 1):
    epoch_loss = 0
    epoch_accuracy = 0
    # The batch variables have their own names so the loop does not overwrite
    # the notebook-level `x_train`, `y_train` and `data` objects.
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)    # move the input features to the device
        # Labels are reshaped to a column to line up with the model output.
        y_batch = y_batch.to(device).reshape(-1, 1)

        # Zero the gradients so they don't accumulate into the next batch.
        optimizer.zero_grad()

        y_pred = model(x_batch)
        loss = criterian(y_pred, y_batch)
        acc = binary_acc(y_pred, y_batch)

        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_accuracy += acc.item()

    # Report and record the per-batch averages every fifth epoch.
    if epoch % 5 == 0:
        mean_loss = epoch_loss / len(train_loader)
        mean_acc = epoch_accuracy / len(train_loader)
        print(f'Epoch {epoch:03}: | Loss: {mean_loss:.5f} | Acc: {mean_acc:.3f}')
        losses.append(mean_loss)
        accuracies.append(mean_acc)
        


In [None]:
#plotting the losses

