In [3]:
import os
import torch
import pickle
import pandas as pd
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split, GridSearchCV
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from scikeras.wrappers import KerasRegressor # kerasgeressor?
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# tensorboard 
import sys
import torch.nn.functional as F
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime
log_dir = 'logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [5]:
# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# initializing the encoders and scaler
label_encoder_gender = LabelEncoder()
label_ohe_geo = OneHotEncoder()
scaler = StandardScaler()

# Hyper-parameters 
hidden_size = 200 
batch_size = 32
learning_rate = 0.001

# data loading and preprocessing
data = pd.read_csv('Churn_Modelling.csv')
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# encoding the gender col
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

# encoding the geo column
geo_encoder = label_ohe_geo.fit_transform(data[['Geography']])
cols = label_ohe_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(geo_encoder.toarray(), columns=cols)
data_encoded = pd.concat([data.drop(['Geography'], axis=1), geo_encoded_df], axis=1)

# defining the input features and output 
X = data_encoded.drop(['EstimatedSalary'], axis=1)
Y = data_encoded['EstimatedSalary'].values.reshape(-1, 1)


In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

Y_train

array([[179093.26],
       [195978.86],
       [ 85891.02],
       ...,
       [ 92220.12],
       [ 97508.04],
       [ 53581.14]])

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# converting to tesnsors 
X_train = torch.tensor(scaler.fit_transform(X_train), dtype=torch.float32)
Y_train = torch.tensor(scaler.fit_transform(Y_train), dtype=torch.float32)  

train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [5]:
train_dataset[2]

(tensor([-0.9615,  0.9132, -1.4164, -0.6954,  0.6186, -0.9167,  0.6492, -1.0258,
          1.9663, -0.9985, -0.5795,  1.7349]),
 tensor([-0.2528]))

In [6]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, hidden_size)  # First hidden layer
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, hidden_size) # second layer
        self.relu2 = nn.ReLU()
        self.l3 = nn.Linear(hidden_size, 1)  # Output layer (salary prediction)
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu1(out)
        out = self.l2(out)
        out = self.relu2(out)
        out = self.l3(out)  # No activation at the end for regression
        return out

input_size = X_train.shape[1]

# initializing the defined model
model = NeuralNet(input_size, hidden_size).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 

In [7]:
train_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

train_loader

<torch.utils.data.dataloader.DataLoader at 0x30553dd90>

In [8]:
for batch_idx, (features, labels) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1}:")
    print("Features:", features)
    print("Labels:", labels)

Batch 1:
Features: tensor([[ 1.2490e+00,  9.1325e-01, -4.6564e-01,  1.3868e+00, -1.2185e+00,
          8.0844e-01,  6.4920e-01, -1.0258e+00, -5.0858e-01,  1.0015e+00,
         -5.7947e-01, -5.7639e-01],
        [ 2.0584e+00,  9.1325e-01,  3.9001e-01,  6.9270e-01, -1.2185e+00,
          8.0844e-01,  6.4920e-01,  9.7482e-01, -5.0858e-01,  1.0015e+00,
         -5.7947e-01, -5.7639e-01],
        [-1.6879e+00, -1.0950e+00, -8.5351e-02, -1.3447e-03,  6.5946e-01,
         -9.1669e-01, -1.5404e+00, -1.0258e+00, -5.0858e-01,  1.0015e+00,
         -5.7947e-01, -5.7639e-01],
        [ 1.1037e+00,  9.1325e-01, -3.7057e-01, -1.3447e-03, -1.2185e+00,
          8.0844e-01,  6.4920e-01, -1.0258e+00, -5.0858e-01, -9.9850e-01,
         -5.7947e-01,  1.7349e+00],
        [-2.2587e+00,  9.1325e-01,  1.5309e+00,  3.4568e-01, -1.2185e+00,
         -9.1669e-01, -1.5404e+00,  9.7482e-01,  1.9663e+00, -9.9850e-01,
         -5.7947e-01,  1.7349e+00],
        [-8.0581e-01, -1.0950e+00, -5.6071e-01, -6.9539e-01, 

In [9]:
num_epochs = 100
loss_threshold = 0.2

for epoch in range(num_epochs):
    for batch_X, batch_Y in train_loader:
        batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)

        # Forward pass
        y_predicted = model(batch_X)
        loss = criterion(y_predicted, batch_Y)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

    if loss.item() < loss_threshold:
        print(f"Loss is below {loss_threshold}, saving the model...")
        torch.save(model.state_dict(), 'model_weights.pth')  # Saves the model weights


Epoch 1/100, Loss: 0.8927951455116272
Epoch 2/100, Loss: 1.3013989925384521
Epoch 3/100, Loss: 1.2629177570343018
Epoch 4/100, Loss: 1.1468415260314941
Epoch 5/100, Loss: 1.0868761539459229
Epoch 6/100, Loss: 1.0609710216522217
Epoch 7/100, Loss: 1.176145076751709
Epoch 8/100, Loss: 0.9033865928649902
Epoch 9/100, Loss: 0.8080427646636963
Epoch 10/100, Loss: 0.9480102062225342
Epoch 11/100, Loss: 0.6681060194969177
Epoch 12/100, Loss: 1.2240649461746216
Epoch 13/100, Loss: 1.2323538064956665
Epoch 14/100, Loss: 0.7958346605300903
Epoch 15/100, Loss: 0.7707509994506836
Epoch 16/100, Loss: 0.9844359159469604
Epoch 17/100, Loss: 1.317051649093628
Epoch 18/100, Loss: 1.0125024318695068
Epoch 19/100, Loss: 1.004472017288208
Epoch 20/100, Loss: 1.0695834159851074
Epoch 21/100, Loss: 1.0028235912322998
Epoch 22/100, Loss: 0.9669603705406189
Epoch 23/100, Loss: 0.6812388300895691
Epoch 24/100, Loss: 1.0388717651367188
Epoch 25/100, Loss: 1.117794394493103
Epoch 26/100, Loss: 1.155645489692688


In [10]:
ten_1 = torch.tensor([-0.9615,  0.9132, -1.4164, -0.6954,  0.6186, -0.9167,  0.6492, -1.0258,
          1.9663, -0.9985, -0.5795,  1.7349], dtype=torch.float32)

ten_2 = torch.tensor([85891.0234], dtype=torch.float32)

ten_1

tensor([-0.9615,  0.9132, -1.4164, -0.6954,  0.6186, -0.9167,  0.6492, -1.0258,
         1.9663, -0.9985, -0.5795,  1.7349])

In [11]:
y_hat = model(ten_1)
y_hat

tensor([-0.0444], grad_fn=<ViewBackward0>)

In [12]:
criterion(y_hat, ten_2)

tensor(7.3773e+09, grad_fn=<MseLossBackward0>)

In [19]:
pickle_dir = 'pickles'

with open(os.path.join(pickle_dir, 'X_test.pkl'), 'rb') as f:
    pickle.load(f)

X_test.shape[1]

12

In [9]:
from training import X_test, Y_test
import torch

X_test_scaled = scaler.fit_transform(X_test)  # Scale input features
X_test_scaled = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
Y_test

array([[ 41788.37],
       [146379.3 ],
       [ 58561.31],
       ...,
       [ 33373.26],
       [ 76755.99],
       [107674.3 ]])

In [11]:
# defining a function to create the model and try different parameters

def create_model(neruons=32, layers=1):
    model = Sequential()
    model.add(Dense(neruons, activation='relu', input_size = (X_train.shape[1], )))

    for i in range(layers-1):
        model.add(Dense(neruons, activation='relu'))
    
    model.add(Dense(1))
    model.compile(
        optimizer = opt,
        loss = loss,
        metrics = ['mse']
    )

    return model

In [12]:
# create a keraclassifier

model =  KerasRegressor(
    build_fn = create_model, # here we define what is our grid search for the model architecture
    epochs = 50,
    batch_size = 10,
    verbose = 0
)

NameError: name 'KerasClassifier' is not defined

In [4]:
param_grid = {
    'neurons': [16, 32, 64, 128],
    'layers': [1, 2],
    'epochs': [50, 100]
}

In [None]:
gird = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

print('Best: %f, using %s' % (grid_result.best_score_, grid_result.best_params_))

In [10]:
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from training_tf import input_size, X_train_scaled, Y_train_scaled

param_grid = {
    'neruons': [16, 32, 64, 128, 256],
    'layers': [1, 2],
    'epochs': [50, 100]
}

def create_model(neruons=32, layers=1):
    model = Sequential()
    model.add(Dense(neruons, activation='relu', input_size = (12, )))

    for _ in range(layers-1):
        model.add(Dense(neruons, activation='relu'))
    
    model.add(Dense(1))
    model.compile(
        optimizer = 'adam',
        loss = 'mse',
        metrics = ['accuracy']
    )

    return model

model =  KerasRegressor(
    build_fn = create_model, # here we define what is our grid search for the model architecture
    batch_size = 10,
    verbose = 0
)



In [12]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)

In [13]:
print(grid.get_params())

{'cv': 3, 'error_score': nan, 'estimator__model': None, 'estimator__build_fn': <function create_model at 0x158975da0>, 'estimator__warm_start': False, 'estimator__random_state': None, 'estimator__optimizer': 'rmsprop', 'estimator__loss': None, 'estimator__metrics': None, 'estimator__batch_size': 10, 'estimator__validation_batch_size': None, 'estimator__verbose': 0, 'estimator__callbacks': None, 'estimator__validation_split': 0.0, 'estimator__shuffle': True, 'estimator__run_eagerly': False, 'estimator__epochs': 1, 'estimator': KerasRegressor(
	model=None
	build_fn=<function create_model at 0x158975da0>
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=10
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=1
), 'n_jobs': -1, 'param_grid': {'neruons': [16, 32, 64, 128, 256], 'layers': [1, 2], 'epochs': [50, 100]}, 'pre_dispatch': '2*n_jobs', 'refit': True, 'return_train_score': Fal