# **Original Code**

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset



# Load data
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
                'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", names=column_names, sep=r'\s*,\s*', engine='python')

# Preprocess data
data['income'] = data['income'].apply(lambda x: 0 if x == "<=50K" else 1)
categorical_columns = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
numerical_columns = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

for category in categorical_columns:
    data[category] = data[category].astype('category').cat.codes

data = data.sample(frac=1).reset_index(drop=True)  # Shuffle data
#import pdb; pdb.set_trace()
# Create DataLoader
X = torch.tensor(data[categorical_columns + numerical_columns].values, dtype=torch.float32)
y = torch.tensor(data['income'].values, dtype=torch.float32)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Define neural network with embeddings
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # import pdb; pdb.set_trace() # cursor here
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), 10) for column in categorical_columns])
        self.fc1 = nn.Linear(10*len(categorical_columns) + len(numerical_columns), 50)
        self.fc2 = nn.Linear(50, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x.squeeze()

# Train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    for batch_X, batch_y in loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

print("Training complete!")

Epoch 1, Loss: 29.4118
Epoch 2, Loss: 29.4118
Epoch 3, Loss: 41.1765
Epoch 4, Loss: 23.5294
Epoch 5, Loss: 35.2941
Training complete!


# **Create Test and Train Data**

In [None]:
#Splitting the Data
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)

In [None]:
#Create the Train DataLoader
train_x = torch.tensor(train_data[categorical_columns + numerical_columns].values, dtype=torch.float32)
train_y = torch.tensor(train_data['income'].values, dtype=torch.float32)
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
#Create the Test DataLoader
X = torch.tensor(test_data[categorical_columns + numerical_columns].values, dtype=torch.float32)
y = torch.tensor(test_data['income'].values, dtype=torch.float32)
test_dataset = TensorDataset(X, y)
test_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# **Evaluate The Original Code's Accuracy**

In [None]:
# Define neural network with embeddings
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # import pdb; pdb.set_trace() # cursor here
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), 10) for column in categorical_columns])
        self.fc1 = nn.Linear(10*len(categorical_columns) + len(numerical_columns), 50)
        self.fc2 = nn.Linear(50, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x.squeeze()

# Train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 70.5882
Epoch 2, Loss: 94.1176
Epoch 3, Loss: 76.4706
Epoch 4, Loss: 70.5882
Epoch 5, Loss: 70.5882


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 24.08%


# **Changing the Layers and Length**

In [None]:
# Define neural network with embeddings
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # import pdb; pdb.set_trace() # cursor here
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), 5) for column in categorical_columns])
        self.fc1 = nn.Linear(5*len(categorical_columns) + len(numerical_columns), 25)
        self.fc2 = nn.Linear(25,10)
        self.fc3 = nn.Linear(10, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x.squeeze()

# Train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.6113
Epoch 2, Loss: 0.6130
Epoch 3, Loss: 0.4799
Epoch 4, Loss: 0.3283
Epoch 5, Loss: 0.5458


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 75.92%


# **Changing The Model Back to Original Lengths with the Additional Layer**

> Indented block



In [None]:
# Define neural network with embeddings
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # import pdb; pdb.set_trace() # cursor here
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), 10) for column in categorical_columns])
        self.fc1 = nn.Linear(10*len(categorical_columns) + len(numerical_columns), 50)
        self.fc2 = nn.Linear(50,10)
        self.fc3 = nn.Linear(10, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x.squeeze()

# Train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.4104
Epoch 2, Loss: 0.6134
Epoch 3, Loss: 0.5983
Epoch 4, Loss: 0.6145
Epoch 5, Loss: 0.4772


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 75.92%


# **Let's Try Some Dropout Layers and Batch Normalization**


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.9038
Epoch 2, Loss: 0.3152
Epoch 3, Loss: 0.4358
Epoch 4, Loss: 0.6756
Epoch 5, Loss: 0.3248


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 83.92%


# **Updating the Learning Rate**


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.4108
Epoch 2, Loss: 0.4541
Epoch 3, Loss: 0.2030
Epoch 4, Loss: 0.1581
Epoch 5, Loss: 0.3583


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 83.79%


# **increasing the Epochs**


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(20):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.6038
Epoch 2, Loss: 0.3757
Epoch 3, Loss: 0.7742
Epoch 4, Loss: 0.3576
Epoch 5, Loss: 0.4793
Epoch 6, Loss: 0.3990
Epoch 7, Loss: 0.3750
Epoch 8, Loss: 0.5500
Epoch 9, Loss: 0.5137
Epoch 10, Loss: 0.3254
Epoch 11, Loss: 0.4063
Epoch 12, Loss: 0.4169
Epoch 13, Loss: 0.6027
Epoch 14, Loss: 0.3511
Epoch 15, Loss: 0.4453
Epoch 16, Loss: 0.3977
Epoch 17, Loss: 0.2839
Epoch 18, Loss: 0.3381
Epoch 19, Loss: 0.4573
Epoch 20, Loss: 0.1934


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 84.02%


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.6674
Epoch 2, Loss: 0.4885
Epoch 3, Loss: 0.3507
Epoch 4, Loss: 0.3196
Epoch 5, Loss: 0.2484


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 83.08%


# **Attempted Adding a Fourth Layer within Network**

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 200)
        self.bn1 = nn.BatchNorm1d(200)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(200, 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(100,50)
        self.bn3 = nn.BatchNorm1d(50)
        self.dropout3 = nn.Dropout(0.1)

        self.fc4 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.relu(self.fc3(x))
        x = self.dropout3(x)

        x = torch.sigmoid(self.fc4(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.3563
Epoch 2, Loss: 0.3362
Epoch 3, Loss: 0.4057
Epoch 4, Loss: 0.4572
Epoch 5, Loss: 0.2940


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 80.77%


# **All Sigmoid**

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.4)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.sigmoid(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.sigmoid(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.3109
Epoch 2, Loss: 0.4953
Epoch 3, Loss: 0.3526
Epoch 4, Loss: 0.5317
Epoch 5, Loss: 0.3232


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 84.47%


# **Try To Use A Different Optimizer**


In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(len(data[column]), min(50, len(data[column]) // 2)) for column in categorical_columns])
        embedding_dim = sum([min(50, len(data[column]) // 2) for column in categorical_columns])

        self.fc1 = nn.Linear(embedding_dim + len(numerical_columns), 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.dropout1 = nn.Dropout(0.4)

        self.fc2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)
        self.dropout2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x_cat = x[:, :len(categorical_columns)].long()
        x_num = x[:, len(categorical_columns):]
        x_cat = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x_cat = torch.cat(x_cat, 1)
        x = torch.cat([x_cat, x_num], 1)

        x = torch.sigmoid(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.sigmoid(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))
        return x.squeeze()

# Initialize and train the model
model = Net()
criterion = nn.BCELoss()
optimizer = optim.Adamax(model.parameters(), lr=0.001)

In [None]:
for epoch in range(5):
    for batch_train_x, batch_train_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_train_x)
        loss = criterion(outputs, batch_train_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.3300
Epoch 2, Loss: 0.6270
Epoch 3, Loss: 0.3911
Epoch 4, Loss: 0.5439
Epoch 5, Loss: 0.3218


In [None]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    for batch_test_x, batch_test_y in test_loader:
        outputs = model(batch_test_x)
        predicted = (outputs > 0.5).float()  # Assuming a binary classification problem
        total += batch_test_y.size(0)
        correct += (predicted == batch_test_y).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 76.14%
