In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Step 1: Load the dataset
df = pd.read_csv('bank.csv')

# Inspect the DataFrame to find the correct target column name
print(df.columns)

# Assuming the target variable is 'y' (adjust if necessary)
target_column = 'y'  # Change this to the correct column name if different

# Step 2: Handle missing values
# Check for missing values
print(df.isnull().sum())

# Assuming there are no missing values in this dataset, if there are, you can handle them like this:
# df.fillna(df.median(), inplace=True)

# Step 3: Encode categorical variables
# Identify categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns

# Apply LabelEncoder to each categorical column
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Step 4: Scale numerical features
# Identify numerical columns
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns

# Apply StandardScaler to numerical columns
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Step 5: Separate features and target variable
X = df.drop(target_column, axis=1)
y = df[target_column]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data preprocessing completed.")

In [None]:
# Compare Accuracies
rf_accuracy = accuracy_score(y_test, rf_y_pred_test)  # Corrected variable
nn_accuracy = accuracy_score(y_true, y_pred)

print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"Neural Network Accuracy Pytorch: {nn_accuracy:.4f}")
print(f"Neural Network Accuracy tensorflow: {nn_ten:.4f}")

if nn_accuracy > rf_accuracy:
    print("Neural Network performs better.")
elif rf_accuracy > nn_accuracy:
    print("Random Forest performs better.")
else:
    print("Both models perform equally well.")


Random Forest Accuracy: 0.9055
Neural Network Accuracy Pytorch: 0.9077
Neural Network Accuracy tensorflow: 0.9008
Neural Network performs better.


In [7]:
import xgboost as xgb

if xgb.cuda_libs.is_xgboost_gpu_installed():
    print("GPU support is available.")
    tree_method = 'gpu_hist' 
else:
    print("GPU support is not available.")
    tree_method = 'hist'

AttributeError: module 'xgboost' has no attribute 'cuda_libs'

In [8]:
import xgboost as xgb

if xgb.cuda_libs.is_xgboost_gpu_installed():
    print("GPU support is available.")
    tree_method = 'gpu_hist'  # Use GPU for training
else:
    print("GPU support is not available.")
    tree_method = 'hist'      # Use CPU for training

xgb_model = xgb.XGBClassifier(
    objective='binary:logistic',
    tree_method=tree_method,
    random_state=42,
    n_jobs=-1  # Utilize all available CPU cores
)

AttributeError: module 'xgboost' has no attribute 'cuda_libs'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the Neural Network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Function to train the model
def train_model(model, train_loader, criterion, optimizer, device, epochs=20):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

# Function to evaluate the model
def evaluate_model(model, test_loader, device):
    model.eval()
    y_pred, y_true = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(y_batch.cpu().numpy())
    return accuracy_score(y_true, y_pred)

# Hyperparameter search
hidden_sizes = [32, 64, 128]
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [16, 32, 64]

best_accuracy = 0
best_hyperparameters = {}

for hidden_size in hidden_sizes:
    for lr in learning_rates:
        for batch_size in batch_sizes:
            # Prepare data loaders
            train_dataset = TensorDataset(torch.tensor(X_train.values, dtype=torch.float32),
                                           torch.tensor(y_train.values, dtype=torch.long))
            test_dataset = TensorDataset(torch.tensor(X_test.values, dtype=torch.float32),
                                          torch.tensor(y_test.values, dtype=torch.long))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model, loss, and optimizer
            model = NeuralNet(input_size=X_train.shape[1], hidden_size=hidden_size, output_size=2).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # Train and evaluate
            train_model(model, train_loader, criterion, optimizer, device, epochs=20)
            accuracy = evaluate_model(model, test_loader, device)

            # Save best hyperparameters
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_hyperparameters = {'hidden_size': hidden_size, 'lr': lr, 'batch_size': batch_size}

            print(f"Hidden Size: {hidden_size}, LR: {lr}, Batch Size: {batch_size}, Accuracy: {accuracy:.4f}")

# Output the best hyperparameters
print(f"Best Hyperparameters: {best_hyperparameters}")
print(f"Best Accuracy: {best_accuracy:.4f}")


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Evaluate Random Forest predictions
# rf_y_pred_test = best_rf_model.predict(X_test)
# rf_test_accuracy = accuracy_score(y_test, rf_y_pred_test)

print("Random Forest - Test Accuracy:", accuracy_score(y_test, rf_y_pred_test))
print("Random Forest - Classification Report:")
print(classification_report(y_test, rf_y_pred_test))


In [None]:
# Evaluate Neural Network predictions
nn_test_accuracy = evaluate_model(model, test_loader, device)
print(f"Neural Network - Test Accuracy: {nn_test_accuracy:.4f}")

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# After tuning, you already have the best_model (tuned TensorFlow model)

# Predict the outcomes on the test set
nn_test_preds_tuned = (best_model.predict(X_test) > 0.5).astype("int32")

# Calculate accuracy after tuning for the TensorFlow model
test_accuracy = accuracy_score(y_test, nn_test_preds_tuned)

# Print the accuracy after tuning
print(f"TensorFlow Neural Network Accuracy (Tuned): {nn_accuracy_tuned:.4f}")

# Optionally, print the classification report for further evaluation
print("\nTensorFlow Neural Network Classification Report (Tuned):")
print(classification_report(y_test, nn_test_preds_tuned))
