In [None]:
# Re-import necessary libraries due to kernel reset
import pandas as pd

# Path of the CSV file, then parse it into a DataFrame
file_path = "/mnt/data/bank.csv"
bank_df = pd.read_csv(filepath_or_buffer=file_path)

# Count how often each value of the `deposit` column occurs,
# and keep the first five rows for a quick visual check
target_distribution = bank_df.loc[:, 'deposit'].value_counts()
initial_preview = bank_df.head(n=5)

# Cell output: the preview followed by the target counts
(initial_preview, target_distribution)


In [None]:
from sklearn.preprocessing import LabelEncoder

# Work on a copy so `bank_df` stays untouched and this cell can be re-run safely
df = bank_df.copy()

# Step 1: Per-column count of missing values (displayed below; nothing is dropped or imputed here)
missing_values = df.isnull().sum()

# Step 2: Encode the target variable as 1 ('yes') / 0 ('no')
target_mapping = {'yes': 1, 'no': 0}
encoded_target = df['deposit'].map(target_mapping)

# `Series.map` with a dict yields NaN for every value that is not a key of the
# dict. Fail loudly here instead of letting NaN targets reach model training.
unmapped_mask = encoded_target.isnull()
if unmapped_mask.any():
    unexpected_labels = df.loc[unmapped_mask, 'deposit'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'deposit': {unexpected_labels!r}; "
        f"expected only {sorted(target_mapping)}"
    )
df['deposit'] = encoded_target

# Step 3: Identify categorical columns (object dtype); `deposit` is numeric by now
categorical_cols = df.select_dtypes(include='object').columns

# Step 4: Encode categorical variables using Label Encoding for simplicity.
# One fitted encoder per column is kept in `label_encoders`, keyed by column name.
# NOTE(review): the integer codes imply an ordering between categories that the
# model will treat as numeric distance - consider one-hot/ordinal encoding
# designed for input features if that matters for the results.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Step 5: Check cleaned data
cleaned_preview = df.head()
df_shape = df.shape

# `missing_values` was computed on the copy before any encoding took place
missing_values, cleaned_preview, df_shape


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# One seed for both the data split and the Keras run, so the reported accuracy
# is repeatable under Restart & Run All (as far as the backend allows).
RANDOM_SEED = 42
# Seeds Python's `random`, NumPy and the TensorFlow backend in a single call;
# must run before the model is built so weight initialisation is covered.
set_random_seed(RANDOM_SEED)

# Split data into features and target
X = df.drop('deposit', axis=1)
y = df['deposit']

# Split into train and test sets BEFORE scaling, so statistics of the held-out
# rows never leak into the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

# Standardize the features: fit on the training rows only, then apply the same
# fitted transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the train-fitted scale. Not used below; kept because
# the name `X_scaled` was defined here before and later cells may rely on it.
X_scaled = scaler.transform(X)

# Build the deep learning model: two hidden ReLU layers with dropout after each
model = Sequential([
    Input(shape=(X_train.shape[1],)),  # explicit input layer, one unit per feature column
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training rows are held out for validation by Keras
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=0)

# Evaluate on test set (returns the loss followed by the compiled metrics)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

test_accuracy
