# Bank Marketing

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

def _one_hot_encode(frame, levels_by_column):
    """Return a one-hot encoded copy of *frame* using fixed category levels.

    Each column named in *levels_by_column* is converted to a categorical
    with exactly the given levels before encoding, so every frame encoded
    with the same mapping gets the same dummy columns (and the same dropped
    first level), whichever values happen to occur in it. Values outside
    the given levels become missing and encode as all-zero dummies.
    """
    encoded = frame.copy()
    for column, levels in levels_by_column.items():
        encoded[column] = pd.Categorical(encoded[column], categories=levels)
    return pd.get_dummies(encoded, columns=list(levels_by_column), drop_first=True)


train_df = pd.read_csv("termdeposit_train.csv")
test_df = pd.read_csv("termdeposit_test.csv")

categorical_columns = ['job', 'marital', 'education', 'default', 'housing',
                       'loan', 'contact', 'month', 'poutcome']

# Take the category levels from the training data only. Encoding the two
# files independently would give them different dummy columns whenever a
# category is absent from (or only present in) one of them.
category_levels = {
    column: pd.Categorical(train_df[column]).categories
    for column in categorical_columns
}
train_df = _one_hot_encode(train_df, category_levels)
test_df = _one_hot_encode(test_df, category_levels)

X_train = train_df.drop(['ID', 'subscribed'], axis=1)
y_train = train_df['subscribed']
# Select the test features by the training column list so names and order
# match what the model is fitted on; a missing column fails here with a
# KeyError instead of later inside predict().
X_test = test_df.drop('ID', axis=1)[X_train.columns]

# Hold out 20% of the labelled rows for validation.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def _print_performance(title, y_true, y_pred):
    """Print *title*, then the accuracy and classification report for the predictions."""
    print(title)
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Classification Report:")
    print(classification_report(y_true, y_pred))


# Baseline: random forest with default hyperparameters and a fixed seed.
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score on the rows the model was fitted on ...
y_pred_train = rf_classifier.predict(X_train)
_print_performance("Training Set Performance:", y_train, y_pred_train)

# ... and on the held-out validation rows.
y_pred_val = rf_classifier.predict(X_val)
_print_performance("\nValidation Set Performance:", y_val, y_pred_val)


# Refit with hand-picked hyperparameters that constrain tree depth and node sizes.
tuned_params = {
    'max_depth': 10,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
}
rf_classifier = RandomForestClassifier(random_state=42, **tuned_params)
rf_classifier.fit(X_train, y_train)

# Evaluate the refitted model on the validation rows.
y_pred_val_hyper = rf_classifier.predict(X_val)
val_accuracy_hyper = accuracy_score(y_val, y_pred_val_hyper)
val_report_hyper = classification_report(y_val, y_pred_val_hyper)
print("\nValidation Set Performance (After Hyperparameter Adjustment):")
print("Accuracy:", val_accuracy_hyper)
print("Classification Report:")
print(val_report_hyper)

# Predict the test rows and show each prediction next to its ID.
y_test_pred = rf_classifier.predict(X_test)
test_df['predicted_subscribed'] = y_test_pred
print("\nPredictions on Test Dataset:")
print(test_df.loc[:, ['ID', 'predicted_subscribed']])


Training Set Performance:
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

          no       1.00      1.00      1.00     22333
         yes       1.00      1.00      1.00      2984

    accuracy                           1.00     25317
   macro avg       1.00      1.00      1.00     25317
weighted avg       1.00      1.00      1.00     25317


Validation Set Performance:
Accuracy: 0.903001579778831
Classification Report:
              precision    recall  f1-score   support

          no       0.92      0.97      0.95      5599
         yes       0.63      0.39      0.48       731

    accuracy                           0.90      6330
   macro avg       0.78      0.68      0.71      6330
weighted avg       0.89      0.90      0.89      6330


Validation Set Performance (After Hyperparameter Adjustment):
Accuracy: 0.8984202211690363
Classification Report:
              precision    recall  f1-score   support

          no       0.90      0.99