<a href="https://colab.research.google.com/github/abhishek6361/ybi/blob/main/project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



```
# This is formatted as code
```



# Import necessary libraries (pandas, NumPy, scikit-learn, imbalanced-learn)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

# Import data


In [None]:
# Read the bank customer churn CSV (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='bank_customer_churn_data.csv')

# Data Preparation


In [None]:
# Use customer_id as the row index so it is not carried along as a column.
# Rebinding (rather than inplace=True) and checking that the column is still
# present keeps this cell safe to re-run: a second execution is a no-op instead
# of raising KeyError because the column has already been moved to the index.
if 'customer_id' in data.columns:
    data = data.set_index('customer_id')

# Encoding

In [None]:
# Integer codes for the two categorical columns.
geography_codes = {'France': 2, 'Germany': 1, 'Spain': 0}
gender_codes = {'Male': 0, 'Female': 1}

for column, codes in (('geography', geography_codes), ('gender', gender_codes)):
    encoded = data[column].map(codes)
    # Series.map yields NaN for every value missing from the mapping. That
    # happens for unexpected categories and also when this cell is re-run on
    # already-encoded data, so fail loudly here instead of passing NaNs on.
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = sorted(data.loc[unmapped, column].astype(str).unique())
        raise ValueError(
            f"Cannot encode column {column!r}: unexpected values {unexpected}; "
            f"expected only {sorted(codes)}. "
            "If this cell was already run, reload the data first."
        )
    data[column] = encoded

# Binary flag: 1 where the balance is exactly zero, 0 otherwise.
data['zero_bank_balance'] = (data['balance'] == 0).astype(int)

# Define features and label

In [None]:
# Target column is 'churn'; every remaining column except 'surname' is a feature.
label = data['churn']
features = data.drop(columns=['surname', 'churn'])

# Random Undersampling

In [None]:
# Randomly under-sample the full feature/label set with a fixed seed.
# NOTE(review): this runs before the train/test split, so the resampled set
# can contain rows that later land in the test split.
rus = RandomUnderSampler(random_state=42)
X_rus, y_rus = rus.fit_resample(X=features, y=label)


# Random Oversampling

In [None]:
# Randomly over-sample the full feature/label set with a fixed seed.
# NOTE(review): this runs before the train/test split, so the resampled set
# can contain rows that later land in the test split.
ros = RandomOverSampler(random_state=42)
X_ros, y_ros = ros.fit_resample(X=features, y=label)

# Train-test split

In [None]:
# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    label,
    test_size=0.2,
    random_state=42,
)

# Standardize features

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM with raw (non-resampled, standardized) data

In [None]:
# Baseline: SVC with default hyperparameters, trained on the standardized
# training split without any resampling and scored on the standardized test split.
svm_raw = SVC(random_state=42).fit(X_train_scaled, y_train)
accuracy_raw = svm_raw.score(X_test_scaled, y_test)
print(f"Accuracy with raw data: {accuracy_raw}")

# SVM with undersampled data

In [None]:
# Under-sample only the standardized training split. The model has to be
# trained in the same feature space it is scored in (X_test_scaled), and the
# held-out rows must not take part in resampling; otherwise the reported
# accuracy is not a fair estimate of performance on unseen data.
X_train_rus, y_train_rus = RandomUnderSampler(random_state=42).fit_resample(
    X_train_scaled, y_train
)

svm_rus = SVC(random_state=42)
svm_rus.fit(X_train_rus, y_train_rus)
accuracy_rus = svm_rus.score(X_test_scaled, y_test)
print(f"Accuracy with undersampled data: {accuracy_rus}")

# SVM with oversampled data

In [None]:
# Over-sample only the standardized training split. The model has to be
# trained in the same feature space it is scored in (X_test_scaled), and
# over-sampling before the split would copy held-out rows into the training
# data, inflating the reported accuracy.
X_train_ros, y_train_ros = RandomOverSampler(random_state=42).fit_resample(
    X_train_scaled, y_train
)

svm_ros = SVC(random_state=42)
svm_ros.fit(X_train_ros, y_train_ros)
accuracy_ros = svm_ros.score(X_test_scaled, y_test)
print(f"Accuracy with oversampled data: {accuracy_ros}")

# Example of hyperparameter tuning using GridSearchCV

In [None]:
# Candidate hyperparameters: every combination of C, gamma and kernel is tried.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
    kernel=['rbf', 'poly', 'sigmoid'],
)

# 5-fold cross-validated search over the grid, fitted on the standardized
# training split.
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Score the best estimator found by the search on the held-out test split.
best_model = grid_search.best_estimator_
best_accuracy = best_model.score(X_test_scaled, y_test)
print(f"Best model accuracy: {best_accuracy}")
print(f"Best parameters: {grid_search.best_params_}")

In [None]:
# Mount Google Drive only when running inside Colab. Elsewhere the
# google.colab package is not importable, so the mount step is skipped and
# "Run All" can still complete instead of stopping on ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive')