In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('./Datasets/bank_transactions_data_2.csv')

# Data Preprocessing
# Encode categorical variables
categorical_columns = ['TransactionType', 'Location', 'Channel', 'CustomerOccupation']

for col in categorical_columns:
    data[col] = LabelEncoder().fit_transform(data[col])

# Feature selection
# Select some meaningful features for SVM classification
features = ['TransactionAmount', 'CustomerAge', 'TransactionDuration', 'AccountBalance', 'LoginAttempts']
X = data[features]

# For demonstration, let's create a binary target variable based on `TransactionType` 
# (e.g., classify transactions as Credit (1) or Debit (0))
y = data['TransactionType']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train and evaluate SVM with Linear Kernel
# max_iter : This sets the maximum number of iterations the algorithm will perform during optimization (i.e., training).
# tol : The training will stop when the difference in the optimization objective between iterations is smaller than this threshold (1e-3 or 0.001).
linear_svm = SVC(kernel='linear', random_state=42, max_iter=2000, tol=1e-5)
linear_svm.fit(X_train, y_train)
y_pred_linear = linear_svm.predict(X_test)
accuracy_linear = accuracy_score(y_test, y_pred_linear)
print("Accuracy with Linear Kernel:", accuracy_linear)

# Train and evaluate SVM with Polynomial Kernel
poly_svm = SVC(kernel='poly', degree=3, random_state=42)  # degree=3 for cubic polynomial
poly_svm.fit(X_train, y_train)
y_pred_poly = poly_svm.predict(X_test)
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print("Accuracy with Polynomial Kernel:", accuracy_poly)

# Comparison
if accuracy_linear > accuracy_poly:
    print("Linear kernel performs better.")
else:
    print("Polynomial kernel performs better.")


Accuracy with Linear Kernel: 0.53315649867374
Accuracy with Polynomial Kernel: 0.7811671087533156
Polynomial kernel performs better.


