In [19]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [20]:
df = pd.read_csv('data/bank_transactions_data_2.csv')


In [21]:
df.dropna(inplace=True)


In [22]:
label_encoder = LabelEncoder()

# Columns that need encoding
categorical_columns = ['TransactionType', 'Location', 'DeviceID', 'MerchantID', 'Channel', 'CustomerOccupation']

for col in categorical_columns:
    df[col] = label_encoder.fit_transform(df[col])

In [23]:
features = ['TransactionAmount', 'CustomerAge', 'TransactionDuration', 'LoginAttempts', 'AccountBalance']
X = df[features]

# Target variable: let's predict 'TransactionType' (Debit=0, Credit=1)
y = df['TransactionType']

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Standardize the features (important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [32]:
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train_scaled, y_train)

In [33]:
y_pred_linear = svm_linear.predict(X_test_scaled)

In [34]:
accuracy_linear = accuracy_score(y_test, y_pred_linear)
print("SVM with Linear Kernel Accuracy: ", accuracy_linear)
print("Classification Report for Linear Kernel:")
print(classification_report(y_test, y_pred_linear))


SVM with Linear Kernel Accuracy:  0.7773359840954275
Classification Report for Linear Kernel:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       112
           1       0.78      1.00      0.87       391

    accuracy                           0.78       503
   macro avg       0.39      0.50      0.44       503
weighted avg       0.60      0.78      0.68       503



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
svm_poly = SVC(kernel='poly', degree=3)
svm_poly.fit(X_train_scaled, y_train)

# Make predictions for Polynomial Kernel
y_pred_poly = svm_poly.predict(X_test_scaled)


In [36]:
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print("SVM with Polynomial Kernel Accuracy: ", accuracy_poly)
print("Classification Report for Polynomial Kernel:")
print(classification_report(y_test, y_pred_poly))

SVM with Polynomial Kernel Accuracy:  0.7773359840954275
Classification Report for Polynomial Kernel:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       112
           1       0.78      1.00      0.87       391

    accuracy                           0.78       503
   macro avg       0.39      0.50      0.44       503
weighted avg       0.60      0.78      0.68       503



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
if accuracy_linear > accuracy_poly:
    print("The Linear Kernel model performs better.")
else:
    print("The Polynomial Kernel model performs better.")

The Polynomial Kernel model performs better.
