In [12]:
# Pseudocode:
# Load the necessary libraries
# Read the dataset into a Pandas DataFrame
# Filter the dataset to include only non-fraudulent transactions
# Train a One-Class SVM on this filtered dataset
# Predict "grey area transactions" as those where the One-Class SVM predicts an anomaly

# Python code:
import pandas as pd
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, classification_report

In [5]:
# Load the dataset
data1 = pd.read_csv('creditcard.csv')

In [6]:
data1.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [7]:
## Take some sample of the data

data= data1.sample(frac = 0.1,random_state=1)

data.shape

(28481, 31)

In [8]:
# Filter out non-fraudulent transactions
non_fraud = data[data['Class'] == 0]
non_fraud.shape

(28432, 31)

In [9]:
# Feature selection: Exclude 'Time' and 'Class' for training the model
X_non_fraud = non_fraud.drop(['Time', 'Class'], axis=1)
print('done1')

# Initialize One-Class SVM model
# Note: You'll need to adjust 'nu' parameter which is an upper bound on the fraction of training errors
# and a lower bound of the fraction of support vectors.
one_class_svm_non_fraud = OneClassSVM(kernel='rbf', gamma='auto', nu=0.01)
print('done2')

# Train the model
one_class_svm_non_fraud.fit(X_non_fraud)
print('done3')

# Predict using the trained model (1 for inliers, -1 for outliers)
# Here, we're using the entire dataset for prediction to find "grey area transactions"
predictions = one_class_svm_non_fraud.predict(data.drop(['Time', 'Class'], axis=1))
data['Grey_Non_Fraud'] = predictions
print('done4')


done1
done2
done3
done4


In [10]:
# Count the number of "grey area transactions"
grey_area_count = (data['Grey_Non_Fraud'] == -1).sum()

# Print the number of "grey area transactions"
print(f'Number of "grey area transactions": {grey_area_count}')


Number of "grey area transactions": 1734


In [13]:

data['SVM_Predicted_Class'] = data['Grey_Non_Fraud'].apply(lambda x: 1 if x == -1 else 0)

# Actual class labels of the entire dataset
Y_true = data['Class']

# Predicted class labels from One-Class SVM
Y_pred_svm = data['SVM_Predicted_Class']

# Calculate the accuracy score for the One-Class SVM
svm_accuracy = accuracy_score(Y_true, Y_pred_svm)

# Generate a classification report for the One-Class SVM
svm_class_report = classification_report(Y_true, Y_pred_svm)

# Print the accuracy score and classification report
print(f'Accuracy Score for One-Class SVM: {svm_accuracy:.4f}')
print('Classification Report for One-Class SVM:')
print(svm_class_report)

Accuracy Score for One-Class SVM: 0.9403
Classification Report for One-Class SVM:
              precision    recall  f1-score   support

           0       1.00      0.94      0.97     28432
           1       0.02      0.86      0.05        49

    accuracy                           0.94     28481
   macro avg       0.51      0.90      0.51     28481
weighted avg       1.00      0.94      0.97     28481



In [15]:
# Calculate the total number of "grey area transactions" where the prediction is -1 (outliers marked as 1)
total_grey_area_transactions = (data['SVM_Predicted_Class'] == 1).sum()


# Find the total number of fraudulent transactions in the dataset
total_fraudulent_transactions = (data['Class'] == 1).sum()

# Calculate the percentage of "grey area transactions" relative to the total number of fraudulent transactions
percentage_grey_area_transactions = (total_grey_area_transactions / total_fraudulent_transactions) * 100

# Print the total and the percentage of "grey area transactions"
print(f'Total number of "grey area transactions": {total_grey_area_transactions}')
print(f'Total number of "fraudulent transactions": {total_fraudulent_transactions}')
print(f'Percentage of "grey area transactions" relative to total fraudulent transactions: {percentage_grey_area_transactions:.2f}%')


Total number of "grey area transactions": 1734
Total number of "fraudulent transactions": 49
Percentage of "grey area transactions" relative to total fraudulent transactions: 3538.78%


In [16]:
# Pseudocode:
# Filter the dataset to include only fraudulent transactions
# Train a One-Class SVM on this filtered dataset
# Predict "grey area transactions" as those where the One-Class SVM predicts an anomaly

# Python code:
# Filter out fraudulent transactions
fraud = data[data['Class'] == 1]
print('done1')

# Feature selection: Exclude 'Time' and 'Class' for training the model
X_fraud = fraud.drop(['Time', 'Class'], axis=1)
print('done2')

# Initialize One-Class SVM model
one_class_svm_fraud = OneClassSVM(kernel='rbf', gamma='auto', nu=0.01)
print('done3')

# Train the model on fraudulent data
one_class_svm_fraud.fit(X_fraud)
print('done4')

# Predict using the trained model (1 for inliers, -1 for outliers)
# Here, we're using the entire dataset for prediction to find "grey area transactions"
fraud_predictions = one_class_svm_fraud.predict(data.drop(['Time', 'Class'], axis=1))
data['Grey_Fraud'] = fraud_predictions
print('done5')


done1
done2
done3
done4
done5


In [17]:
from sklearn.metrics import classification_report, accuracy_score

# The model is trained on fraud, so it considers -1 as fraudulent (positive class)
# Map the model predictions to match the original class labels
# SVM prediction: -1 (anomaly) corresponds to 1 (fraud) in original data
# SVM prediction: 1 (normal) corresponds to 0 (non-fraud) in original data
data['SVM_Fraud_Pred'] = data['Grey_Fraud'].apply(lambda x: 1 if x == -1 else 0)

# True class labels from the dataset
Y_true = data['Class']

# Predicted class labels from the One-Class SVM trained on fraud
Y_pred_fraud_svm = data['SVM_Fraud_Pred']

# Calculate the accuracy score for the One-Class SVM model on the entire dataset
fraud_svm_accuracy = accuracy_score(Y_true, Y_pred_fraud_svm)

# Generate the classification report for the One-Class SVM model on the entire dataset
fraud_svm_class_report = classification_report(Y_true, Y_pred_fraud_svm)

# Print the accuracy score and classification report
print(f'Accuracy Score for One-Class SVM trained on Fraud: {fraud_svm_accuracy:.4f}')
print('Classification Report for One-Class SVM trained on Fraud:')
print(fraud_svm_class_report)


Accuracy Score for One-Class SVM trained on Fraud: 0.0013
Classification Report for One-Class SVM trained on Fraud:
              precision    recall  f1-score   support

           0       0.37      0.00      0.00     28432
           1       0.00      0.45      0.00        49

    accuracy                           0.00     28481
   macro avg       0.19      0.22      0.00     28481
weighted avg       0.37      0.00      0.00     28481



In [18]:
fraud_predictions.shape

(28481,)

In [19]:
# Pseudocode:
# Combine the "grey area transactions" from non-fraudulent and fraudulent predictions
# Label them correctly using the original dataset
# Train a Random Forest classifier to distinguish between legitimate and fraudulent transactions

# Python code:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Identify "grey area transactions" as those where either model predicted -1
grey_area = data[(data['Grey_Non_Fraud'] == -1) | (data['Grey_Fraud'] == -1)]
print('done1')
grey_area.shape



# Labels for "grey area transactions"
Y_grey = grey_area['Class']
X_grey = grey_area.drop(['Time', 'Class', 'Grey_Non_Fraud', 'Grey_Fraud'], axis=1)
print('done2')
# Split the "grey area transactions" into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X_grey, Y_grey, test_size=0.2, random_state=42)

# Initialize the Random Forest classifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
print('done3')
# Train the model
random_forest.fit(X_train, Y_train)
print('done4')
# Predict on the test set
rf_predictions = random_forest.predict(X_test)
print('done5')
# Evaluate the model
accuracy = accuracy_score(Y_test, rf_predictions)
print(f'Accuracy Score: {accuracy:.4f}')
print(classification_report(Y_test, rf_predictions))


done1
done2
done3
done4
done5
Accuracy Score: 0.9993
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5684
           1       0.75      0.75      0.75         8

    accuracy                           1.00      5692
   macro avg       0.87      0.87      0.87      5692
weighted avg       1.00      1.00      1.00      5692



In [20]:



# Print the accuracy score
print(f'Accuracy Score: {accuracy:.4f}')

Accuracy Score: 0.9993


In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Prepare your features and target
X = data1.drop(['Time', 'Class'], axis=1)
y = data1['Class']

# Split the data (optional but recommended for fair evaluation)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,  random_state=42)

# Train the Random Forest
random_forest = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
random_forest.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = random_forest.predict(X_test)

# Evaluate
print(f'Accuracy Score: {accuracy_score(y_test, y_pred_rf):.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred_rf, digits=2))


Accuracy Score: 0.9996
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.97      0.77      0.86        98

    accuracy                           1.00     56962
   macro avg       0.99      0.88      0.93     56962
weighted avg       1.00      1.00      1.00     56962



In [27]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, accuracy_score

# 1. Prepare Data
# Assume 'data1' is your DataFrame with columns: ['Time', 'V1', ..., 'V28', 'Amount', 'Class']

# Separate features and target
X = data1.drop(['Time', 'Class'], axis=1)
y = data1['Class']

# 2. Train One-Class SVM on Non-Fraudulent Data Only
X_nonfraud = X[y == 0]
svm = OneClassSVM(gamma='auto', nu=0.01)  # You may tune gamma/nu
svm.fit(X_nonfraud)

# 3. Use SVM to Flag "Grey Area" Transactions in the Full Dataset
svm_pred = svm.predict(X)  # +1 for inlier (normal), -1 for outlier (anomaly)
# Add SVM prediction to the DataFrame
data1['SVM_Flag'] = svm_pred

# 4. Select Only the Flagged ("Grey Area") Transactions
flagged_data = data1[data1['SVM_Flag'] == -1]
X_flagged = flagged_data.drop(['Time', 'Class', 'SVM_Flag'], axis=1)
y_flagged = flagged_data['Class']

# 5. Split Flagged Data for Training/Testing (if enough samples)
from sklearn.model_selection import train_test_split
Xf_train, Xf_test, yf_train, yf_test = train_test_split(X_flagged, y_flagged, test_size=0.2, stratify=y_flagged, random_state=42)

# 6. Train Random Forest on Flagged Transactions
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1, class_weight='balanced')
rf.fit(Xf_train, yf_train)

# 7. Predict and Evaluate on Flagged Test Set
yf_pred = rf.predict(Xf_test)
print("Performance on SVM-flagged transactions:")
print(f"Accuracy Score: {accuracy_score(yf_test, yf_pred):.4f}")
print(classification_report(yf_test, yf_pred, digits=4))

# 8. (Optional) Predict on All SVM-flagged Transactions for Full Evaluation
# (If you want to see performance on all flagged data, not just the test split)
# yf_pred_full = rf.predict(X_flagged)
# print(classification_report(y_flagged, yf_pred_full, digits=4))


Performance on SVM-flagged transactions:
Accuracy Score: 0.9945
              precision    recall  f1-score   support

           0     0.9971    0.9971    0.9971      1372
           1     0.9518    0.9518    0.9518        83

    accuracy                         0.9945      1455
   macro avg     0.9744    0.9744    0.9744      1455
weighted avg     0.9945    0.9945    0.9945      1455



In [30]:
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
svm = OneClassSVM(gamma='auto', nu=0.01)  # You may tune gamma/nu
svm.fit(train_x)

In [32]:
y_predict_svm = svm.predict(test_x)

In [33]:
print(classification_report(test_y, y_predict_svm, digits=4))

              precision    recall  f1-score   support

          -1     0.0000    0.0000    0.0000         0
           0     0.0000    0.0000    0.0000     56864
           1     0.0004    0.2449    0.0009        98

    accuracy                         0.0004     56962
   macro avg     0.0001    0.0816    0.0003     56962
weighted avg     0.0000    0.0004    0.0000     56962



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
