In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn import svm


df = pd.read_csv('Synthetic_Financial_datasets_log.csv') #Gets data from CSV file 
df

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.00,160296.36,M1979787155,0.00,0.00,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.00,19384.72,M2044282225,0.00,0.00,0,0
2,1,TRANSFER,181.00,C1305486145,181.00,0.00,C553264065,0.00,0.00,1,0
3,1,CASH_OUT,181.00,C840083671,181.00,0.00,C38997010,21182.00,0.00,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.00,29885.86,M1230701703,0.00,0.00,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.00,C776919290,0.00,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.00,C1881841831,0.00,0.00,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.00,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.00,C2080388513,0.00,0.00,1,0


In [2]:
print("Sample number: " + str(len(df.index)))

Sample number: 6362620


# Data Cleaning

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6362620 entries, 0 to 6362619
Data columns (total 11 columns):
 #   Column          Dtype  
---  ------          -----  
 0   step            int64  
 1   type            object 
 2   amount          float64
 3   nameOrig        object 
 4   oldbalanceOrg   float64
 5   newbalanceOrig  float64
 6   nameDest        object 
 7   oldbalanceDest  float64
 8   newbalanceDest  float64
 9   isFraud         int64  
 10  isFlaggedFraud  int64  
dtypes: float64(5), int64(3), object(3)
memory usage: 534.0+ MB


In [4]:
df.describe

<bound method NDFrame.describe of          step      type      amount     nameOrig  oldbalanceOrg  \
0           1   PAYMENT     9839.64  C1231006815      170136.00   
1           1   PAYMENT     1864.28  C1666544295       21249.00   
2           1  TRANSFER      181.00  C1305486145         181.00   
3           1  CASH_OUT      181.00   C840083671         181.00   
4           1   PAYMENT    11668.14  C2048537720       41554.00   
...       ...       ...         ...          ...            ...   
6362615   743  CASH_OUT   339682.13   C786484425      339682.13   
6362616   743  TRANSFER  6311409.28  C1529008245     6311409.28   
6362617   743  CASH_OUT  6311409.28  C1162922333     6311409.28   
6362618   743  TRANSFER   850002.52  C1685995037      850002.52   
6362619   743  CASH_OUT   850002.52  C1280323807      850002.52   

         newbalanceOrig     nameDest  oldbalanceDest  newbalanceDest  isFraud  \
0             160296.36  M1979787155            0.00            0.00        0   

In [5]:
df["type"].unique()

array(['PAYMENT', 'TRANSFER', 'CASH_OUT', 'DEBIT', 'CASH_IN'],
      dtype=object)

In [6]:
df.isnull()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
6362615,False,False,False,False,False,False,False,False,False,False,False
6362616,False,False,False,False,False,False,False,False,False,False,False
6362617,False,False,False,False,False,False,False,False,False,False,False
6362618,False,False,False,False,False,False,False,False,False,False,False


In [7]:
df.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

# Logistic Regression


In [8]:
X = df[["type", "amount", "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest", "newbalanceDest"]]  # Features
y = df['isFraud']  # Target labels

label_encoder = LabelEncoder()
X['type'] = label_encoder.fit_transform(X['type'])


#Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['type'] = label_encoder.fit_transform(X['type'])


In [9]:
model = LogisticRegression(max_iter=200)


In [10]:
# Train the model
model.fit(X_train, y_train)

LogisticRegression(max_iter=200)

In [11]:
X_train

Unnamed: 0,type,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest
6271196,0,386385.08,4669568.85,5055953.92,506035.06,119649.98
1888243,0,212458.78,234635.00,447093.78,806037.88,593579.10
5549957,3,19967.60,3634.00,0.00,0.00,0.00
2025342,1,527616.51,180216.00,0.00,92157.10,619773.61
682342,4,206067.85,0.00,0.00,2131494.48,2337562.32
...,...,...,...,...,...,...
1570006,0,129715.85,5054252.83,5183968.68,246692.94,116977.09
2234489,3,2459.70,0.00,0.00,0.00,0.00
4926484,1,10579.16,59279.00,48699.84,322754.16,333333.32
4304572,1,73020.76,20289.00,0.00,256102.84,329123.61


In [12]:
y_train

6271196    0
1888243    0
5549957    0
2025342    0
682342     0
          ..
1570006    0
2234489    0
4926484    0
4304572    0
1692743    0
Name: isFraud, Length: 5090096, dtype: int64

In [13]:
# Make predictions
y_pred = model.predict(X_test)

In [14]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 1.00


In [16]:
# Taking user inputs


type = (input("Enter the type (0: PAYMENT', '1: TRANSFER', '2: CASH_OUT', '3: DEBIT', '4: CASH_IN): "))

amount = float(input("Enter the amount: "))
oldbalanceOrig = float(input("Enter the initial balance before the transaction: "))
newbalanceOrig = float(input("Enter the new balance after the transaction: "))

oldbalanceDest = float(input("Enter the initial recipient's balance before the transaction: "))
newbalanceDest = float(input("Enter the new recipient's balance after the transaction: "))

# Create a DataFrame with correct column names that match the ones used during training
features = pd.DataFrame([[type, amount, oldbalanceOrig, newbalanceOrig, oldbalanceDest, newbalanceDest]])

# Now make the prediction
Predict = model.predict(features)

# Check the prediction result
if Predict[0] == 0:
    print("Our model predicts that it is not a fraud.")
elif Predict[0] == 1:
    print("Our model predicts that it is a fraud.")



"""
# Example test cases
1000.0, 5000.0, 4000.0, 1000.0, 2000.0),  # PAYMENT -> 0
200.0, 3000.0, 2800.0, 500.0, 700.0),    # TRANSFER -> 0
150.0, 2500.0, 2350.0, 1000.0, 1000.0),  # CASH_OUT -> 0
100.0, 1000.0, 900.0, 200.0, 200.0),     # DEBIT -> 0
2500.0, 10000.0, 7500.0, 3000.0, 5500.0), # PAYMENT -> 0
"""



"""
# Example test cases
PAYMENT	9839.64	C1231006815	170136.00	160296.36	M1979787155	0.00	0.00	0	0
PAYMENT	1864.28	C1666544295	21249.00	19384.72	M2044282225	0.00	0.00	0	0
TRANSFER	181.00	C1305486145	181.00	0.00	C553264065	0.00	0.00	1	0
CASH_OUT	181.00	C840083671	181.00	0.00	C38997010	21182.00	0.00	1	0
PAYMENT	11668.14	C2048537720	41554.00	29885.86	M1230701703	0.00	0.00	0	0
...	...	...	...	...	...	...	...	...	...	...	...
CASH_OUT	339682.13	C786484425	339682.13	0.00	C776919290	0.00	339682.13	1	0
TRANSFER	6311409.28	C1529008245	6311409.28	0.00	C1881841831	0.00	0.00	1	0
CASH_OUT	6311409.28	C1162922333	6311409.28	0.00	C1365125890	68488.84	6379898.11	1	0
TRANSFER	850002.52	C1685995037	850002.52	0.00	C2080388513	0.00	0.00	1	0
CASH_OUT	850002.52	C1280323807	850002.52	0.00	C873221189	6510099.11	7360101.63	1	0
"""

Enter the type (0: PAYMENT', '1: TRANSFER', '2: CASH_OUT', '3: DEBIT', '4: CASH_IN): 0
Enter the amount: 11668.14
Enter the initial balance before the transaction: 41554.00
Enter the new balance after the transaction: 29885.86
Enter the initial recipient's balance before the transaction: 0.00
Enter the new recipient's balance after the transaction: 0.00
Our model predicts that it is not a fraud.




'\n# Example test cases\nPAYMENT\t9839.64\tC1231006815\t170136.00\t160296.36\tM1979787155\t0.00\t0.00\t0\t0\nPAYMENT\t1864.28\tC1666544295\t21249.00\t19384.72\tM2044282225\t0.00\t0.00\t0\t0\nTRANSFER\t181.00\tC1305486145\t181.00\t0.00\tC553264065\t0.00\t0.00\t1\t0\nCASH_OUT\t181.00\tC840083671\t181.00\t0.00\tC38997010\t21182.00\t0.00\t1\t0\nPAYMENT\t11668.14\tC2048537720\t41554.00\t29885.86\tM1230701703\t0.00\t0.00\t0\t0\n...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\t...\nCASH_OUT\t339682.13\tC786484425\t339682.13\t0.00\tC776919290\t0.00\t339682.13\t1\t0\nTRANSFER\t6311409.28\tC1529008245\t6311409.28\t0.00\tC1881841831\t0.00\t0.00\t1\t0\nCASH_OUT\t6311409.28\tC1162922333\t6311409.28\t0.00\tC1365125890\t68488.84\t6379898.11\t1\t0\nTRANSFER\t850002.52\tC1685995037\t850002.52\t0.00\tC2080388513\t0.00\t0.00\t1\t0\nCASH_OUT\t850002.52\tC1280323807\t850002.52\t0.00\tC873221189\t6510099.11\t7360101.63\t1\t0\n'