In [None]:

import pandas as pd
import numpy as np

# Number of rows to generate
num_rows = 1000

# Seed for the random generator, so that Restart & Run All rebuilds the same
# dataset (and therefore the same CSV) every time.
SEED = 42

# Possible transaction types
transaction_types = ['CASH_OUT', 'PAYMENT', 'CASH_IN', 'TRANSFER', 'DEBIT']


def generate_transactions(n_rows, seed=SEED, types=transaction_types):
    """Build a DataFrame of random, synthetic transaction records.

    Every column is drawn independently of the others. In particular
    ``isFraud`` is a plain 95/5 draw that does not depend on any other
    column, and the "new" balances are not derived from the "old" balances
    and the amount.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate (0 yields an empty frame).
    seed : int, optional
        Seed for ``np.random.default_rng``; the same seed and arguments
        always produce the same frame.
    types : sequence of str, optional
        Transaction type labels to sample ``type`` from, uniformly.

    Returns
    -------
    pandas.DataFrame
        The ten raw columns followed by the derived ``isPayment``,
        ``isMovement`` and ``accountDiff`` columns.
    """
    rng = np.random.default_rng(seed)

    def account_ids():
        # One vectorised draw instead of one RNG call per row. dtype is pinned
        # to int64 because the upper bound does not fit in a 32-bit integer.
        ids = rng.integers(100000000, 9999999999, size=n_rows, dtype=np.int64)
        return [f'C{account_id}' for account_id in ids]

    frame = pd.DataFrame({
        'step': rng.integers(1, 500, size=n_rows),  # upper bound is exclusive: 1..499
        'type': rng.choice(types, size=n_rows),
        'amount': rng.uniform(10, 1000000, size=n_rows),
        'nameOrig': account_ids(),
        'oldbalanceOrg': rng.uniform(0, 10000000, size=n_rows),
        'newbalanceOrig': rng.uniform(0, 10000000, size=n_rows),
        'nameDest': account_ids(),
        'oldbalanceDest': rng.uniform(0, 10000000, size=n_rows),
        'newbalanceDest': rng.uniform(0, 10000000, size=n_rows),
        'isFraud': rng.choice([0, 1], size=n_rows, p=[0.95, 0.05]),  # 5% fraud probability
    })

    # Derived columns: 0/1 flags for the transaction family, and the gap
    # between the origin and destination opening balances.
    frame['isPayment'] = frame['type'].isin(['PAYMENT', 'DEBIT']).astype(int)
    frame['isMovement'] = frame['type'].isin(['CASH_OUT', 'TRANSFER']).astype(int)
    frame['accountDiff'] = frame['oldbalanceOrg'] - frame['oldbalanceDest']
    return frame


# Generate the dataset
df = generate_transactions(num_rows)

# Save as CSV file
df.to_csv('synthetic_transactions.csv', index=False)

# Display sample rows
print(df.head())



   step      type         amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0   440     DEBIT  851781.455899   C975977272   4.892809e+06    8.092566e+06   
1   394  TRANSFER  363869.111096   C957940981   4.542041e+06    3.796573e+06   
2   151  TRANSFER  235560.782881  C2081115568   3.541470e+06    9.071696e+06   
3   498  CASH_OUT  996054.670001  C2015635871   8.516453e+06    1.537307e+06   
4   234     DEBIT  956296.512551  C7238202914   2.891449e+05    7.681907e+06   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isPayment  \
0  C2953133876    7.769211e+06    1.851807e+06        0          1   
1  C1086231413    6.719211e+06    6.713184e+06        0          0   
2  C6100773198    4.891592e+06    7.565065e+06        0          0   
3  C5498098883    5.302150e+06    2.725075e+06        1          0   
4  C8981953452    4.234532e+06    9.049453e+06        0          1   

   isMovement   accountDiff  
0           0 -2.876402e+06  
1           1 -2.177170e+06  
2       

In [None]:
import seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the synthetic transactions. The path is relative so that it resolves to
# the same 'synthetic_transactions.csv' the generation cell writes into the
# working directory, instead of only working where the cwd is /content.
transactions = pd.read_csv('synthetic_transactions.csv')
print(transactions.head())
# info() prints its report itself and returns None, so it is not wrapped in print().
transactions.info()

# Number of rows flagged as fraud
isFraud = transactions['isFraud']
print(np.sum(isFraud))

# Summary statistics of the transaction amounts
print(transactions['amount'].describe())

# Derived features: 0/1 flags for the transaction family, and the gap between
# the origin and destination opening balances.
transactions['isPayment'] = transactions['type'].isin(['PAYMENT', 'DEBIT']).astype(int)

transactions['isMovement'] = transactions['type'].isin(['CASH_OUT', 'TRANSFER']).astype(int)

transactions['accountDiff'] = transactions['oldbalanceOrg'] - transactions['oldbalanceDest']

# Feature matrix columns and target label
features = ['amount', 'isPayment', 'isMovement', 'accountDiff']
label = transactions['isFraud']

# 70/30 split. stratify keeps the fraud/non-fraud ratio the same in both
# partitions, which matters when one class is rare.
X_train, X_test, y_train, y_test = train_test_split(
    transactions[features],
    label,
    test_size=0.3,
    train_size=0.7,
    random_state=42,
    stratify=label,
)

# Fit the scaler on the training split only and reuse it for the test split,
# so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# NOTE(review): with a rare positive class, accuracy is dominated by the
# majority class; read these scores with that in mind.
print("Training Accuracy:", model.score(X_train_scaled, y_train))

print("Test Accuracy:", model.score(X_test_scaled, y_test))

# One coefficient per entry of `features`, in the same order
print("Model Coefficients:", model.coef_)

# New transaction data, each row ordered as `features`:
# [amount, isPayment, isMovement, accountDiff]
transaction1 = np.array([123456.78, 0, 1, 54670.1])
transaction2 = np.array([98765.43, 1, 0, 8524.75])
transaction3 = np.array([543678.31, 1, 0, 510025.5])
your_transaction = np.array([534777.54, 0, 1, 432121.6])

sample_transactions = np.vstack((transaction1, transaction2, transaction3, your_transaction))

# The scaler was fitted on a DataFrame, so it remembers the column names.
# Passing the samples as a DataFrame with the same columns avoids scikit-learn's
# "X does not have valid feature names" warning and makes the column order explicit.
sample_transactions_scaled = scaler.transform(
    pd.DataFrame(sample_transactions, columns=features)
)

print("Predictions:", model.predict(sample_transactions_scaled))
print("Prediction Probabilities:", model.predict_proba(sample_transactions_scaled))


   step      type         amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0   440     DEBIT  851781.455899   C975977272   4.892809e+06    8.092566e+06   
1   394  TRANSFER  363869.111096   C957940981   4.542041e+06    3.796573e+06   
2   151  TRANSFER  235560.782881  C2081115568   3.541470e+06    9.071696e+06   
3   498  CASH_OUT  996054.670001  C2015635871   8.516453e+06    1.537307e+06   
4   234     DEBIT  956296.512551  C7238202914   2.891449e+05    7.681907e+06   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isPayment  \
0  C2953133876    7.769211e+06    1.851807e+06        0          1   
1  C1086231413    6.719211e+06    6.713184e+06        0          0   
2  C6100773198    4.891592e+06    7.565065e+06        0          0   
3  C5498098883    5.302150e+06    2.725075e+06        1          0   
4  C8981953452    4.234532e+06    9.049453e+06        0          1   

   isMovement   accountDiff  
0           0 -2.876402e+06  
1           1 -2.177170e+06  
2       

