In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw transaction data. Every later cell needs `df`, so a missing
# file is fatal: print a hint about the expected location and re-raise rather
# than continuing and failing later with an unrelated NameError.
try:
    df = pd.read_csv('../data/fraudTest.csv')
except FileNotFoundError:
    print("Error: 'fraudTest.csv' not found. Make sure the file is in the '../data' folder.")
    raise
else:
    print("Data loaded successfully!")

# Quick look at the raw data: sample rows, schema, and class balance.
print("\nFirst 5 rows of the dataset:")
print(df.head())

print("\nDataFrame Info:")
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()

print("\nClass Distribution:")
# Share of each `is_fraud` value, expressed in percent.
print(df['is_fraud'].value_counts(normalize=True) * 100)


In [None]:
from imblearn.over_sampling import SMOTE
from collections import Counter

# --- Feature engineering on the transaction frame --------------------------

# Parse timestamps so they can be differenced and used for time-based windows.
df['trans_date_trans_time'] = pd.to_datetime(df['trans_date_trans_time'])

df = df.rename(columns={'amt': 'Amount'})

# Card numbers are identifiers, not quantities: keep them as strings.
df['cc_num'] = df['cc_num'].astype(str)

# Order each card's transactions chronologically; the per-card features below
# rely on this ordering.
df = df.sort_values(by=['cc_num', 'trans_date_trans_time'])

# Minutes elapsed since the same card's previous transaction
# (NaN for the first transaction of each card).
df['time_since_last_transaction'] = (
    df.groupby('cc_num')['trans_date_trans_time'].diff().dt.total_seconds() / 60
)

# Number of transactions the same card made in the trailing 3-hour window,
# current transaction included. A time-based rolling window needs a
# datetime-like index, so count over a helper series of ones indexed by the
# timestamp. transform() returns values in the input row order, which lets the
# result be assigned back positionally.
txn_marker = pd.Series(1.0, index=pd.DatetimeIndex(df['trans_date_trans_time']))
txn_count_3h = txn_marker.groupby(df['cc_num'].to_numpy()).transform(
    lambda card_txns: card_txns.rolling(pd.Timedelta(hours=3)).count()
)
df['transaction_frequency_3hr'] = txn_count_3h.to_numpy()

# One-hot encode the categorical columns, dropping the first level of each.
df = pd.get_dummies(df, columns=['gender', 'job'], drop_first=True)

# These columns have served their purpose (or are not used as features).
df = df.drop(columns=['trans_date_trans_time', 'merchant', 'cc_num'])

print("\nDataFrame Info after Preprocessing:")
# info() prints its own report and returns None; do not wrap it in print().
df.info()
print("\nFinal DataFrame Head:")
print(df.head())

# --- Balance the classes with SMOTE ----------------------------------------

# Separate the feature matrix from the target label.
X = df.drop(columns='is_fraud')
y = df['is_fraud']

# The first transaction of each card has no predecessor, so its time delta is
# NaN; treat it as 0 minutes. Assign the result back rather than calling
# fillna(inplace=True) on a column selection, which is deprecated chained
# assignment and has no effect under pandas Copy-on-Write.
X['time_since_last_transaction'] = X['time_since_last_transaction'].fillna(0)

# SMOTE synthesises samples by interpolating between neighbours, so it needs
# purely numeric features. Report and drop anything else that is still present
# (no-op when every column is already numeric/bool).
non_numeric_cols = X.select_dtypes(exclude=['number', 'bool']).columns.tolist()
if non_numeric_cols:
    print("Dropping non-numeric columns before SMOTE: %s" % non_numeric_cols)
    X = X.drop(columns=non_numeric_cols)

# NOTE(review): resampling happens on the full dataset here. If a train/test
# split is done later, apply SMOTE to the training portion only so synthetic
# samples do not leak into evaluation — confirm against downstream cells.
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, y)

print('\nOriginal dataset shape %s' % Counter(y))
print('Resampled dataset shape %s' % Counter(y_res))