In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
import warnings 
# Install a blanket "ignore" filter: every warning raised after this point is hidden.
# NOTE(review): this also hides deprecation and solver warnings from the libraries
# used below - consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
# Apply seaborn's "whitegrid" style to the plots drawn in later cells.
sns.set_style('whitegrid')

In [None]:
# Load the transaction dataset from CSV

In [None]:
# Read the raw CSV (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="AIML Dataset.csv")

In [None]:
# Preview the first five rows of the loaded data.
df.head()

In [None]:
# Print column names, dtypes, non-null counts and memory usage.
df.info()


In [None]:
# List the column labels available in the dataset.
df.columns

In [None]:
# Count how many rows carry each distinct value of the isFraud label.
df["isFraud"].value_counts()

In [None]:
# Count how many rows carry each distinct value of the isFlaggedFraud column.
df["isFlaggedFraud"].value_counts()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# (rows, columns) of the loaded DataFrame.
df.shape

In [None]:
# Percentage of all transactions that are labelled as fraud.
# Assumes isFraud is a 0/1 flag (the previous version counted label 1), so its mean
# is the fraud fraction. Unlike value_counts()[1], this does not raise KeyError
# when the data contains no fraud rows.
fraud_percentage = df["isFraud"].mean() * 100
print(fraud_percentage)

In [None]:
# Bar chart of the number of rows per value of the "type" column.
type_counts = df["type"].value_counts()
type_counts.plot.bar(title="Transaction Type Distribution", color="lightblue")
plt.xlabel("Transaction Type")
plt.ylabel("Count")
plt.show()

In [None]:
# Mean of isFraud within each transaction type, sorted from highest to lowest.
fraud_by_types = df.groupby("type")["isFraud"].mean().sort_values(ascending=False)
fraud_by_types.plot(kind="bar", title="Fraud rate by Transaction Type", color="salmon")
# Label both axes and render explicitly, matching the other plotting cells and
# keeping the matplotlib Text repr out of the cell output.
plt.xlabel("Transaction Type")
plt.ylabel("Fraud Rate")
plt.show()

In [None]:
# Summary statistics of the amount column, cast to integers
# (fractional parts of the mean, std and quantiles are truncated).
df["amount"].describe().astype(int)

In [None]:
# Histogram (50 bins, with KDE overlay) of log(1 + amount).
log_amount = np.log1p(df["amount"])
sns.histplot(log_amount, bins=50, kde=True, color="purple")
plt.title("Transaction Amount Distribution(log scale)")
plt.xlabel("log(Amount+1)")
plt.show()

In [None]:
# Box plot of amount per isFraud value, restricted to rows with amount below 50000.
below_cap = df[df["amount"] < 50000]
sns.boxplot(x="isFraud", y="amount", data=below_cap)
plt.title("boxplot of amount by fraud status")
plt.xlabel("isFraud")
plt.ylabel("Amount")
plt.show()

In [None]:
# Re-check the column labels before dropping columns.
df.columns

In [None]:
# Remove the "step" column. Reassigning (instead of inplace=True) together with
# errors="ignore" makes this cell safe to re-run: a second execution is a no-op
# rather than a KeyError for the already-missing column.
df = df.drop(columns=["step"], errors="ignore")

In [None]:
# Preview the first five rows after the "step" column was dropped.
df.head()

In [None]:
# The ten most frequent nameOrig values and their row counts.
top_sender = df["nameOrig"].value_counts().iloc[:10]

In [None]:
# Display the ten most frequent nameOrig values.
top_sender

In [None]:
# The ten most frequent nameDest values and their row counts.
top_receiver = df["nameDest"].value_counts().iloc[:10]

In [None]:
# Display the ten most frequent nameDest values.
top_receiver

In [None]:
# Keep only rows whose type is TRANSFER or CASH_OUT.
fraud_types = df.loc[df["type"].isin(["TRANSFER", "CASH_OUT"])]

In [None]:
# Row counts per transaction type within the TRANSFER / CASH_OUT subset.
fraud_types["type"].value_counts()

In [None]:
# Row counts per transaction type in the TRANSFER / CASH_OUT subset, split by isFraud.
sns.countplot(data=fraud_types, x="type", hue="isFraud", palette="Set2")
plt.title("Fraud count by Transaction type CASH_OUT and TRANSFER")
# Label the axes and render explicitly, matching the other plotting cells and
# keeping the matplotlib Text repr out of the cell output.
plt.xlabel("Transaction Type")
plt.ylabel("Count")
plt.show()

In [None]:
# Pairwise correlation between the amount/balance columns and the isFraud label.
corr_columns = [
    "amount",
    "oldbalanceOrg",
    "newbalanceOrig",
    "oldbalanceDest",
    "newbalanceDest",
    "isFraud",
]
corr = df[corr_columns].corr()

In [None]:
# Display the correlation matrix as a table.
corr

In [None]:
# Annotated heatmap of the correlation matrix, values shown with two decimals.
sns.heatmap(
    data=corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
)
plt.title("Correlation matrix")
plt.show()

In [None]:
# TRANSFER / CASH_OUT rows where the origin balance was positive before the
# transaction and is exactly zero afterwards.
zero_after_transfer = df[
    (df["oldbalanceOrg"] > 0)
    & (df["newbalanceOrig"] == 0)
    & (df["type"].isin(["TRANSFER", "CASH_OUT"]))
]
# Report the row count; printing .shape would show a (rows, columns) tuple,
# which does not match the "Number of transactions" wording.
print("Number of transactions with zero new balance after transfer or cash out:", len(zero_after_transfer))

In [None]:
# Preview the first ten matching rows.
zero_after_transfer.head(10)

In [None]:
# Re-check the isFraud label counts before modelling.
df["isFraud"].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder


In [None]:
# Modelling frame: a copy of df without the two name columns and isFlaggedFraud.
df_model = df.drop(["nameOrig", "nameDest", "isFlaggedFraud"], axis=1)

In [None]:
# Preview the first five rows of the modelling frame.
df_model.head()

In [None]:
# Column groups consumed by the preprocessing step:
# `categorical` columns are one-hot encoded, `numerical` columns are standardised.
categorical = ["type"]
numerical = [
    "amount",
    "oldbalanceOrg",
    "newbalanceOrig",
    "oldbalanceDest",
    "newbalanceDest",
]

In [None]:
# Split the modelling frame into the feature matrix X and the target vector Y (isFraud).
X = df_model.drop("isFraud", axis=1)
Y = df_model["isFraud"]

In [None]:
# 70/30 train/test split, stratified on Y so both parts keep the same label
# proportions; the fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    stratify=Y,
    random_state=42,
)

In [None]:
# Column-wise preprocessing: standardise the numerical columns and one-hot encode
# the categorical ones (first level dropped); any other column is discarded.
numeric_step = ("num", StandardScaler(), numerical)
categorical_step = ("cat", OneHotEncoder(drop="first"), categorical)
preprocessor = ColumnTransformer(
    transformers=[numeric_step, categorical_step],
    remainder="drop",
)


In [None]:
# Full model: preprocessing followed by a class-weighted logistic regression
# allowed up to 1000 solver iterations.
classifier = LogisticRegression(max_iter=1000, class_weight="balanced")
pipeline = Pipeline(steps=[("prep", preprocessor), ("clf", classifier)])

In [None]:
# Fit the preprocessing steps and the classifier on the training split only.
pipeline.fit(X_train,Y_train)

In [None]:
# Predicted labels for the held-out test split.
y_pred=pipeline.predict(X_test)

In [None]:
# Per-class precision, recall, F1 and support on the test split.
print(classification_report(Y_test,y_pred))

In [None]:
# Confusion matrix on the test split (rows: true labels, columns: predicted labels).
confusion_matrix(Y_test,y_pred)

In [None]:
# Score of the final estimator on the test split (mean accuracy for a classifier).
# NOTE(review): if fraud rows are rare, accuracy alone is a weak indicator -
# read it together with the per-class precision/recall.
pipeline.score(X_test,Y_test)

In [None]:
import joblib
# Serialise the whole pipeline (preprocessing + classifier) to a file in the
# current working directory.
joblib.dump(pipeline,"fraud_detection_model.pkl")

In [None]:
# Preview the first five rows of the test features.
X_test.head()

In [None]:
# Column labels of the training features, i.e. the inputs the pipeline was fitted on.
X_train.columns


In [None]:
# Distinct transaction types present in the training split.
X_train["type"].unique()