<a href="https://colab.research.google.com/github/dominion-git/first-repo/blob/main/FRAUD_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import os
import kagglehub

# Fetch the latest version of the fraud dataset through kagglehub and keep
# the value it returns (printed below as the dataset path).
dataset_handle = "amanalisiddiqui/fraud-detection-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)


In [None]:
import warnings

# Suppress every warning category for the remainder of the session.
warnings.simplefilter("ignore")

# Use seaborn's white-grid style for all figures that follow.
sns.set(style="whitegrid")

In [None]:
path= kagglehub.dataset_download("amanalisiddiqui/fraud-detection-dataset")

In [None]:
print (path)

/kaggle/input/fraud-detection-dataset


In [None]:
import pandas as pd
import os

# Build the CSV location from the download directory, then load the whole
# file into the working DataFrame used by the rest of the notebook.
csv_file = os.path.join(path, "AIML Dataset.csv")
df = pd.read_csv(csv_file)

# Plain-text preview of the first rows.
preview = df.head()
print(preview)

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.columns

In [None]:
df["isFraud"].value_counts()

In [None]:
df["isFlaggedFraud"].value_counts()

In [None]:
df.isnull().sum().sum()

In [None]:
df.shape[0]

In [None]:
# Percentage of rows labelled as fraud (isFraud == 1), rounded to 2 decimals.
# The mean of the boolean mask equals count(isFraud == 1) / number of rows,
# and unlike `value_counts()[1]` it does not raise KeyError when no row
# carries the label 1.
round(df["isFraud"].eq(1).mean() * 100, 2)

In [None]:
# Bar chart of how many rows fall under each transaction type.
type_counts = df["type"].value_counts()
ax = type_counts.plot.bar(title="Transaction Types", color="skyblue")
ax.set_xlabel("Transaction Type")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Mean of the fraud label per transaction type, highest rate first.
fraud_by_types = (
    df.groupby(["type"])["isFraud"]
    .mean()
    .sort_values(ascending=False)
)

ax = fraud_by_types.plot.bar(title="Fraud Rate by Type", color="salmon")
ax.set_ylabel("Fraud Rate")
plt.show()

In [None]:
df["amount"].describe().astype(int)

In [None]:
sns.histplot(np.log1p(df["amount"]), bins=100, kde=True, color = "green")
plt.title("Transaction Amount Distribution (log scaled)")
plt.xlabel("Log (Amount + 1)")
plt.show()

In [None]:
sns.boxplot(data= df[df["amount"] < 50000], x= "isFraud", y="amount")
plt.title("Amount vs isFraud(Filtered under 50k)")
plt.show()

In [None]:
df["balanceDiffOrig"] = df["oldbalanceOrg"] - df["newbalanceOrig"]
df["balanceDiffDest"] = df["newbalanceDest"] - df["oldbalanceDest"]

In [None]:
(df["balanceDiffOrig"] < 0).sum()

In [None]:
(df["balanceDiffDest"] < 0).sum()

In [None]:
df.head(2)

In [None]:
# Count fraud-labelled rows per step value, ordered by step.
is_fraud_row = df["isFraud"] == 1
fraud_per_step = df.loc[is_fraud_row, "step"].value_counts().sort_index()

# Line chart of those counts against the step value.
ax = plt.gca()
ax.plot(fraud_per_step.index, fraud_per_step.values, label="Frauds per Step")
ax.set_xlabel("Step (Time)")
ax.set_ylabel("Number of Frauds")
ax.set_title("Frauds Over Time")
ax.grid(True)
plt.show()

In [None]:
# Remove the `step` column from the working frame.
# Reassigning (instead of `inplace=True`) together with `errors="ignore"`
# keeps this cell idempotent: running it a second time no longer raises
# KeyError because the column is already gone.
df = df.drop(columns="step", errors="ignore")

In [None]:
df.head()

In [None]:
top_senders = df["nameOrig"].value_counts().head(10)

In [None]:
top_senders

In [None]:
top_receivers = df["nameDest"].value_counts().head(10)

In [None]:
top_receivers

In [None]:
fraud_users = df[df["isFraud"] == 1]["nameOrig"].value_counts().head(10)

In [None]:
fraud_users

In [None]:
fraud_types=  df[df["type"].isin(["CASH_OUT", "TRANSFER"])]

In [None]:
fraud_types["type"].value_counts()

In [None]:
sns.countplot(data=fraud_types, x="type", hue="isFraud")
plt.title("Fraudulent Distribution in Transfer & Cash_Out")
plt.show()

In [None]:
corr=df[["amount", "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest", "newbalanceDest", "balanceDiffOrig", "balanceDiffDest", "isFraud"]].corr()

In [None]:
corr

In [None]:
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()

In [None]:
zero_after_transfer = df[
    (df["oldbalanceOrg"] > 0) &
    (df["newbalanceOrig"] == 0) &
     (df["type"] .isin(["TRANSFER", "CASH_OUT"]))
]

In [None]:
len(zero_after_transfer)

In [None]:
zero_after_transfer.head()

In [None]:
df["isFraud"].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [None]:
df.head()

In [None]:
df_model = df.drop(["nameOrig", "nameDest", "isFlaggedFraud"], axis=1)

In [None]:
df_model.head()

In [None]:
categorical =["type"]
numeric = ["amount", "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest", "newbalanceDest"]

In [None]:
y=df_model["isFraud"]
X=df_model.drop("isFraud", axis=1)

In [None]:
# Stratified 70/30 hold-out split on the label. The fixed random_state makes
# the split, and every metric computed from it, reproducible across kernel
# restarts; 42 matches the seed used by the other sampling steps in this
# notebook. The lower-case `x_test` name is kept because later cells use it.
X_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [None]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric),
        ("cat", OneHotEncoder(drop ="first"), categorical)
    ],
    remainder= "drop"
)

In [None]:
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(class_weight="balanced", max_iter=1000))
])

In [None]:
pipeline.fit(X_train, y_train)

In [None]:
y_pred = pipeline.predict(x_test)

In [None]:
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

In [None]:
confusion_matrix(y_test, y_pred)

In [None]:
pipeline.score(x_test, y_test) *100

In [None]:
# Separate the classes
fraud = df[df["isFraud"] == 1]
not_fraud = df[df["isFraud"] == 0]

# Undersample the majority class
not_fraud_sampled = not_fraud.sample(n=len(fraud), random_state=42)

# Combine them
df_balanced = pd.concat([fraud, not_fraud_sampled])

# Shuffle the data
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
from sklearn.model_selection import train_test_split

X = df_balanced.drop("isFraud", axis=1)
y = df_balanced["isFraud"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Define columns
categorical = ["type"]
numeric = ["amount", "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest", "newbalanceDest"]

# Preprocessing
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric),
    ("cat", OneHotEncoder(drop="first"), categorical)
])

# Pipeline
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=50, max_depth=10, random_state=42))
])

# Train
pipeline.fit(X_train, y_train)

# Predict
y_pred = pipeline.predict(X_test)

# Evaluate
print(classification_report(y_test, y_pred))

In [None]:
import joblib

# Persist the fitted pipeline (preprocessing + classifier) to disk so the
# Streamlit app can load it by file name.
joblib.dump(value=pipeline, filename="fraud_detection_pipeline.pkl")

In [None]:
!pip install streamlit
!npm install -g localtunnel


In [None]:
import streamlit as st
import pandas as pd
import joblib

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load a model file produced by this notebook.
model = joblib.load("fraud_detection_pipeline.pkl")

st.title("Fraud Detection Prediction App")

st.markdown("Please enter the transaction details and use the predict button")

st.divider()

# Offer only transaction types the fitted encoder has seen. The pipeline's
# OneHotEncoder uses the default handle_unknown="error", so an unseen value
# such as "DEPOSIT" makes model.predict raise ValueError.
# NOTE(review): list assumes the PaySim-style categories of this dataset -
# confirm against df["type"].unique().
transaction_type = st.selectbox("Transaction Type", ["PAYMENT","TRANSFER","CASH_OUT","CASH_IN","DEBIT"])
amount = st.number_input("Amount", min_value = 0.0, value= 1000.0)
oldbalanceOrg = st.number_input("old Balance (Sender)", min_value = 0.0, value = 10000.0)
newbalanceOrig = st.number_input("New Balance (Sender)", min_value = 0.0, value = 9000.0)
oldbalanceDest = st.number_input("old Balance (Receiver)", min_value=0.0, value = 0.0)
newbalanceDest = st.number_input("New Balance (Receiver)", min_value=0.0, value = 0.0)

if st.button("Predict"):
    # Single-row frame whose column names match the ones the pipeline was
    # fitted on.
    input_data = pd.DataFrame([{
        "type" : transaction_type,
        "amount" : amount,
        "oldbalanceOrg": oldbalanceOrg,
        "newbalanceOrig": newbalanceOrig,
        "oldbalanceDest": oldbalanceDest,
        "newbalanceDest": newbalanceDest,
    }])


    prediction = model.predict(input_data)[0]

    st.subheader(f"Prediction: '{int(prediction)}'")

    if prediction ==1:
        st.error("This transaction can be fraud")
    else:
        st.success("This transaction looks like it is not a fraud")

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
import os

model_path = "fraud_detection_pipeline.pkl"

st.title("Fraud Detection Prediction App")
st.markdown("Please enter the transaction details and use the predict button")
st.divider()

transaction_type = st.selectbox("Transaction Type", ["PAYMENT", "TRANSFER", "CASH_OUT", "DEPOSIT"])
amount = st.number_input("Amount", min_value=0.0, value=1000.0)
oldbalanceOrg = st.number_input("Old Balance (Sender)", min_value=0.0, value=10000.0)
newbalanceOrig = st.number_input("New Balance (Sender)", min_value=0.0, value=9000.0)
oldbalanceDest = st.number_input("Old Balance (Receiver)", min_value=0.0, value=0.0)
newbalanceDest = st.number_input("New Balance (Receiver)", min_value=0.0, value=0.0)

if st.button("Predict"):
    if os.path.exists(model_path):
        model = joblib.load(model_path)

        input_data = pd.DataFrame([{
            "type": transaction_type,
            "amount": amount,
            "oldbalanceOrg": oldbalanceOrg,
            "newbalanceOrig": newbalanceOrig,
            "oldbalanceDest": oldbalanceDest,
            "newbalanceDest": newbalanceDest,
        }])

        prediction = model.predict(input_data)[0]
        st.subheader(f"Prediction: '{int(prediction)}'")

        if prediction == 1:
            st.error("⚠️ This transaction can be fraud")
        else:
            st.success("✅ This transaction looks safe")
    else:
        st.error("❌ Model file not found. Make sure 'fraud_detection_pipeline.pkl' is present.")


In [None]:
!pip install streamlit
!npm install -g localtunnel
!streamlit run app.py & npx localtunnel --port 8501


[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K
changed 22 packages in 3s
[1G[0K⠋[1G[0K
[1G[0K⠋[1G[0K3 packages are looking for funding
[1G[0K⠋[1G[0K  run `npm fund` for details
[1G[0K⠋[1G[0K[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.148.5.171:8501[0m
[0m
[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0Kyour url is: https://cool-snakes-speak.loca.lt
/tools/node/lib/