In [2]:
# Install required libraries
!pip install pandas numpy scikit-learn matplotlib seaborn imbalanced-learn




In [3]:
import pandas as pd

# Load dataset
df = pd.read_csv("creditcard.csv")

# Show first 5 rows
print(df.head())

# Show dataset info
print(df.info())

# Check class distribution (fraud vs normal)
print(df['Class'].value_counts())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# 1. Check missing values
print("Missing values:\n", df.isnull().sum())

# 2. Separate features (X) and target (y)
X = df.drop("Class", axis=1)
y = df["Class"]

# 3. Scale 'Amount' and 'Time' columns
scaler = StandardScaler()
X[['Amount', 'Time']] = scaler.fit_transform(X[['Amount', 'Time']])

# 4. Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Before SMOTE:", y_train.value_counts())

# 5. Balance the dataset with SMOTE (only on training set)
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("After SMOTE:", y_train_res.value_counts())


Missing values:
 Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    1
Class     1
dtype: int64


ValueError: Input y contains NaN.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Train logistic regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_res, y_train_res)

# Predict on test data
y_pred = log_reg.predict(X_test)
y_prob = log_reg.predict_proba(X_test)[:,1]

# Evaluation
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))
print("\nROC-AUC Score:", roc_auc_score(y_test, y_prob))


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Train
rf_model = RandomForestClassifier(
    n_estimators=200,       # number of trees
    max_depth=None,         # let trees grow fully
    class_weight="balanced",# handle imbalance
    random_state=42,
    n_jobs=-1
)
rf_model.fit(X_train, y_train)

# Predict
y_pred = rf_model.predict(X_test)
y_prob = rf_model.predict_proba(X_test)[:, 1]

# Evaluate
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nROC-AUC Score:", roc_auc_score(y_test, y_prob))


In [None]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=len(y_train[y_train==0]) / len(y_train[y_train==1]), # balance fraud class
    random_state=42,
    n_jobs=-1
)

xgb_model.fit(X_train, y_train)

y_pred = xgb_model.predict(X_test)
y_prob = xgb_model.predict_proba(X_test)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nROC-AUC Score:", roc_auc_score(y_test, y_prob))


In [None]:
import joblib

joblib.dump(log_reg, "log_reg_model.pkl")
joblib.dump(rf_model, "rf_model.pkl")
joblib.dump(xgb_model, "xgb_model.pkl")

print("✅ Models saved successfully")


In [None]:
import joblib

def load_model(path="xgb_model.pkl"):
    return joblib.load(path)

model = load_model()
# Example prediction
sample = X_test.iloc[0:5]
preds = model.predict(sample)
print(preds)


In [None]:
!pip install streamlit pyngrok

In [None]:
%%writefile app.py
import streamlit as st
import joblib
import pandas as pd

# Load the trained XGBoost model (default)
model = joblib.load("xgb_model.pkl")

st.title("🔍 Fraud Detection System")
st.write("Upload transaction data to check if it is fraud or not.")

# File uploader
uploaded_file = st.file_uploader("Upload a CSV file with transaction(s)", type="csv")

if uploaded_file is not None:
    # Read uploaded file
    data = pd.read_csv(uploaded_file)

    st.write("📊 Uploaded Data Preview:")
    st.dataframe(data.head())

    # Make predictions
    preds = model.predict(data)
    data["Prediction"] = preds
    data["Prediction"] = data["Prediction"].map({0: "Legit ✅", 1: "Fraud ⚠️"})

    st.write("🔮 Predictions:")
    st.dataframe(data)


In [None]:
!pip install streamlit pyngrok --quiet


In [None]:
from pyngrok import ngrok
!ngrok config add-authtoken 33HMKFw123YQMgZbTdT69qFtp5U_FkkZq312eEJr3GaqjzuL

In [None]:
from pyngrok import ngrok

# Kill old tunnels
ngrok.kill()

# Run Streamlit in background
!streamlit run app.py --server.port 8501 &>/dev/null&

# Open new ngrok tunnel
public_url = ngrok.connect(addr="8501")
print("🚀 Your Streamlit app is live here:", public_url)
