<a href="https://colab.research.google.com/github/cicada0521/Finance/blob/main/Credit_Card_Fraud_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)
from imblearn.over_sampling import SMOTE

# 1. Load the Dataset
def load_credit_card_data(
    url='https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv',
):
    """Load the credit card transactions CSV into a DataFrame.

    Args:
        url: Location of the CSV file. Anything ``pandas.read_csv`` accepts
            works (URL, local path, file-like object). Defaults to the copy
            hosted at storage.googleapis.com.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the file could not be
        fetched or parsed. In that case the error is printed, not raised.
    """
    try:
        return pd.read_csv(url)
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller decide what to do with a missing dataset.
        print(f"Error loading data: {e}")
        return None

# 2. Data Preprocessing
def preprocess_data(data):
    """Scale the features and balance the classes with SMOTE.

    Args:
        data: DataFrame containing a 'Class' target column; every other
            column is treated as a feature.

    Returns:
        A ``(X_resampled, y_resampled)`` pair: the standardized features and
        the target after SMOTE oversampling.

    NOTE(review): both the scaler and SMOTE are fit on the full dataset
    here, before train_and_evaluate_models performs its train/test split.
    Scaling statistics and synthetic minority samples therefore reach the
    held-out portion, so the reported metrics are likely optimistic.
    Consider splitting first and fitting these on the training part only.
    """
    # Pull the target out, then treat everything else as features.
    target = data['Class']
    features = data.drop('Class', axis=1)

    # Standardize each feature column.
    scaled_features = StandardScaler().fit_transform(features)

    # Oversample with SMOTE; the seed is fixed for reproducibility.
    oversampler = SMOTE(random_state=42)
    balanced_X, balanced_y = oversampler.fit_resample(scaled_features, target)
    return balanced_X, balanced_y

# 3. Model Training and Evaluation
def train_and_evaluate_models(X, y):
    """Fit each candidate classifier and score it on a held-out split.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.

    Returns:
        A dict keyed by model name. Each value is a dict holding
        'Accuracy', 'Precision', 'Recall', 'F1 Score' and
        'Confusion Matrix', computed on the 20% test split.
    """
    # Hold out 20% of the rows; the seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    candidates = {
        'Logistic Regression': LogisticRegression(random_state=42),
        'Random Forest': RandomForestClassifier(random_state=42),
    }

    # Metric name -> scoring function, each called as fn(y_true, y_pred).
    scorers = {
        'Accuracy': accuracy_score,
        'Precision': precision_score,
        'Recall': recall_score,
        'F1 Score': f1_score,
        'Confusion Matrix': confusion_matrix,
    }

    results = {}
    for label, estimator in candidates.items():
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)
        results[label] = {
            metric: score(y_test, predictions)
            for metric, score in scorers.items()
        }

    return results

# Main Execution
def main():
    """Run the fraud-detection pipeline end to end and print the results.

    Loads the dataset, prints a summary and the class distribution,
    preprocesses the data, trains the models and prints each model's
    metrics. Returns early when the dataset could not be loaded.
    """
    data = load_credit_card_data()
    if data is None:
        # load_credit_card_data has already printed the failure reason.
        return

    # Initial Data Exploration
    print("Dataset Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    data.info()

    print("\nClass Distribution:")
    print(data['Class'].value_counts(normalize=True))

    # Preprocess the data
    X_processed, y_processed = preprocess_data(data)

    # Train and evaluate models
    results = train_and_evaluate_models(X_processed, y_processed)

    # Print Results
    for model_name, metrics in results.items():
        print(f"\n{model_name} Results:")
        for metric_name, value in metrics.items():
            print(f"{metric_name}: {value}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  fl