# Credit Card Fraud Detection
This notebook covers the task of detecting fraudulent credit card transactions.
We will use the popular Credit Card Fraud Detection dataset (originally published on Kaggle), downloaded here from a public mirror hosted by TensorFlow.
The notebook includes dataset download, preprocessing, model training, and evaluation.

In [None]:
# Install necessary libraries
!pip install -q pandas scikit-learn numpy


In [None]:
# Download dataset (skipped when a local copy is already present)
import os
import urllib.request

dataset_url = 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv'
dataset_path = 'creditcard.csv'

if os.path.exists(dataset_path):
    print('Dataset already exists.')
else:
    # Download under a temporary name and rename only after the transfer
    # succeeds. Otherwise an interrupted download would leave a truncated
    # creditcard.csv that the existence check above would accept as complete
    # on the next run.
    partial_path = dataset_path + '.part'
    print('Downloading dataset...')
    try:
        urllib.request.urlretrieve(dataset_url, partial_path)
        os.replace(partial_path, dataset_path)
    finally:
        # Only true when the download or rename failed; clean up the leftover.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print('Download complete.')


In [None]:
# Read the downloaded CSV into a DataFrame and preview its first five rows
import pandas as pd

df = pd.read_csv(filepath_or_buffer=dataset_path)
df.head(n=5)

## Data Preprocessing
- The dataset is highly imbalanced.
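- We will use undersampling (randomly keeping as many non-fraud rows as there are fraud rows) to balance the classes. The train/test split is made after undersampling, so the test set is balanced as well.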
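- The anonymized features V1–V28 are PCA components and need no further scaling; 'Amount' is on its raw scale and is standardized below ('Time' is also raw and is left as-is).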


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Undersample the majority class: keep every fraud row and an equally sized
# random sample of non-fraud rows.
fraud = df[df['Class'] == 1]
non_fraud = df[df['Class'] == 0].sample(n=len(fraud), random_state=42)
df_balanced = pd.concat([fraud, non_fraud])

X_balanced = df_balanced.drop('Class', axis=1)
y_balanced = df_balanced['Class']

# Split data. Stratify so both splits keep the same fraud / non-fraud ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X_balanced,
    y_balanced,
    test_size=0.2,
    random_state=42,
    stratify=y_balanced,
)

# Scale the 'Amount' feature. The scaler is fitted on the training rows only,
# so no statistics from the test rows leak into training, and the scaled
# values are written into the very frames the model is trained/evaluated on.
# (Previously the scaled column was stored in X but the balanced data was
# rebuilt from the unscaled df, so the scaling was silently discarded.)
scaler = StandardScaler()
X_train = X_train.copy()  # own the data before assigning into a column
X_test = X_test.copy()
X_train['Amount'] = scaler.fit_transform(
    X_train['Amount'].to_numpy().reshape(-1, 1)
).ravel()
X_test['Amount'] = scaler.transform(
    X_test['Amount'].to_numpy().reshape(-1, 1)
).ravel()

# Full (imbalanced) features and target, with 'Amount' scaled by the same
# fitted scaler. Kept under their original names for any later use.
X = df.drop('Class', axis=1)
y = df['Class']
X['Amount'] = scaler.transform(X['Amount'].to_numpy().reshape(-1, 1)).ravel()


## Model Training
We will train a Random Forest classifier to detect fraud.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Build a 100-tree random forest with a fixed seed and fit it on the training split
model = RandomForestClassifier(n_estimators=100, random_state=42)
model = model.fit(X_train, y_train)

# Predict labels for the held-out split
y_pred = model.predict(X_test)

# Report overall accuracy, then per-class precision / recall / F1
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print('Accuracy:', accuracy)
print('\nClassification Report:\n', report)