# Credit Card Fraud Detection - Final Project

## 1. Class Imbalance Check

In [None]:
# Visualise how many rows fall into each value of the 'Class' label.
class_ax = sns.countplot(data=df, x='Class')
class_ax.set_title('Distribution of Fraud vs. Non-Fraud')
plt.show()

# Relative frequencies (normalize=True) make the degree of imbalance explicit.
class_share = df['Class'].value_counts(normalize=True)
print("Class distribution:", class_share, sep="\n")

## 2. Feature Scaling and Cleanup

In [None]:
# Standardise the raw 'Amount' column into 'normAmount', then discard the
# original 'Amount' column together with 'Time'.
# NOTE(review): the scaler is fitted on the whole frame before the train/test
# split below, so test-set statistics influence the scaling — consider fitting
# on the training split only.
# NOTE(review): the in-place drop means this cell fails if it is run twice on
# the same `df` ('Amount' no longer exists the second time).
scaler = StandardScaler()
amount_column = df[['Amount']]  # 2-D selection, as fit_transform expects
df['normAmount'] = scaler.fit_transform(amount_column)
df.drop(columns=['Amount', 'Time'], inplace=True)

## 3. Train-Test Split with Stratification

In [None]:
# Target is the 'Class' column; every remaining column is a feature.
y = df['Class']
X = df.drop(columns='Class')

# Hold out 20% for testing. Stratifying on y keeps the class proportions the
# same in both partitions; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## 4. Handling Imbalance with SMOTE

In [None]:
# Oversample with SMOTE on the training split only; the test split is left
# untouched so evaluation reflects the original class distribution.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

# Show the per-class row counts after resampling.
resampled_counts = pd.Series(y_train_res).value_counts()
print("Resampled class distribution:", resampled_counts, sep="\n")

## 5. Logistic Regression Training

In [None]:
# Fit logistic regression on the SMOTE-resampled training data.
# max_iter is raised to 1000, presumably to give the solver room to converge.
lr = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_res, y_train_res
)

# Hard class predictions on the untouched test split.
y_pred_lr = lr.predict(X_test)

## 6. Evaluation Metrics for Logistic Regression

In [None]:
# Full per-class report first, then the headline scalar metrics.
print("Classification Report for Logistic Regression:")
print(classification_report(y_test, y_pred_lr))

# Each entry pairs the printed label with the metric function that produces it;
# all metrics compare the test labels with the logistic-regression predictions.
lr_metrics = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for metric_label, metric_fn in lr_metrics:
    print(metric_label, metric_fn(y_test, y_pred_lr))

## 7. Plot Confusion Matrix and Curves

In [None]:
# Confusion matrix and ROC curve for the logistic-regression model.
from sklearn.metrics import confusion_matrix, roc_curve, auc

# --- Confusion matrix -------------------------------------------------------
conf_matrix = confusion_matrix(y_test, y_pred_lr)
cm_ax = sns.heatmap(conf_matrix, annot=True, fmt='d')  # 'd' -> integer counts
cm_ax.set_title('Confusion Matrix')
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
plt.show()

# --- ROC curve --------------------------------------------------------------
# Column 1 of predict_proba is used as the score for the ROC computation.
y_scores = lr.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

roc_ax = plt.figure().gca()
roc_label = 'ROC curve (area = {:.2f})'.format(roc_auc)
roc_ax.plot(fpr, tpr, label=roc_label)
roc_ax.plot([0, 1], [0, 1], 'k--')  # dashed diagonal as a visual reference
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC)')
roc_ax.legend(loc="lower right")
plt.show()

## 8. Random Forest and XGBoost

In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Both models are trained on the SMOTE-resampled training data and evaluated
# on the untouched test split.

# Random Forest
# class_weight='balanced' is kept from the original configuration; it reweights
# classes by their frequency in the data passed to fit().
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
rf.fit(X_train_res, y_train_res)
y_pred_rf = rf.predict(X_test)

# XGBoost
# scale_pos_weight is intentionally left at its default: the training data has
# already been rebalanced by SMOTE, so up-weighting the positive class by 100
# on top of that would compensate for the imbalance twice.
# random_state is fixed to match the other models in this notebook.
# NOTE(review): use_label_encoder is kept for older xgboost releases; newer
# releases may warn that it is unused — confirm against the pinned version.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
)
xgb.fit(X_train_res, y_train_res)
y_pred_xgb = xgb.predict(X_test)

## 9. Model Comparison Table

In [None]:
# Imported here so the cell does not depend on an import made elsewhere.
from sklearn.metrics import roc_auc_score

# Hard class predictions per model (used for precision / recall / F1).
models = {
    "Logistic Regression": y_pred_lr,
    "Random Forest": y_pred_rf,
    "XGBoost": y_pred_xgb
}

# Probability scores per model (column 1 of predict_proba, the same column the
# ROC-curve cell uses). ROC AUC must be computed from continuous scores:
# feeding it hard 0/1 predictions collapses the curve to a single threshold
# and misreports the ranking quality of the model.
model_scores = {
    "Logistic Regression": lr.predict_proba(X_test)[:, 1],
    "Random Forest": rf.predict_proba(X_test)[:, 1],
    "XGBoost": xgb.predict_proba(X_test)[:, 1],
}

print("Model Comparison:")
for name, pred in models.items():
    print(f"\n{name}:")
    print("Precision:", precision_score(y_test, pred))
    print("Recall:", recall_score(y_test, pred))
    print("F1 Score:", f1_score(y_test, pred))
    print("ROC AUC:", roc_auc_score(y_test, model_scores[name]))

## 10. Feature Importance from Random Forest

In [None]:
# Rank the random-forest feature importances from highest to lowest.
importances = rf.feature_importances_
feature_names = X.columns
indices = np.argsort(importances)[::-1]  # argsort is ascending; reverse it

# Plot only the ten highest-ranked features.
top_ten = indices[:10]
plt.figure(figsize=(10, 6))
sns.barplot(x=importances[top_ten], y=feature_names[top_ten])
plt.title("Top 10 Important Features - Random Forest")
plt.show()

## 11. Deployment Plan (Markdown)

In [None]:
# Deployment Plan

# The model can be deployed behind a REST API built with Flask or FastAPI.
# We recommend a real-time API setup so that transactions are scored for fraud as they occur.
# The model can be containerized using Docker and deployed to cloud services such as AWS Lambda, Google Cloud Run, or Azure Functions.
# Latency should be kept under 300 ms per request.
# Monitoring should be implemented to track model drift, and the model should be retrained periodically on newly labeled data.