In [None]:
!pip install pandas scikit-learn matplotlib seaborn

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc
import joblib
import os

# Load the dataset: prefer a local copy of the CSV in the working directory
# and fall back to the public download only when that file is missing.
local_csv = 'my_paypal_creditcard.csv'
remote_csv = "https://storage.googleapis.com/qwasar-public/track-ds/my_paypal_creditcard.csv"
url = local_csv if os.path.exists(local_csv) else remote_csv
df = pd.read_csv(url)

# Split the data into features (X) and target variable (y)
X = df.drop('Class', axis=1)
y = df['Class']

# Split the data into training and testing sets.
# stratify=y keeps the class proportions identical in both splits; without it
# a rare positive class can end up under-represented in the test set purely by
# chance, which makes the reported metrics unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features. The scaler is fitted on the training split only so
# that no statistics from the test split leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the trained model together with the fitted scaler. The model was trained
# on standardized features, so it cannot be applied to raw data unless the
# very same scaler is available at prediction time.
joblib.dump(model, 'fraud_detection_model.joblib')
joblib.dump(scaler, 'fraud_detection_scaler.joblib')

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Calculate precision-recall curve and area under the curve (AUC).
# The curve must be built from continuous scores, not from hard 0/1
# predictions: with hard labels there is only a single operating point and the
# resulting "area" is meaningless. Column 1 of predict_proba is the probability
# of the second entry in model.classes_ (assumes 'Class' is a binary 0/1
# label, so that entry is the positive class 1 -- confirm against the data).
y_scores = model.predict_proba(X_test)[:, 1]
precision, recall, _ = precision_recall_curve(y_test, y_scores)
auc_score = auc(recall, precision)
print(f"\nArea Under the Precision-Recall Curve (AUC-PRC): {auc_score}")

# Now, you can use this trained model for making predictions on new data.
# New data must go through the same fitted scaler before it reaches the model:
# Example: new_data_predictions = model.predict(scaler.transform(new_data))
