In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Location of the credit-card transactions CSV. The file is fetched from a
# GitHub raw URL (not from Kaggle directly).
# NOTE(review): confirm this URL is still reachable before relying on it.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/creditcard.csv"

# Load the dataset directly from the URL
df = pd.read_csv(url)

# Inspect the first few rows of the dataset
print(df.head())

# Check for missing values
print("Missing values per column:\n", df.isnull().sum())

# Summarise column dtypes and non-null counts. DataFrame.info() writes its
# report to stdout itself and returns None, so wrapping it in print() would
# emit a stray "None" line after the summary.
df.info()

# Encoder kept ready for categorical columns; nothing is encoded here.
# If the data has a categorical column (for example a 'TransactionType'
# column), fit this encoder on it and overwrite the column with the codes.
le = LabelEncoder()

# Derived feature: transaction amount divided by (Time + 1). The +1 keeps the
# denominator non-zero for rows where Time is 0.
df['AmountPerSecond'] = df['Amount'].div(df['Time'].add(1))

# The target is the 'Class' column; the feature matrix is every remaining
# column except 'Time' (the derived AmountPerSecond column is kept).
y = df['Class']
X = df.drop(columns=['Time', 'Class'])

# Split first, then scale. Fitting the scaler on the full dataset before the
# split would let the test rows' means/variances leak into the training
# features and make the evaluation optimistic.
# stratify=y keeps the class proportions equal in both splits; the fixed
# random_state makes the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling parameters from the training rows only, then apply that
# same fitted transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Build and train the decision tree in one step: fit() returns the estimator
# itself, so the fitted model can be bound directly. The fixed seed keeps the
# run reproducible.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Report performance on the test split
conf_mat = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_mat)

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# Optional: visualize the top levels of the fitted decision tree
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(20, 10))
plot_tree(
    clf,
    # The classifier is grown without a depth limit, so the full tree can be
    # too large to render legibly. max_depth here limits only the drawing;
    # the fitted model is unchanged.
    max_depth=3,
    filled=True,
    # Pass a plain list rather than a pandas Index: some scikit-learn
    # releases validate this argument as list-or-None and reject an Index.
    feature_names=list(X.columns),
    # Names are matched to the classes in ascending order.
    # NOTE(review): assumes Class is 0 = non-fraud, 1 = fraud; confirm.
    class_names=['Non-Fraud', 'Fraud'],
    rounded=True,
)
plt.show()
