In [1]:
# Step 1: Required libraries import karo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 2: Dataset load karo
df = pd.read_csv("credit_card_fraud.csv")
print(df.head())  # Data preview

# Step 3: Features aur target define karo
X = df.drop("fraud", axis=1)
y = df["fraud"]  # 1 = Fraud, 0 = Legit

# Step 4: Categorical aur numeric columns identify karo
categorical_cols = ["transaction_type", "location", "card_present"]
numeric_cols = ["amount", "hour"]

# Step 5: Preprocessing setup using OneHotEncoder
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

# Step 6: Pipeline create karo (encoding + model)
pipeline = Pipeline(steps=[
    ("preprocessing", preprocessor),
    ("classifier", RandomForestClassifier(random_state=42))
])

# Step 7: Train/Test split karo
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 8: Model ko train karo
pipeline.fit(X_train, y_train)

# Step 9: Test data par prediction karo
y_pred = pipeline.predict(X_test)

# Step 10: Model evaluate karo
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


   amount transaction_type       location card_present  hour  fraud
0  375.17              atm  international          yes    12      0
1  950.76              pos  international           no    12      0
2  732.26              atm       domestic          yes     1      0
3  599.06              atm  international          yes     8      0
4  156.86           online  international          yes    23      0
Accuracy: 0.915
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.99      0.95       181
           1       0.67      0.21      0.32        19

    accuracy                           0.92       200
   macro avg       0.79      0.60      0.64       200
weighted avg       0.90      0.92      0.89       200

