<a href="https://colab.research.google.com/github/au312821243036/CODSOFT/blob/main/CREDIT_CARD_FRAUD_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Data Preprocessing

In [1]:
import pandas as pd
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=10000, n_features=20, n_informative=2, n_redundant=10, n_clusters_per_class=1, weights=[0.99], flip_y=0, random_state=1)
df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
df['target'] = y
df.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,target
0,-0.820834,2.195135,-0.97868,1.030117,-1.147746,-1.817407,-0.963045,1.359982,0.548881,1.762998,...,1.855715,0.200304,-1.954514,0.812396,1.358491,-0.207073,-2.30506,0.914914,-0.173738,0
1,0.286163,0.725973,0.609034,0.122395,-0.522488,-0.528128,0.869972,0.471923,-0.48093,1.000125,...,0.225881,0.250127,-0.326474,-0.862885,0.875266,-1.164598,0.177483,0.922049,0.330389,0
2,-0.723514,3.46931,-0.587049,0.447852,-1.961005,-2.797293,0.334829,2.172181,0.185844,3.215482,...,2.533802,0.203929,-2.759834,0.30664,2.585355,-0.862308,-0.691784,2.083386,-0.525589,0
3,-0.248059,1.246615,-0.191007,0.500142,-0.707696,-1.003583,1.282019,0.780996,0.05262,1.164322,...,0.902173,1.229571,-0.984844,-0.845807,0.938092,-0.647217,-0.938415,0.761856,0.024162,0
4,-0.061802,0.738228,0.029221,1.103316,-0.440895,-0.58318,-0.433219,0.465875,-0.069932,0.753141,...,0.475069,-0.353391,-0.53439,-0.276873,0.620531,0.030376,0.188657,0.545693,-0.728567,0


# Handle Missing Values and Normalize Data

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop(columns=['target']))
y = df['target']

# Handle Class Imbalance

In [4]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# Split the Data

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a Classification Model

In [6]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the Model

In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
y_pred = model.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
precision, recall, f1, report

(0.991869918699187,
 0.99846547314578,
 0.9951567677797603,
 '              precision    recall  f1-score   support\n\n           0       1.00      0.99      1.00      2005\n           1       0.99      1.00      1.00      1955\n\n    accuracy                           1.00      3960\n   macro avg       1.00      1.00      1.00      3960\nweighted avg       1.00      1.00      1.00      3960\n')

# Summary

In [8]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
X, y = make_classification(n_samples=10000, n_features=20, n_informative=2, n_redundant=10, n_clusters_per_class=1, weights=[0.99], flip_y=0, random_state=1)
df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
df['target'] = y
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop(columns=['target']))
y = df['target']
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
precision, recall, f1, report

(0.991869918699187,
 0.99846547314578,
 0.9951567677797603,
 '              precision    recall  f1-score   support\n\n           0       1.00      0.99      1.00      2005\n           1       0.99      1.00      1.00      1955\n\n    accuracy                           1.00      3960\n   macro avg       1.00      1.00      1.00      3960\nweighted avg       1.00      1.00      1.00      3960\n')