<a href="https://colab.research.google.com/github/aqsa123-dotcom/SMS-SPAM-checker/blob/main/aqsa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer

# Load dataset (assuming you have the 'creditcard.csv' dataset in the working directory)
df = pd.read_csv('creditcard.csv')

# Display basic info about the dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
df.info()
print(df.head())

# Check for missing values (number of nulls per column)
print(df.isnull().sum())

# Separate the target from the features *before* imputing.
# Mean-imputing the 'Class' column would turn a missing label into a fractional
# value that is not a valid class, so rows without a label are dropped instead
# and only the feature columns are passed through the imputer.
labelled = df.dropna(subset=['Class']).reset_index(drop=True)
feature_columns = labelled.columns.drop('Class')

# Impute missing feature values using SimpleImputer (mean imputation).
# NOTE(review): the imputer is fit on all rows, i.e. before the train/test
# split, so test rows contribute to the imputed means. Move this after the
# split if the data actually contains gaps.
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(
    imputer.fit_transform(labelled[feature_columns]),
    columns=feature_columns,
)

# Target ('Class'), cast to int so the labels are reported as 0 / 1
y = labelled['Class'].astype(int)

# Imputed features plus target in the original column order, kept under the
# original name for any later cell that reads it.
df_imputed = X.assign(Class=y)[df.columns]

# Split the data into training and test sets.
# stratify=y keeps the class ratio identical in both partitions; without it an
# imbalanced target can leave the test set with too few minority samples for
# the evaluation to be meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale the features (important for models like Logistic Regression).
# The scaler is fit on the real training rows only, so neither the test set nor
# the synthetic SMOTE samples influence the learned mean / variance.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle the imbalanced dataset using SMOTE (Synthetic Minority Over-sampling Technique).
# SMOTE interpolates between nearest neighbours, so it is applied to the
# *scaled* training data; on raw features the columns with the largest
# magnitude would dominate the distance computation.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# Fit a Logistic Regression classifier on the resampled training data.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression().fit(X_train_resampled, y_train_resampled)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Compute the evaluation metrics from the true vs. predicted test labels
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report: accuracy (two decimals) first, then each titled section in turn
print(f"Accuracy: {accuracy:.2f}")
for title, content in (
    ("Confusion Matrix:", conf_matrix),
    ("Classification Report:", class_report),
):
    print(title)
    print(content)

# Example usage: predicting fraud on a new transaction.
# The values are positional and must follow the order of X.columns (every
# dataset column except 'Class').
# NOTE(review): the first slot corresponds to the 'Time' column, yet the
# example value is negative - confirm these sample values are in column order.
new_transaction = [[
    -1.359807, 1.191857, -0.358440, -1.340163, 1.011978, -0.708304,
    0.490471, 0.121478, -0.276399, 0.451929, 0.396089, 0.480303,
    0.616109, -0.874299, 0.329682, 0.011722, 0.094178, 0.592941,
    -0.270533, 0.817739, 0.753074, -0.822843, 0.538196, -0.313769,
    0.515399, 0.207971, 0.025791, 0.403993, 1.234235, 1.341304,
]]  # Example feature values

# Label the row with the training feature names before scaling. Passing a bare
# list to a scaler that was fit on a DataFrame triggers scikit-learn's
# "X does not have valid feature names" warning, and building the frame also
# fails loudly if the number of values does not match the number of features.
new_transaction_df = pd.DataFrame(new_transaction, columns=X.columns)

# Apply the same scaling the model was trained with, then classify
new_transaction_scaled = scaler.transform(new_transaction_df)
fraud_prediction = model.predict(new_transaction_scaled)
print(f"Prediction for new transaction: {'Fraud' if fraud_prediction[0] == 1 else 'Not Fraud'}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

