In [1]:

# LOGISTIC REGRESSION PROJECT
# Spam Email Detection


# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 2: Create Dataset
# The toy dataset is written one email per row so each observation can be
# read at a glance, then transposed into the column-oriented dict that
# pandas consumes.
COLUMN_NAMES = ("Word_Count", "Has_Link", "Caps_Ratio", "Spam")

email_rows = [
    # (Word_Count, Has_Link, Caps_Ratio, Spam)
    (50,  1, 0.6,  1),
    (200, 0, 0.1,  0),
    (150, 1, 0.4,  1),
    (30,  1, 0.8,  1),
    (500, 0, 0.05, 0),
    (120, 1, 0.3,  1),
    (80,  0, 0.2,  0),
    (300, 0, 0.1,  0),
    (60,  1, 0.7,  1),
    (250, 0, 0.15, 0),
]

# zip(*email_rows) flips rows into columns; pairing each column with its
# name yields the mapping {column name -> list of values}.
data = {
    name: list(values)
    for name, values in zip(COLUMN_NAMES, zip(*email_rows))
}

df = pd.DataFrame(data)

print("===== DATASET =====")
print(df)



===== DATASET =====
   Word_Count  Has_Link  Caps_Ratio  Spam
0          50         1        0.60     1
1         200         0        0.10     0
2         150         1        0.40     1
3          30         1        0.80     1
4         500         0        0.05     0
5         120         1        0.30     1
6          80         0        0.20     0
7         300         0        0.10     0
8          60         1        0.70     1
9         250         0        0.15     0


In [2]:

# Step 3: Prepare Data
# Features are the three numeric email descriptors; the target is the
# binary Spam flag (1 = spam, 0 = not spam).

feature_columns = ["Word_Count", "Has_Link", "Caps_Ratio"]

X = df[feature_columns]
y = df["Spam"]

# 30% of the rows are held out for testing; the fixed seed keeps the split
# (and therefore every number printed below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


# Step 4: Train Logistic Regression Model
# The model is fitted on a DataFrame, so scikit-learn records the feature
# names and expects the same named columns at prediction time.

model = LogisticRegression()
model.fit(X_train, y_train)


# Step 5: Evaluate Model

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\nModel Accuracy:", accuracy)
# labels=[0, 1] pins the matrix to 2x2 (rows = actual, columns = predicted)
# even when the small test split happens to contain only one class.
print(
    "\nConfusion Matrix:\n",
    confusion_matrix(y_test, y_pred, labels=[0, 1]),
)


# Step 6: Interpret Probabilities
# predict_proba returns one column per class, ordered as in model.classes_;
# with 0/1 labels that is (Not Spam, Spam).

y_prob = model.predict_proba(X_test)

print("\nPredicted Probabilities (Not Spam, Spam):\n")
print(y_prob)


# Step 7: Test on New Email

# Example:
# Word Count = 40
# Has Link = 1
# Caps Ratio = 0.75

# Build the sample as a one-row DataFrame with the training columns. Passing
# a bare nested list would rely on positional order and makes recent
# scikit-learn versions warn that X has no valid feature names.
new_email = pd.DataFrame([[40, 1, 0.75]], columns=feature_columns)

prediction = model.predict(new_email)
probability = model.predict_proba(new_email)

print("\nNew Email Prediction (1=Spam,0=Not Spam):", prediction[0])
print("Probability (Not Spam, Spam):", probability[0])


Model Accuracy: 1.0

Confusion Matrix:
 [[1 0]
 [0 2]]

Predicted Probabilities (Not Spam, Spam):

[[0.16582137 0.83417863]
 [0.82503712 0.17496288]
 [0.36791657 0.63208343]]

New Email Prediction (1=Spam,0=Not Spam): 1
Probability (Not Spam, Spam): [0.1240416 0.8759584]


