In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Perceptron
from sklearn.metrics import accuracy_score, classification_report

# Read the raw e-mail corpus from disk.
# latin-1 is passed explicitly as the file encoding for read_csv.
DATA_PATH = 'email.csv'
df = pd.read_csv(DATA_PATH, encoding='latin-1')

In [8]:
# Preprocess the data
#
# Produces a two-column frame (`label`, `text`) with an integer target:
# 0 = ham, 1 = spam. Rows with a missing field or an unrecognised label
# are dropped.

# The numeric keys make the mapping idempotent: if this cell is re-run on
# an already-encoded frame, 0/1 map to themselves instead of becoming NaN
# (which would silently drop every row).
label_codes = {'ham': 0, 'spam': 1, 0: 0, 1: 1}

df = (
    df.rename(columns={"Category": "label", "Message": "text"})
      .loc[:, ['label', 'text']]
      # Handle missing values
      .dropna(subset=['label', 'text'])
      # Map labels to binary values; anything not in label_codes becomes NaN
      .assign(label=lambda frame: frame['label'].map(label_codes))
      # Ensure there are no NaN values in the target variable
      .dropna(subset=['label'])
      # map() yields a float column whenever it produced NaN; after the drop
      # the values are exactly 0/1, so store them as integer class labels
      # (otherwise reports show classes "0.0" / "1.0").
      .astype({'label': int})
)

In [9]:
# Feature extraction
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['text'])
y = df['label']

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [11]:
# Fit three linear models on the same training split and predict on the
# same test split. Each estimator's fit() returns the estimator itself,
# so construction and fitting are chained.

# Logistic Regression
model_lr = LogisticRegression().fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)

# Linear Regression for Classification
# The regressor outputs a continuous score; scores at or above 0.5 are
# treated as class 1, everything else as class 0.
model_linear = LinearRegression().fit(X_train, y_train)
y_pred_linear = model_linear.predict(X_test)
is_positive = y_pred_linear >= 0.5
y_pred_linear_class = is_positive.astype(int)

# Perceptron Classifier
model_perceptron = Perceptron().fit(X_train, y_train)
y_pred_perceptron = model_perceptron.predict(X_test)

In [12]:
# Evaluation
# Score every model's test-set predictions against the same y_test.

# Overall accuracy per model
accuracy_lr = accuracy_score(y_test, y_pred_lr)
accuracy_linear = accuracy_score(y_test, y_pred_linear_class)
accuracy_perceptron = accuracy_score(y_test, y_pred_perceptron)

# Per-class precision / recall / f1 text reports
report_lr = classification_report(y_test, y_pred_lr)
report_linear = classification_report(y_test, y_pred_linear_class)
report_perceptron = classification_report(y_test, y_pred_perceptron)

In [13]:
# Print, for each model: the accuracy line, a report heading, then the
# report itself. sep='\n' puts each argument on its own line, which gives
# the same text as three consecutive print() calls.
print(
    f'Logistic Regression Accuracy: {accuracy_lr}',
    'Logistic Regression Classification Report:',
    report_lr,
    sep='\n',
)

print(
    f'Linear Regression (Classification) Accuracy: {accuracy_linear}',
    'Linear Regression (Classification) Report:',
    report_linear,
    sep='\n',
)

print(
    f'Perceptron Classifier Accuracy: {accuracy_perceptron}',
    'Perceptron Classification Report:',
    report_perceptron,
    sep='\n',
)

Logistic Regression Accuracy: 0.9557416267942583
Logistic Regression Classification Report:
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98      1448
         1.0       0.99      0.67      0.80       224

    accuracy                           0.96      1672
   macro avg       0.97      0.84      0.89      1672
weighted avg       0.96      0.96      0.95      1672

Linear Regression (Classification) Accuracy: 0.9754784688995215
Linear Regression (Classification) Report:
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      1448
         1.0       0.97      0.84      0.90       224

    accuracy                           0.98      1672
   macro avg       0.97      0.92      0.94      1672
weighted avg       0.98      0.98      0.97      1672

Perceptron Classifier Accuracy: 0.9868421052631579
Perceptron Classification Report:
              precision    recall  f1-score   support

       