Spam Mail Detection using Logistic Regression by Yugeshwar.P

In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from google.colab import files

# Load the spam dataset hosted in the PyCaret GitHub repository.
url = "https://raw.githubusercontent.com/pycaret/pycaret/master/datasets/spam.csv"
print("Downloading dataset...")
df = pd.read_csv(url, encoding='latin-1')

# Rename the columns positionally; this raises if the CSV does not have
# exactly two columns.
df.columns = ['label', 'message']

# Encode the text labels as integers (ham -> 0, spam -> 1).
df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

# Series.map turns every label missing from the dict into NaN. Fail fast with
# a clear message instead of letting NaN targets reach the model.
unmapped_rows = df['label_num'].isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'label'].unique().tolist()
    raise ValueError(
        f"Unexpected label values (expected 'ham' or 'spam'): {unexpected_labels}"
    )

X = df['message']
y = df['label_num']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two-stage text classifier: TF-IDF features (English stop words removed)
# feeding a logistic regression with default settings.
pipeline = Pipeline(steps=[
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('model', LogisticRegression()),
])

print("Training model...")
pipeline.fit(X_train, y_train)

# Accuracy on the test split: the share of messages whose predicted label
# matches the true one (the same quantity a classifier's .score reports).
score = accuracy_score(y_test, pipeline.predict(X_test))
print(f"Accuracy: {score*100:.2f}%")

filename = 'spam_mail_model.pkl'

# Serialize the fitted pipeline (vectorizer + classifier) to disk.
# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# this file from a source you trust.
with open(filename, 'wb') as file:
    pickle.dump(pipeline, file)

# The remaining steps run only after the with-block has closed the file, so
# all buffered bytes are flushed to disk before the success message is
# printed and before the browser download reads the file.
print(f"Model saved as '{filename}'")

print("Downloading file...")
files.download(filename)


Downloading dataset...
Training model...
Accuracy: 95.25%
Model saved as 'spam_mail_model.pkl'
Downloading file...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>