In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
# Read the raw CSV into a DataFrame, decoding the file as latin-1
data = pd.read_csv(
    filepath_or_buffer='/content/spam.csv',
    encoding='latin-1',
)

In [4]:
# Keep only the label and text columns; these are the only columns the
# following cells read.
# The explicit .copy() makes `data` an independent DataFrame, so the later
# assignment to data['Category'] does not write into a selection taken from
# the frame returned by read_csv (the pattern pandas can flag with
# SettingWithCopyWarning).
data = data[['Category', 'Message']].copy()

In [5]:
# Convert labels to a numerical variable: 'ham' -> 0, 'spam' -> 1
label_to_id = {'ham': 0, 'spam': 1}
raw_labels = data['Category']

# Re-running this cell must not destroy the labels: once the column holds
# only the encoded values, mapping it again would turn every entry into NaN,
# so the conversion is skipped in that case.
if not raw_labels.isin(list(label_to_id.values())).all():
    encoded_labels = raw_labels.map(label_to_id)

    # Series.map yields NaN for any value that is not a key of the mapping;
    # fail loudly here instead of handing NaN targets to the later cells.
    unknown_labels = raw_labels[encoded_labels.isna()].unique().tolist()
    if unknown_labels:
        raise ValueError(
            f"Unexpected values in 'Category' (expected 'ham' or 'spam'): {unknown_labels}"
        )

    data['Category'] = encoded_labels.astype('int64')

In [6]:
# Hold out 25% of the rows as a test set; random_state is fixed so the
# same split is produced on every run
features = data['Message']
targets = data['Category']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Turn each message into a vector of token counts.
# The vocabulary is learned from the training messages only; the test
# messages are then encoded with that same fitted vocabulary.
vectorizer = CountVectorizer()
X_train_vectors = vectorizer.fit_transform(raw_documents=X_train)
X_test_vectors = vectorizer.transform(raw_documents=X_test)

In [8]:
# Fit a multinomial Naive Bayes classifier on the training count vectors
model = MultinomialNB()
model.fit(X=X_train_vectors, y=y_train)

In [9]:
# Predict on the held-out vectors, then print each metric under its heading
predictions = model.predict(X_test_vectors)

report_sections = [
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
]
for heading, metric in report_sections:
    # Each metric is computed right before it is printed, comparing the
    # true test labels against the predictions
    print(heading, metric(y_test, predictions))

Accuracy: 0.990667623833453

Confusion Matrix:
 [[1205    2]
 [  11  175]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      1207
           1       0.99      0.94      0.96       186

    accuracy                           0.99      1393
   macro avg       0.99      0.97      0.98      1393
weighted avg       0.99      0.99      0.99      1393



In [10]:
from joblib import dump

# Serialize the trained classifier to disk
dump(model, 'spam_detector_model.joblib')

# Serialize the fitted vectorizer as well: the classifier was trained on the
# count vectors produced by this vectorizer, so new messages have to be
# transformed with the same fitted vocabulary before model.predict can be
# called on them. Without it the saved classifier cannot score raw text.
dump(vectorizer, 'spam_detector_vectorizer.joblib')

# Download both artifacts from the Colab runtime to the local machine
from google.colab import files
files.download('spam_detector_model.joblib')
files.download('spam_detector_vectorizer.joblib')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>