In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# --- Data preparation -------------------------------------------------------
# Read the raw CSV (decoded as latin-1) into a DataFrame.
file_path = '/content/spam.csv'
data = pd.read_csv(file_path, encoding='latin-1')

# Discard the three unnamed columns and give the two remaining ones short,
# descriptive names.
data = data.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis='columns')
data.columns = ['label', 'message']

# Turn the textual class labels into integers: ham -> 0, spam -> 1.
label_codes = {'ham': 0, 'spam': 1}
data['label'] = data['label'].map(label_codes)

# Features are the message texts; the target is the encoded label.
X, y = data['message'], data['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# TF-IDF features with English stop words removed. The vectorizer is fitted
# on the training messages only; the test messages are transformed with the
# already-fitted vectorizer.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# --- Model training ---------------------------------------------------------
# The three classifiers being compared. Logistic regression gets a raised
# iteration cap; the seeded estimators use a fixed random_state.
nb_clf = MultinomialNB()
log_reg = LogisticRegression(max_iter=1000, random_state=42)
svm_clf = SVC(kernel='linear', random_state=42)

# Fit each classifier on the TF-IDF training matrix and immediately predict
# on the TF-IDF test matrix. The generator is consumed lazily by the tuple
# unpacking, so models are processed one after another in the listed order
# (Naive Bayes, Logistic Regression, SVM).
y_pred_nb, y_pred_log_reg, y_pred_svm = (
    clf.fit(X_train_tfidf, y_train).predict(X_test_tfidf)
    for clf in (nb_clf, log_reg, svm_clf)
)

# --- Evaluation -------------------------------------------------------------
# Test-set predictions keyed by a human-readable model name.
models = {
    "Naive Bayes": y_pred_nb,
    "Logistic Regression": y_pred_log_reg,
    "Support Vector Machine": y_pred_svm,
}

# Scoring functions keyed by the name each score is reported under.
scorers = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}

# For every model, compare its predictions against the held-out labels with
# each scoring function, producing {model name: {metric name: value}}.
metrics = {}
for model_name, y_pred in models.items():
    metrics[model_name] = {
        metric_name: scorer(y_test, y_pred)
        for metric_name, scorer in scorers.items()
    }

# Bare expression so the notebook displays the resulting dictionary.
metrics


{'Naive Bayes': {'Accuracy': 0.9668161434977578,
  'Precision': 1.0,
  'Recall': 0.7533333333333333,
  'F1 Score': 0.8593155893536121},
 'Logistic Regression': {'Accuracy': 0.9524663677130045,
  'Precision': 0.970873786407767,
  'Recall': 0.6666666666666666,
  'F1 Score': 0.7905138339920948},
 'Support Vector Machine': {'Accuracy': 0.979372197309417,
  'Precision': 0.9703703703703703,
  'Recall': 0.8733333333333333,
  'F1 Score': 0.9192982456140351}}