In [None]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import pickle

In [None]:
# Load the dataset from a CSV file located in the current working directory.
# The resulting frame is used further down as the evaluation (test) data.
df = pd.read_csv("spam_ham_dataset.csv")

In [None]:
# Drop the column 'Unnamed: 0'.
# errors="ignore" keeps this cell idempotent: `df` is rebound here, so running
# the cell a second time (or loading a CSV that lacks the column) would
# otherwise raise a KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [None]:
# Preprocess the text data (this part should ideally be the same as what was
# done during training, so the tokens match what the vectorizer was fitted on).
nltk.download('stopwords')

# Build the stop-word set and the stemmer once: both are loop-invariant, and
# rebuilding the set for every single token dominates the runtime.
english_stopwords = set(stopwords.words("english"))
ps = PorterStemmer()

corpus = []
# Iterate over the column values directly instead of df["text"][i], which is a
# label-based lookup and only works when the index is the default 0..n-1 range.
for raw_text in df["text"]:
    # Replace every non-alphanumeric character with a space, lowercase the
    # result and split it into whitespace-separated tokens.
    tokens = re.sub("[^a-zA-Z0-9]", " ", raw_text).lower().split()
    # Drop English stop words and reduce the remaining tokens to their stems.
    stemmed = [ps.stem(word) for word in tokens if word not in english_stopwords]
    corpus.append(" ".join(stemmed))

In [None]:
# Load the trained model and vectorizer.
# The context managers make sure both file handles are closed after loading.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load these files if they come from a trusted source.
with open("spam-sms-mnb-model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open('cv-transform.pkl', 'rb') as cv_file:
    cv = pickle.load(cv_file)

In [None]:
# Transform the test data using the (already fitted) vectorizer
X_test = cv.transform(corpus).toarray()
# Encode the labels as 1 for 'spam' and 0 for everything else.
# An explicit comparison is used instead of pd.get_dummies(...)['spam'] because
# get_dummies yields a version-dependent dtype (bool on pandas 2.x) and raises
# a KeyError when no row in the data is labelled 'spam'.
y_test = (df['label'] == 'spam').astype(int).to_numpy()

In [None]:
# Predict the values: run the loaded model on the vectorized test data
y_pred = model.predict(X_test)

In [None]:
# Convert predictions to integer (a no-op on the values if they are already ints).
# NOTE(review): assumes the model emits boolean or numeric labels; string
# labels would make astype(int) fail — confirm against the training notebook.
y_pred = y_pred.astype(int)
# Print the predictions (the full array is printed, not a preview)
print(y_pred)

In [None]:
# Evaluate the predictions against the true labels:
# `score` is the overall accuracy, `cm` is the confusion matrix.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the confusion matrix and accuracy score.
# `score` is multiplied by 100 so the accuracy is shown as a percentage.
print(cm)
print('Accuracy Score Is:', score * 100)