In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Load the raw mail dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='mail_data.csv')

In [3]:
# Keep every non-null cell as-is and substitute an empty string for nulls,
# producing a new frame so the raw `df` is left untouched.
data = df.where(df.notna(), other='')

In [4]:
# Encode the text labels as integers: spam -> 0, ham -> 1.
#
# The column is rebuilt in a single pass and reassigned as a whole instead of
# writing ints cell-by-cell into a column that still holds strings; element-wise
# assignment of a non-string value can be rejected when pandas stores the
# column with a dedicated string dtype.
# Values that are not a known label (including already-encoded 0/1) are passed
# through unchanged, so re-running this cell is harmless.
label_codes = {'spam': 0, 'ham': 1}
data['Category'] = data['Category'].map(lambda label: label_codes.get(label, label))

In [5]:
# Show the frame after label encoding (Category now holds 0 for spam, 1 for ham).
print(data)

     Category                                            Message
0           1  Go until jurong point, crazy.. Available only ...
1           1                      Ok lar... Joking wif u oni...
2           0  Free entry in 2 a wkly comp to win FA Cup fina...
3           1  U dun say so early hor... U c already then say...
4           1  Nah I don't think he goes to usf, he lives aro...
...       ...                                                ...
5567        0  This is the 2nd time we have tried 2 contact u...
5568        1               Will ü b going to esplanade fr home?
5569        1  Pity, * was in mood for that. So...any other s...
5570        1  The guy did some bitching but I acted like i'd...
5571        1                         Rofl. Its true to its name

[5572 rows x 2 columns]


In [6]:
# Features are the raw message texts; targets are the encoded categories.
X, Y = data['Message'], data['Category']

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)


In [12]:
# Make sure the targets are integer-typed before they reach the classifier.
Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')

# TF-IDF text features: lowercase the text and drop English stop words.
feature_extraction = TfidfVectorizer(
    stop_words='english',
    lowercase=True,
    min_df=1,
)

# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vectorizer.
X_train_feature = feature_extraction.fit_transform(X_train)
X_test_feature = feature_extraction.transform(X_test)

In [13]:
# Classifier for the spam/ham task, created with the library's default settings.
model = LogisticRegression()

In [14]:
# Fit the classifier on the TF-IDF training features and integer labels.
model.fit(X=X_train_feature, y=Y_train)

In [15]:
# Accuracy on the rows the model was fitted on (an optimistic figure; compare
# it with the held-out accuracy to judge overfitting).
Y_predict_train = model.predict(X=X_train_feature)
accuracy_train = accuracy_score(y_true=Y_train, y_pred=Y_predict_train)
print(f"Training Accuracy: {accuracy_train}")

Training Accuracy: 0.9670181736594121


In [16]:
# Accuracy on the held-out test rows, which were not used for fitting.
Y_predict_test = model.predict(X=X_test_feature)
accuracy_test = accuracy_score(y_true=Y_test, y_pred=Y_predict_test)
print(f"test Accuracy: {accuracy_test}")

test Accuracy: 0.9659192825112107


In [17]:
inputt = ["""Dear Valued Customer,We are thrilled to inform you that you have been selected as the lucky winner of a $1,000 gift card! This is a limited-time offer, and you must claim your prize within the next 24 hours.

To claim your gift card, simply click on the link below and follow the instructions:

Claim Your $1,000 Gift Card Now

Don’t miss out on this incredible opportunity! Act fast and enjoy your shopping spree.

Best regards,

The Rewards Team"""]

# Vectorize the sample with the already-fitted TF-IDF vectorizer, then classify
# it. In this notebook's label encoding 0 means spam and 1 means ham.
inputt_feature = feature_extraction.transform(raw_documents=inputt)
inputt_predict = model.predict(X=inputt_feature)
print(inputt_predict)

[0]


In [18]:
inputt2 = ["""Hi Team,

I hope this email finds you well.

This is a friendly reminder about our upcoming project update meeting scheduled for Monday, September 18th at 10:00 AM. We will be discussing the progress of our current tasks, addressing any challenges, and planning the next steps.

Please make sure to review the project documents and come prepared with any questions or updates you might have.

Looking forward to seeing you all there!

Best regards"""]

# Vectorize the sample with the already-fitted TF-IDF vectorizer, then classify
# it. In this notebook's label encoding 0 means spam and 1 means ham.
inputt_feature2 = feature_extraction.transform(raw_documents=inputt2)
inputt_predict2 = model.predict(X=inputt_feature2)
print(inputt_predict2)

[1]
