In [99]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import Pipeline

# Function to load CSV data, prompting for the path when none is given
def load_csv_data(file_path=None):
    """Load the dataset from a CSV file into a DataFrame.

    Args:
        file_path: Location of the CSV file. When omitted (``None``) the
            user is prompted for it on standard input.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    if file_path is None:
        file_path = input("Enter the path to the CSV file: ")

    # Typed or pasted paths often carry stray spaces or wrapping quotes
    # (e.g. "Copy as path" on Windows); clean them so read_csv can find the
    # file. Non-string inputs (path objects, buffers) are passed through.
    if isinstance(file_path, str):
        file_path = file_path.strip()
        if (
            len(file_path) >= 2
            and file_path[0] == file_path[-1]
            and file_path[0] in "'\""
        ):
            file_path = file_path[1:-1].strip()

    return pd.read_csv(file_path)

# Load data
data = load_csv_data()
print("Data loaded successfully!")
print(data.head())

# Ensure data has the correct columns (usually 'Category' and 'Message')
if 'Category' not in data.columns or 'Message' not in data.columns:
    raise ValueError("CSV must contain 'Category' and 'Message' columns.")

# Rows without a label or without message text cannot be used for training;
# left in place they surface later as an obscure NaN error inside fit().
incomplete = data[['Category', 'Message']].isna().any(axis=1)
if incomplete.any():
    print(f"Dropping {int(incomplete.sum())} row(s) with a missing Category or Message.")
    data = data.loc[~incomplete].copy()

# Label encoding: ham -> 0, spam -> 1
# Case and surrounding whitespace are normalised first so that labels such as
# 'Spam' or ' ham' are still recognised. Series.map() turns anything it cannot
# look up into NaN, so unknown labels are reported explicitly instead of being
# passed on silently.
normalised_labels = data['Category'].astype(str).str.strip().str.lower()
encoded_labels = normalised_labels.map({'ham': 0, 'spam': 1})
unrecognised = encoded_labels.isna()
if unrecognised.any():
    bad_values = sorted(data.loc[unrecognised, 'Category'].astype(str).unique())
    raise ValueError(
        f"Unrecognised Category values (expected 'ham' or 'spam'): {bad_values}"
    )
data['Category'] = encoded_labels.astype(int)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X, y = data['Message'], data['Category']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Model pipeline: TF-IDF text features feeding a logistic-regression classifier
tfidf_step = TfidfVectorizer(min_df=1, stop_words='english', lowercase=True)
model_step = LogisticRegression(max_iter=200)
pipeline = Pipeline(steps=[('tfidf', tfidf_step), ('model', model_step)])

# Fit on the training split, then predict labels for the held-out messages
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Evaluation: print each metric for the test split under its heading
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

# Test on new data
def test_new_email():
    """Interactively classify messages typed by the user.

    Reads messages from standard input in a loop and prints "Spam" or "Ham"
    for each one, using the module-level ``pipeline``. The loop ends when the
    user types ``exit`` (any letter case, surrounding whitespace ignored) or
    when standard input is closed. Blank lines are skipped.
    """
    print("\nEnter a message to classify as spam or ham (type 'exit' to quit):")
    while True:
        try:
            user_input = input("Message: ")
        except EOFError:
            # stdin was closed (e.g. piped input ran out): stop cleanly
            # instead of ending with a traceback.
            break

        message = user_input.strip()
        if message.lower() == 'exit':
            break
        if not message:
            # A blank line carries no text to classify; ask again.
            continue

        prediction = pipeline.predict([message])
        print("Spam" if prediction[0] == 1 else "Ham")

# Start the interactive classification prompt (loops until the user types 'exit')
test_new_email()


Enter the path to the CSV file:  mail_data.csv


Data loaded successfully!
  Category                                            Message
0      ham  Go until jurong point, crazy.. Available only ...
1      ham                      Ok lar... Joking wif u oni...
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...
3      ham  U dun say so early hor... U c already then say...
4      ham  Nah I don't think he goes to usf, he lives aro...
Accuracy: 0.9659090909090909
Confusion Matrix:
 [[1446    2]
 [  55  169]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98      1448
           1       0.99      0.75      0.86       224

    accuracy                           0.97      1672
   macro avg       0.98      0.88      0.92      1672
weighted avg       0.97      0.97      0.96      1672


Enter a message to classify as spam or ham (type 'exit' to quit):


Message:  I would like to congratulate you on winning the lucky draw


Ham


Message:  exit
