In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Load the SMS dataset, keeping only the label column (v1) and the message text (v2).
df = pd.read_csv('/content/drive/MyDrive/spam.csv', encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'message']

# Encode labels as integers: ham -> 0, spam -> 1.
# Series.map turns every value that is missing from the mapping into NaN, so
# verify the result here and fail with a clear message instead of handing NaN
# labels to the classifier further down.
label_codes = df['label'].map({'ham': 0, 'spam': 1})
if label_codes.isna().any():
    unexpected = sorted(df.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values (expected 'ham' or 'spam'): {unexpected}")
df['label'] = label_codes

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified; consider stratify=df['label'] if the
# class proportions must match between the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)

# Convert text to TF-IDF features.
# The vocabulary is fitted on the training messages only; the test messages are
# transformed with that same fitted vectorizer.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a linear Support Vector Machine on the TF-IDF features.
model = LinearSVC()
model.fit(X_train_tfidf, y_train)

# Predict labels for the held-out messages.
y_pred = model.predict(X_test_tfidf)

# Evaluate the model.
print("Accuracy:", accuracy_score(y_test, y_pred))
# Concatenate instead of passing the report as a second print argument: print's
# default separator would put a space in front of the report and push its
# header row one column out of line with the rows below it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred))


Accuracy: 0.97847533632287

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       965
           1       0.96      0.87      0.92       150

    accuracy                           0.98      1115
   macro avg       0.97      0.93      0.95      1115
weighted avg       0.98      0.98      0.98      1115



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Load the SMS dataset from the Drive path below, keeping only the label
# column (v1) and the message text (v2).
df = pd.read_csv('/content/drive/MyDrive/spam.csv', encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'message']

# Encode labels as integers: ham -> 0, spam -> 1.
# Series.map yields NaN for any value not present in the mapping; check for
# that explicitly so a malformed label is reported here rather than being
# passed on to the classifier as NaN.
label_codes = df['label'].map({'ham': 0, 'spam': 1})
if label_codes.isna().any():
    unexpected = sorted(df.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values (expected 'ham' or 'spam'): {unexpected}")
df['label'] = label_codes

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified; consider stratify=df['label'] if the
# class proportions must match between the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)

# Convert text to TF-IDF features.
# `vectorizer` is fitted on the training messages only and reused as-is for the
# test messages (and by predict_message below for new input).
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a linear Support Vector Machine on the TF-IDF features.
model = LinearSVC()
model.fit(X_train_tfidf, y_train)

# Predict labels for the held-out messages.
y_pred = model.predict(X_test_tfidf)

# Evaluate the model.
print("Accuracy:", accuracy_score(y_test, y_pred))
# Concatenate instead of passing the report as a second print argument: print's
# default separator would put a space in front of the report and push its
# header row one column out of line with the rows below it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred))

# ---- 🔮 Prediction for new messages ----
def predict_message(text):
    """Classify a single message as "SPAM" or "NOT SPAM".

    The message is wrapped in a one-element list, vectorised with the
    module-level ``vectorizer`` and passed to the module-level ``model``.

    Args:
        text: The message to classify.

    Returns:
        "SPAM" when the model's prediction equals 1, otherwise "NOT SPAM".
    """
    # The vectorizer works on a collection of documents, hence the list wrapper.
    features = vectorizer.transform([text])
    # Only one document went in, so the first prediction is the one we want.
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "SPAM"
    return "NOT SPAM"

# 🔧 Example usage
# Interactive demo: keep classifying messages typed by the user until they
# enter 'exit' (case-insensitive, surrounding whitespace ignored).
while True:
    try:
        user_input = input("\nEnter an SMS message (or type 'exit' to quit):\n> ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted while waiting for input:
        # leave the loop quietly instead of surfacing a traceback.
        break

    message = user_input.strip()
    if message.lower() == 'exit':
        break
    if not message:
        # Nothing was typed; prompt again rather than reporting a prediction
        # for an empty message.
        continue

    result = predict_message(message)
    print(f"Prediction: {result}")

Accuracy: 0.97847533632287

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       965
           1       0.96      0.87      0.92       150

    accuracy                           0.98      1115
   macro avg       0.97      0.93      0.95      1115
weighted avg       0.98      0.98      0.98      1115


Enter an SMS message (or type 'exit' to quit):
> hii
Prediction: NOT SPAM

Enter an SMS message (or type 'exit' to quit):
> gfedfghjuyt
Prediction: NOT SPAM

Enter an SMS message (or type 'exit' to quit):
> exit


In [None]:
# Read the spam CSV (latin-1 encoded) and retain only columns 'v1' and 'v2'.
df = pd.read_csv(
    '/content/drive/MyDrive/spam.csv',
    encoding='latin-1',
).loc[:, ['v1', 'v2']]