# 📝 Simple NLP Text Classification (Spam / Not Spam)
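This notebook demonstrates a **basic NLP pipeline**: a tiny hand-written dataset is vectorised with **TF-IDF** and classified as spam or ham (not spam) with **Multinomial Naive Bayes**.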

In [None]:

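# Install dependencies (only needed if they are missing from the kernel's environment).
# Prefer %pip over !pip so the packages are installed into the environment this kernel runs in.
# %pip install pandas scikit-learn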


In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


### 1. Create Sample Dataset

In [None]:

# Toy corpus: each message is written next to its label so the two can never
# drift out of alignment when rows are added or removed.
labelled_messages = [
    ("Win a free iPhone now", "spam"),
    ("You have been selected for a lottery prize", "spam"),
    ("Let's meet tomorrow for lunch", "ham"),
    ("This is your invoice for last month", "ham"),
    ("Congratulations! You won a gift card", "spam"),
    ("Please review the attached project document", "ham"),
    ("Earn money quickly from home", "spam"),
]

# Column-oriented view of the same rows, in the shape pd.DataFrame expects.
data = {
    "text": [message for message, _ in labelled_messages],
    "label": [tag for _, tag in labelled_messages],
}

df = pd.DataFrame(data)
df


### 2. Split Data Into Train/Test

In [None]:

# Hold out 30% of the rows for evaluation; random_state fixes the shuffle so the
# split is reproducible on every Restart & Run All.
# The dataset has only seven rows, so an unstratified shuffle can easily put a
# single class into the training fold. Stratifying on the label keeps both
# "spam" and "ham" present in the train and test folds.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.3,
    random_state=42,
    stratify=df["label"],
)

# Sanity check: every row ended up in exactly one of the two folds.
assert len(X_train) + len(X_test) == len(df)


### 3. Convert Text to Features Using TF-IDF

In [None]:

# TF-IDF vectoriser with library-default settings.
vectorizer = TfidfVectorizer()
# Fit on the training texts only, so the learned vocabulary and IDF weights
# contain no information from the held-out test texts.
X_train_vec = vectorizer.fit_transform(X_train)
# Project the test texts into the feature space learned above (no refitting).
X_test_vec = vectorizer.transform(X_test)


### 4. Train Naive Bayes Model

In [None]:

# Train a Multinomial Naive Bayes classifier on the TF-IDF training features.
# fit() hands back the fitted estimator, so construction and training chain
# into a single assignment.
model = MultinomialNB().fit(X_train_vec, y_train)

# Leave the estimator as the cell's last expression so its repr is displayed.
model


### 5. Evaluate Model

In [None]:

# Score the held-out test fold.
y_pred = model.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
# With a test fold this small the model may never predict one of the classes,
# which leaves that class's precision undefined. zero_division=0 reports 0.0
# for such cells (the same value the default produces) without emitting an
# UndefinedMetricWarning into the notebook output.
print(classification_report(y_test, y_pred, zero_division=0))


### 6. Test with New Examples

In [None]:

# Two unseen messages to run through the fitted pipeline.
samples = [
    "Claim your free reward now",
    "Can we schedule a meeting tomorrow?",
]

# Reuse the already-fitted vectoriser (transform only, no refit) and classify.
sample_vec = vectorizer.transform(samples)
predictions = model.predict(sample_vec)

# Format one line per message and emit them in a single print call.
report_lines = [
    f"Text: {text} --> Prediction: {label}"
    for text, label in zip(samples, predictions)
]
print("\n".join(report_lines))
