<a href="https://colab.research.google.com/github/abhijitguinkatwa/machine-learning-projects/blob/main/SMS_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load and explore the dataset
# The CSV file is expected to provide a 'label' column and a 'text' column
sms_data = pd.read_csv('sms_data.csv')

# Fail early with a readable message instead of a bare KeyError further down
missing_columns = [column for column in ('label', 'text') if column not in sms_data.columns]
if missing_columns:
    raise ValueError(f"sms_data.csv is missing required column(s): {missing_columns}")

# Display the first few rows of the dataset
print(sms_data.head())

# Step 2: Data Preprocessing
# Convert labels to binary values: 1 for spam, 0 for non-spam (ham).
# Whitespace and letter case are normalized first so that values such as
# 'Spam' or ' ham' are still recognized; anything else maps to NaN.
sms_data['label'] = (
    sms_data['label'].astype(str).str.strip().str.lower().map({'spam': 1, 'ham': 0})
)

# Rows with an unrecognized label or a missing message cannot be used for
# training or evaluation, so drop them and report how many were removed.
# .copy() gives an independent frame so the column assignments below do not
# write into a view of the original data.
rows_before = len(sms_data)
sms_data = sms_data.dropna(subset=['label', 'text']).copy()
rows_dropped = rows_before - len(sms_data)
if rows_dropped:
    print(f"Dropped {rows_dropped} row(s) with an unrecognized label or missing text")
if sms_data.empty:
    raise ValueError("No usable rows left in sms_data.csv after cleaning")

# The mapping yields floats whenever NaN was present; restore integer labels
sms_data['label'] = sms_data['label'].astype(int)
# The vectorizer works on string documents; this is a no-op for text columns
sms_data['text'] = sms_data['text'].astype(str)

# Step 3: Train-Test Split
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    sms_data['text'],
    sms_data['label'],
    test_size=0.2,
    random_state=42,
)

# Step 4: Text Vectorization
# Convert text data into token-count features using CountVectorizer.
# The vocabulary is learned from the training split only and then reused
# (transform, not fit_transform) for the test split.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Step 5: Build and Train the Classifier
# Use a simple Naive Bayes classifier for text classification
classifier = MultinomialNB()
classifier.fit(X_train_vectorized, y_train)

# Step 6: Make Predictions
# Predict labels for the test set
y_pred = classifier.predict(X_test_vectorized)

# Step 7: Evaluate the Model
# Calculate accuracy and display classification report and confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
