In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
import joblib
import os

# Load the labelled issue reports (path is relative to the working directory).
df = pd.read_csv('../data/sample_issues.csv')

# Drop only the rows that lack the text or the label. A blanket dropna()
# would also discard usable rows that merely have a gap in some other column,
# and the model below reads nothing but 'description' and 'category'.
df = df.dropna(subset=['description', 'category'])

# Create pipeline: TF-IDF features (English stop words removed) + Naive Bayes
model = make_pipeline(
    TfidfVectorizer(stop_words='english'),
    MultinomialNB()
)

# Train on all data (since we have few samples).
# NOTE: nothing is held out, so the spot-check below is only a sanity check,
# not a measure of how well the classifier generalises.
model.fit(df['description'], df['category'])

# Check a few predictions manually
sample_texts = [
    "There's a pothole near my house",
    "Garbage not cleaned in days",
    "No water in the kitchen tap"
]

# Predict the whole batch in one call instead of once per sentence; the
# pipeline scores each input independently, so the printed labels are the same.
for text, label in zip(sample_texts, model.predict(sample_texts)):
    print(f"{text} ➝ {label}")

# Save model (create the target directory first if it does not exist yet).
# joblib.dump stays the last expression so its return value, the list of
# written file names, is still displayed as the cell result.
os.makedirs('../ml', exist_ok=True)
joblib.dump(model, '../ml/issue_classifier.pkl')


There's a pothole near my house ➝ water
Garbage not cleaned in days ➝ sanitation
No water in the kitchen tap ➝ water


['../ml/issue_classifier.pkl']