# Logistic-regression-based intent classifier

## Import libraries

In [10]:
import os
import sys

# Extra import root added to sys.path (presumably where the project-local
# `myutils` package lives -- confirm against the repository layout).
# Set the INTENT_CLASSIFIER_ROOT environment variable to point at a different
# checkout; when it is unset, the original hardcoded location is used.
INTENT_CLASSIFIER_ROOT = os.environ.get(
    "INTENT_CLASSIFIER_ROOT",
    "G:/Cdac/ML_Final_Project/Multi-Modal-multi-Purpose-AI-agent/intent_classifier",
)
# Guarded so that re-running this cell does not stack duplicate entries.
if INTENT_CLASSIFIER_ROOT not in sys.path:
    sys.path.append(INTENT_CLASSIFIER_ROOT)
import logging
import os
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline

from myutils.data_preprocessor import load_and_prepare_data

In [11]:
# Read the raw dataset through the project helper, then separate the model
# input column ('text') from the label column ('intent').
data_path = "../../data/raw"
df = load_and_prepare_data(data_path)
X, y = df['text'], df['intent']

In [12]:
# Hold out a fixed fraction of the examples for evaluation; the seed keeps the
# split reproducible across runs.
TEST_SIZE = 0.2
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# TF-IDF features feeding a logistic-regression classifier.
# liblinear is a binary solver: handing it a multiclass target directly is
# deprecated in recent scikit-learn releases and is slated to become an error.
# Wrapping the estimator in OneVsRestClassifier keeps the same one-vs-rest
# scheme explicitly (one binary liblinear model per intent).
# NOTE(review): the 'clf' step is now a OneVsRestClassifier; any consumer that
# reads attributes of the inner LogisticRegression should go through
# `pipeline.named_steps['clf'].estimators_` -- confirm against downstream code.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', OneVsRestClassifier(
        LogisticRegression(solver='liblinear', max_iter=1000)
    )),
])

pipeline.fit(X_train, y_train)


In [13]:
# Predict on the held-out split and print the per-class metrics table under
# its heading (heading, blank line, then the report).
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report, sep="\n")


Classification Report:

                  precision    recall  f1-score   support

convert_to_audio       0.96      0.99      0.97       273
gmail_operations       0.99      0.99      0.99       195
      make_notes       1.00      0.99      0.99       439
 stock_sentiment       1.00      1.00      1.00       228
   voice_summary       0.98      0.94      0.96       163

        accuracy                           0.99      1298
       macro avg       0.99      0.98      0.99      1298
    weighted avg       0.99      0.99      0.99      1298



In [14]:
# Persist the fitted pipeline so it can be reloaded without retraining.
# The output directory is created on first run and reused afterwards.
model_dir = "./model"
os.makedirs(model_dir, exist_ok=True)
with open("./model/intent_classifier_pipeline.pkl", "wb") as model_file:
    pickle.dump(pipeline, model_file)