In [1]:
# Smoke-test cell: prints a fixed greeting and touches no other notebook state.
print("Hello World")

Hello World


In [2]:
import pandas as pd

In [3]:
# Keep the dataset location in one named place so it is easy to spot and change.
DATASET_PATH = "intent_dataset.csv"

# Load the labelled queries; later cells read the "User Query" and "Intent Label" columns.
df = pd.read_csv(DATASET_PATH)

In [4]:
# Bare expression on the last line of the cell, so the notebook renders the frame.
df

Unnamed: 0,User Query,Intent Label
0,What is the number of bathrooms of a apartment...,database_query
1,Show me condo with security,database_query
2,What's the difference between Java and C++?,irrelevant
3,Do you have any duplex with four bedrooms?,database_query
4,Do you have any townhouse with one bedrooms?,database_query
...,...,...
495,Yo mate,general_query
496,Find me a penthouse for sale,database_query
497,Hey there,general_query
498,Please explain object-oriented programming,irrelevant


In [5]:
# Count how many rows carry each intent label, to check how balanced the classes are.
df["Intent Label"].value_counts()

Intent Label
irrelevant        167
general_query     167
database_query    166
Name: count, dtype: int64

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [7]:
# TF-IDF vectorizer with every constructor argument left at its default.
tfidf_vectorizer = TfidfVectorizer()

In [8]:
# Learn the vocabulary and IDF weights from the "User Query" column and encode it in one call.
# NOTE(review): this fits on every row of df, i.e. before any train/test split happens.
tfidf_matrix = tfidf_vectorizer.fit_transform(df["User Query"])

In [9]:
# .shape is (number of encoded queries, number of vocabulary terms).
print("TF-IDF Matrix Shape:", tfidf_matrix.shape)

TF-IDF Matrix Shape: (500, 225)


In [10]:
# Peek at the first ten terms of the fitted vocabulary.
tfidf_vectorizer.get_feature_names_out()[:10]

array(['address', 'afternoon', 'alarm', 'am', 'amenities', 'an', 'and',
       'any', 'apartment', 'are'], dtype=object)

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Split the raw query text first and vectorize afterwards. Fitting TF-IDF on the
# full dataset before splitting lets the held-out queries influence the vocabulary
# and IDF weights, which leaks test information into training.
# test_size and random_state are unchanged from the original split.
train_queries, test_queries, y_train, y_test = train_test_split(
    df["User Query"], df["Intent Label"], test_size=0.2, random_state=42
)

# Refit the vectorizer on the training queries only and rebind `tfidf_vectorizer`,
# so the object used for prediction and saved to disk matches the feature space
# the model is trained on. The test queries are transformed, never fitted.
tfidf_vectorizer = TfidfVectorizer()
X_train = tfidf_vectorizer.fit_transform(train_queries)
X_test = tfidf_vectorizer.transform(test_queries)

In [13]:
# Show the row count of each split and confirm both share the same number of feature columns.
print("Training Set Shape:", X_train.shape)
print("Testing Set Shape:", X_test.shape)

Training Set Shape: (400, 225)
Testing Set Shape: (100, 225)


In [14]:
from sklearn.linear_model import LogisticRegression

In [15]:
# Logistic-regression classifier with every hyperparameter left at its default.
model = LogisticRegression()

In [16]:
# Train on the training split only; X_test / y_test are used for evaluation in a later cell.
model.fit(X_train, y_train)

In [17]:
from sklearn.metrics import accuracy_score, classification_report

# Score the classifier on the held-out split.
y_pred = model.predict(X_test)

# Compute both metrics up front, then print them together.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.99
Classification Report:
                 precision    recall  f1-score   support

database_query       1.00      1.00      1.00        41
 general_query       1.00      0.96      0.98        25
    irrelevant       0.97      1.00      0.99        34

      accuracy                           0.99       100
     macro avg       0.99      0.99      0.99       100
  weighted avg       0.99      0.99      0.99       100



In [19]:
def predict_intent(new_query, tfidf_vectorizer, model):
    """Return the predicted intent label for a single query.

    Args:
        new_query: The query to classify.
        tfidf_vectorizer: Object whose ``transform`` accepts a list of queries
            and returns features that ``model`` can consume.
        model: Object whose ``predict`` returns an indexable sequence of labels.

    Returns:
        The first (and only expected) element of ``model.predict``'s result.
    """
    # transform() is given a one-element list; the single prediction is then
    # unwrapped with [0].
    return model.predict(tfidf_vectorizer.transform([new_query]))[0]
# Try the helper on one hand-written query using the in-memory vectorizer and model.
new_query = "Find me an apartment"
result = predict_intent(new_query, tfidf_vectorizer, model)
print(f"Query: '{new_query}' -> Predicted Intent: {result}")

Query: 'Find me an apartment' -> Predicted Intent: database_query


In [20]:
# A few hand-written queries to spot-check the classifier.
queries_to_test = [
    "Hi there",
    "Show me a villa for rent",
    "What’s the time?"
]

# Classify each query with the in-memory vectorizer/model and print the label.
for query in queries_to_test:
    intent = predict_intent(query, tfidf_vectorizer, model)
    print(f"Query: '{query}' -> Predicted Intent: {intent}")

Query: 'Hi there' -> Predicted Intent: general_query
Query: 'Show me a villa for rent' -> Predicted Intent: database_query
Query: 'What’s the time?' -> Predicted Intent: irrelevant


In [21]:
import joblib

# Persist both fitted objects: prediction needs the vectorizer as well as the classifier.
for artifact, filename in (
    (tfidf_vectorizer, 'tfidf_vectorizer.pkl'),
    (model, 'intent_model.pkl'),
):
    joblib.dump(artifact, filename)

print("Model and vectorizer saved as 'tfidf_vectorizer.pkl' and 'intent_model.pkl'")

Model and vectorizer saved as 'tfidf_vectorizer.pkl' and 'intent_model.pkl'


In [22]:
import joblib
# Reload both artifacts from the working directory, rebinding the in-memory names.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
model = joblib.load('intent_model.pkl')
print("Model and vectorizer loaded successfully!")

Model and vectorizer loaded successfully!


In [29]:
# Load the persisted vectorizer and classifier so this cell works from the saved files.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
model = joblib.load('intent_model.pkl')

def predict_intent(new_query, tfidf_vectorizer, model):
    """Classify one query and return its intent label.

    Args:
        new_query: The query to classify.
        tfidf_vectorizer: Object exposing ``transform`` for a list of queries.
        model: Object exposing ``predict`` for the transformed features.

    Returns:
        Element ``0`` of the sequence returned by ``model.predict``.
    """
    # Wrap the single query in a list before handing it to transform().
    features = tfidf_vectorizer.transform([new_query])
    labels = model.predict(features)
    # Exactly one query went in, so take the first prediction.
    return labels[0]

# A few more hand-written queries, run against the artifacts loaded from disk above.
queries_to_test = [
    "Find me a place to live and my budget is 50000",
    "Find me a house with a pool",
    "What’s up with the weather?"
]

# Classify each query and print the predicted label.
for query in queries_to_test:
    intent = predict_intent(query, tfidf_vectorizer, model)
    print(f"Query: '{query}' -> Predicted Intent: {intent}")

Query: 'Find me a place to live and my budget is 50000' -> Predicted Intent: database_query
Query: 'Find me a house with a pool' -> Predicted Intent: database_query
Query: 'What’s up with the weather?' -> Predicted Intent: irrelevant
