# Setting Up Libraries

In [2]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd

## Setting Up Training Data

In [6]:
# Labelled examples for the intent classifier.
# Every entry is exactly one (question, intent_label) pair.
train_data = [
    ('What course offer?', 'course_title'),
    ('What is the program overview?', 'program_overview'),
    ('How much is the course fee?', 'course_fee'),
    ('Where is the campus located?', 'campus_location'),
    ('When does the course intake start?', 'course_intake'),
    ('How long is the course duration?', 'course_duration'),
    # This entry previously carried a stray third element ('course_title')
    # that was never read; only the first label was ever used.
    ('Which campus offer this course?', 'campus_location'),
    ('Where is the Computer Science course located?', 'course_location'),
    ('What is the title of the Business Administration course?', 'course_title'),
    ('Can you provide an overview of the Nursing program?', 'program_overview'),
    ('What is the intake period for the Marketing course?', 'course_intake'),
    ('How long is the Engineering course duration?', 'course_duration'),
    ('What is the location of the Law School campus?', 'campus_location'),
    ('What is the fee for the Psychology course?', 'course_fee'),
    ('What is the duration of the Graphic Design course?', 'course_duration'),
    ('What is the Computer Engineering program about?', 'program_overview'),
    ('What is the fee for the Accounting course?', 'course_fee'),
    ('Which campus offers the Civil Engineering program?', 'campus_location'),
    # NOTE(review): exact duplicate of the Marketing intake question above.
    # Kept as-is so the dataset (and therefore the split) is unchanged.
    ('What is the intake period for the Marketing course?', 'course_intake'),
    ('Where is the Fine Arts course located?', 'course_location'),
    ('What is the course title of the Mathematics program?', 'course_title'),
    ('What is the duration of the Architecture course?', 'course_duration'),
    ('What is the fee for the Information Technology course?', 'course_fee'),
    ('Can you provide an overview of the Chemistry program?', 'program_overview'),
    ('What is the location of the Biology campus?', 'campus_location'),
    ('What is the course title of the Journalism program?', 'course_title'),
    ('What is the intake period for the Public Relations course?', 'course_intake'),
    ('Where is the Physics course located?', 'course_location'),
]

# Split the pairs into parallel lists: x holds the question texts, y the labels.
# Tuple unpacking raises immediately if an entry is not a 2-tuple, so a
# malformed row cannot slip through unnoticed.
x = [question for question, _ in train_data]
y = [label for _, label in train_data]

## Split Data into Training and Testing Sets

In [7]:
# Hold out 20% of the examples for evaluation; the fixed random_state makes
# the shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

## Vectorize input data

In [9]:
# Fit the TF-IDF vectorizer on the training questions only, then apply the
# already-fitted vectorizer to the held-out questions.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(raw_documents=X_train)
X_test_vec = vectorizer.transform(raw_documents=X_test)

## Train Naive Bayes Model

In [10]:
# Multinomial Naive Bayes classifier with default settings, trained on the
# TF-IDF features of the training questions and their intent labels.
clf = MultinomialNB()
clf.fit(X=X_train_vec, y=y_train)

MultinomialNB()

## Evaluate Naive Bayes Model 

In [12]:
# Predict intents for the held-out questions and report the fraction that
# match the true labels.
y_pred = clf.predict(X=X_test_vec)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: ", acc)

Accuracy:  0.5


## Use Naive Bayes Model for prediction

In [13]:
# Classify a single free-text question. The vectorizer expects an iterable of
# documents, so the question is wrapped in a one-element list and the single
# prediction is read back from position 0.
user_input = "How much is the fee for software engineering?"
user_input_vec = vectorizer.transform(raw_documents=[user_input])
intent = clf.predict(X=user_input_vec)
print("User intent: ", intent[0])

User intent:  course_fee
