In [1]:
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report


# Load the intent definitions. Each intent carries a 'tag' (the class label)
# and a list of example 'patterns' (the training utterances).
with open('data.json', 'r') as f:
    data = json.load(f)

intents = data['intents']

# Flatten the intents into parallel lists: X[i] is an utterance, y[i] its tag.
X = []
y = []

for intent in intents:
    for pattern in intent['patterns']:
        X.append(pattern)
        y.append(intent['tag'])

# Hold out 20% of the utterances for evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the TF-IDF vocabulary on the training split only, then reuse it for the
# test split so no test-set statistics leak into the features.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Build the label <-> index mapping from ALL tags, in sorted order.
# - Using every tag (not only those in y_train) avoids a KeyError when the
#   unstratified split puts a tag exclusively in the test set.
# - Sorting makes the mapping deterministic; iterating a bare set of strings
#   depends on hash randomisation and changes between kernel restarts.
labels = sorted(set(y))
label_to_idx = {label: idx for idx, label in enumerate(labels)}
y_train_numeric = np.array([label_to_idx[label] for label in y_train])
y_test_numeric = np.array([label_to_idx[label] for label in y_test])

# Hyperparameter grid for GridSearchCV.
# Note: SVC only uses `gamma` for the non-linear kernels, so the gamma values
# are redundant for kernel='linear' (those candidates score identically).
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'] + [0.001, 0.01, 0.1, 1, 10]
}

# Exhaustive 5-fold cross-validated search over the grid, using all cores.
svm_model = SVC()
grid_search = GridSearchCV(svm_model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_tfidf, y_train_numeric)

# Evaluate the best estimator (refit on the whole training split) on the
# held-out utterances, mapping predicted indices back to tag names.
best_svm_model = grid_search.best_estimator_
y_pred_numeric = best_svm_model.predict(X_test_tfidf)
pred_labels = [labels[idx] for idx in y_pred_numeric]

# zero_division=0 reports 0.0 for tags with no predicted/true samples (the
# same numbers as the default) without emitting a warning per metric.
print(classification_report(y_test, pred_labels, zero_division=0))





                                                       precision    recall  f1-score   support

                          About_IOD_Protocol_Approval       0.00      0.00      0.00         0
                        About_Radiographic_Guide_CBCT       1.00      1.00      1.00         2
                About_Radiographic_Guide_CBCT_Patient       1.00      0.50      0.67         2
           About_Radiographic_Guide_CBCT_Review_Scans       0.50      1.00      0.67         1
        About_Radiographic_Guide_CBC_Faculty_Approval       1.00      0.67      0.80         3
                          About_STI_Crown_Appointment       0.50      1.00      0.67         1
               About_STI_Extraction_Site_Preservation       0.00      0.00      0.00         1
                          About_STI_Protocol_Approval       1.00      1.00      1.00         1
                      About_STI_Protocol_Consultation       0.67      1.00      0.80         2
                   About_STI_Protocol_Custom_Abut

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
import random
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


# Load the intent definitions. `intents` stays the full JSON document (a dict
# with an 'intents' list) because later cells index it as intents['intents'].
with open('data.json', 'r') as json_data:
    intents = json.load(json_data)

# Flatten the intents into parallel lists: X[i] is an utterance, y[i] its tag.
X = []
y = []

for intent in intents['intents']:
    for pattern in intent['patterns']:
        X.append(pattern)
        y.append(intent['tag'])

# Vectorize every utterance; this final model is trained on all the data,
# so there is no held-out split here.
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(X)

# Map tags to integer class indices. Sorting makes the mapping deterministic:
# iterating a bare set of strings depends on hash randomisation, so the
# index assigned to each tag would otherwise change between kernel restarts.
labels = sorted(set(y))
label_to_idx = {label: idx for idx, label in enumerate(labels)}
y_numeric = np.array([label_to_idx[label] for label in y])

# Hyperparameter grid for GridSearchCV.
# Note: SVC only uses `gamma` for the non-linear kernels, so the gamma values
# are redundant for kernel='linear' (those candidates score identically).
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'] + [0.001, 0.01, 0.1, 1, 10]
}

# Exhaustive 5-fold cross-validated search over the grid, using all cores.
svm_model = SVC()
grid_search = GridSearchCV(svm_model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_tfidf, y_numeric)

# Best estimator found by the search (refit on the full data set).
best_svm_model = grid_search.best_estimator_

def classify_intent(user_input):
    """Predict the intent tag for a single user utterance.

    The text is vectorized with the module-level ``vectorizer``, classified
    by ``best_svm_model``, and the resulting class index is translated back
    to its tag through the module-level ``labels`` list.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The entry of ``labels`` at the predicted class index.
    """
    features = vectorizer.transform([user_input])
    # predict() returns one class index per input row; there is exactly one row.
    class_index = best_svm_model.predict(features)[0]
    return labels[class_index]





In [4]:

def chatbot_loop(bot_name="Faculty", similarity_threshold=0.5):
    """Run an interactive console chat until the user quits.

    Each turn reads a line of input, vectorizes it with the module-level
    ``vectorizer`` and compares it against every training utterance in
    ``X_tfidf``. If the best match is below ``similarity_threshold`` the bot
    answers that it does not understand; otherwise the utterance is
    classified with ``classify_intent`` and a random response of the matching
    intent in ``intents['intents']`` is printed.

    Args:
        bot_name: Prefix printed in front of every bot reply.
        similarity_threshold: Minimum best-match similarity required before
            the classifier's prediction is trusted.

    Returns:
        None. The loop ends when the user types "quit" (any letter case,
        surrounding whitespace ignored) or when input is closed/interrupted.
    """
    fallback = f"{bot_name}: I do not understand..."
    print("Let's chat! (type 'Quit' to exit)")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the kernel was interrupted at the
            # prompt: end the chat instead of surfacing a traceback.
            break
        # Accept "Quit", "quit", " QUIT " ... rather than one exact spelling.
        if user_input.strip().lower() == "quit":
            break

        user_input_tfidf = vectorizer.transform([user_input])

        # Dot product of the input against every training utterance. With
        # TfidfVectorizer's default L2 normalisation this is the cosine
        # similarity; it serves as an out-of-scope gate for the classifier.
        similarity_scores = user_input_tfidf.dot(X_tfidf.T).toarray()[0]
        max_similarity = np.max(similarity_scores)

        print(f"You: {user_input}")
        if max_similarity < similarity_threshold:
            print(fallback)
            continue

        # Only classify once the input has passed the similarity gate.
        intent = classify_intent(user_input)

        response = fallback
        for intent_data in intents['intents']:
            if intent_data['tag'] == intent:
                responses = intent_data['responses']
                # An intent without responses falls back rather than making
                # random.choice raise IndexError on an empty list.
                if responses:
                    response = f"{bot_name}: " + random.choice(responses)
                break

        print(response)

# Start the interactive chat session; this cell blocks on input() until the
# user ends the conversation.
chatbot_loop()


Let's chat! (type 'Quit' to exit)
You: hi
Chatbot: Hello there! Welcome to Predoctoral Implant Program. How may I assist you today?
You: What codes do I have to enter for STI?
Chatbot:  For healed site (tooth that has been extracted): 1. D9365 UG Implant Consultation-STI 2. D0365L CBCT-One arch Mandible or D0366U CBCT-One arch Maxilla (if implants involve two arches, only put one arch code) 3. D6010U2 UG surg. Place. Endosteal implant 4. DD6190 Digital-Radiographic/surgical implant index (only charge one for multiple implants) 5. D6104UG Bone Graft at Impl place (always add no matter apply or not) 6. D7952UG Sinus lift-internal (based on CBCT review, consult with faculty) 7. DD6057 Digital-custom abutment 8. DD6058 Digital-all porcelain/ceramic crown on abutment (cement retained) 9. UG-implant recall-STI # For extracted site (tooth is present that needs to be extracted first): 1. D9365 UG Implant Consultation-STI 2. D7140 Extraction 3. D7953UG Extraction site preservation/graft 4. D036