In [5]:
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report


with open('data.json', 'r') as f:
    data = json.load(f)

intents = data['intents']

X = []
y = []

for intent in intents:
    for pattern in intent['patterns']:
        X.append(pattern)
        y.append(intent['tag'])


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# tfidf
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

labels = list(set(y_train))
label_to_idx = {label: idx for idx, label in enumerate(labels)}
y_train_numeric = np.array([label_to_idx[label] for label in y_train])
y_test_numeric = np.array([label_to_idx[label] for label in y_test])

# Naive Bayes model
naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(X_train_tfidf, y_train_numeric)

y_pred_numeric = naive_bayes_model.predict(X_test_tfidf)
pred_labels = [labels[idx] for idx in y_pred_numeric]

print(classification_report(y_test, pred_labels))


                                                       precision    recall  f1-score   support

                        About_Radiographic_Guide_CBCT       0.00      0.00      0.00         2
                About_Radiographic_Guide_CBCT_Patient       0.00      0.00      0.00         2
           About_Radiographic_Guide_CBCT_Review_Scans       0.00      0.00      0.00         1
        About_Radiographic_Guide_CBC_Faculty_Approval       0.00      0.00      0.00         3
                          About_STI_Crown_Appointment       0.00      0.00      0.00         1
               About_STI_Extraction_Site_Preservation       0.00      0.00      0.00         1
                          About_STI_Protocol_Approval       0.00      0.00      0.00         1
                   About_STI_Protocol_Custom_Abutment       0.00      0.00      0.00         2
                          About_STI_Protocol_Discount       0.00      0.00      0.00         2
                   About_STI_Protocol_Extraction_

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
import random
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report

# Load data from JSON
with open('data.json', 'r') as json_data:
    intents = json.load(json_data)

# Extract data from JSON
X = []
y = []
for intent in intents['intents']:
    for pattern in intent['patterns']:
        X.append(pattern)
        y.append(intent['tag'])

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the vectorizer with preprocessing and hyperparameters
vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',  # You can experiment with removing stopwords
    ngram_range=(1, 2),    # Try using bigrams in addition to unigrams
    max_df=0.8             # Experiment with removing high-frequency terms
)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Labels to numerical values
labels = list(set(y_train))
label_to_idx = {label: idx for idx, label in enumerate(labels)}
y_train_numeric = np.array([label_to_idx[label] for label in y_train])
y_test_numeric = np.array([label_to_idx[label] for label in y_test])

# Hyperparameter grid for GridSearchCV
param_grid = {
    'alpha': [0.1, 0.5, 1.0],  # Experiment with different values of alpha
}

# Naive Bayes Model with GridSearchCV
naive_bayes_model = MultinomialNB()
grid_search = GridSearchCV(naive_bayes_model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_tfidf, y_train_numeric)

# Get the best model from GridSearchCV
best_naive_bayes_model = grid_search.best_estimator_

# Predict labels for the test set
y_pred_numeric = best_naive_bayes_model.predict(X_test_tfidf)
pred_labels = [labels[idx] for idx in y_pred_numeric]

# Print classification report
print(classification_report(y_test, pred_labels))




                                                       precision    recall  f1-score   support

                        About_Radiographic_Guide_CBCT       0.00      0.00      0.00         2
                About_Radiographic_Guide_CBCT_Patient       0.00      0.00      0.00         2
           About_Radiographic_Guide_CBCT_Review_Scans       0.50      1.00      0.67         1
        About_Radiographic_Guide_CBC_Faculty_Approval       1.00      0.67      0.80         3
                          About_STI_Crown_Appointment       0.00      0.00      0.00         1
               About_STI_Extraction_Site_Preservation       1.00      1.00      1.00         1
                          About_STI_Protocol_Approval       1.00      1.00      1.00         1
                   About_STI_Protocol_Custom_Abutment       1.00      1.00      1.00         2
                          About_STI_Protocol_Discount       1.00      1.00      1.00         2
                   About_STI_Protocol_Extraction_

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
from sklearn.metrics import classification_report

# ... (previous code)

# get best model from GridSearchCV
best_naive_bayes_model = grid_search.best_estimator_

# ... (previous code)

# Predict labels for the test set
y_pred_numeric = best_naive_bayes_model.predict(X_tfidf)
pred_labels = [labels[idx] for idx in y_pred_numeric]

# Print classification report
print(classification_report(y, pred_labels))


                                                       precision    recall  f1-score   support

                          About_IOD_Protocol_Approval       0.00      0.00      0.00         4
                        About_Radiographic_Guide_CBCT       0.00      0.00      0.00         5
                 About_Radiographic_Guide_CBCT_Barium       0.00      0.00      0.00         3
                About_Radiographic_Guide_CBCT_Patient       0.00      0.00      0.00         4
           About_Radiographic_Guide_CBCT_Review_Scans       0.00      0.00      0.00         6
        About_Radiographic_Guide_CBC_Faculty_Approval       0.00      0.00      0.00         5
                          About_STI_Crown_Appointment       1.00      0.17      0.29         6
               About_STI_Extraction_Site_Preservation       1.00      0.17      0.29         6
                          About_STI_Protocol_Approval       1.00      0.50      0.67         6
                   About_STI_Protocol_Custom_Abut

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:

def chatbot_loop():
    bot_name = "Chatbot"
    print("Let's chat! (type 'Quit' to exit)")
    while True:
        user_input = input("You: ")
        if user_input == "Quit":
            break
        
        user_input_tfidf = vectorizer.transform([user_input]) 
        intent = classify_intent(user_input)
        
        similarity_scores = user_input_tfidf.dot(X_tfidf.T).toarray()[0]
        max_similarity = np.max(similarity_scores)

        print(f"You: {user_input}")
        if max_similarity < 0.5:
            print(f"{bot_name}: I do not understand...")
        else:
            for intent_data in intents['intents']:
                if intent_data['tag'] == intent:
                    responses = intent_data['responses']
                    response = f"{bot_name}: " + random.choice(responses)
                    break
            else:
                response = f"{bot_name}: I do not understand..."

            print(response)

chatbot_loop()


Let's chat! (type 'Quit' to exit)
You: hello
Chatbot: You need to get a fixed cassette from the window, x-ray sensor and implant cassette from implant clinic manager office. If the implant is already placed, you need to know which implant system (Brand and size) that your patient has. Please look the patient   s information from the Axium in clinical notes or components used section in forms. If you are taking an impression, you will need to get the implant impression coping and implant analog for conventional PVS impression or Scan body for digital impression.
You: hello
Chatbot: You need to get a fixed cassette from the window, x-ray sensor and implant cassette from implant clinic manager office. If the implant is already placed, you need to know which implant system (Brand and size) that your patient has. Please look the patient   s information from the Axium in clinical notes or components used section in forms. If you are taking an impression, you will need to get the implant impr