<a href="https://colab.research.google.com/github/burakbeyazit/chatbot-project/blob/main/Chatbot_Projesi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -r /content/drive/MyDrive/OtherDS/requirements.txt



Collecting streamlit (from -r /content/drive/MyDrive/OtherDS/requirements.txt (line 13))
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->-r /content/drive/MyDrive/OtherDS/requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->-r /content/drive/MyDrive/OtherDS/requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->-r /content/drive/MyDrive/OtherDS/requirements.txt (line 1))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->-r /content/drive/MyDrive/OtherDS/requirements.txt (line 1))
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metad

In [3]:
import os
import json
import gradio as gr
import pandas as pd
import difflib
import openai
from huggingface_hub import InferenceClient
from sklearn.metrics import precision_score, recall_score, f1_score

# --- CONFIGURATION ---

def load_keys(config_path="/content/drive/MyDrive/OtherDS/keys.txt"):
    """Read ``NAME=value`` pairs from a plain-text credentials file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Each remaining line is split on its first ``=`` only, so values
    may themselves contain ``=``. Whitespace around both the name and the
    value is removed, which makes ``NAME = value`` and ``NAME=value``
    equivalent.

    Args:
        config_path: Path of the key file to read.

    Returns:
        dict mapping every name found in the file to its value (both ``str``).
        When a name occurs more than once the last occurrence wins.

    Raises:
        OSError: if the file cannot be opened.
    """
    keys = {}
    # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise become part of the first key name.
    with open(config_path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            name, value = line.split("=", 1)
            name = name.strip()
            if name:
                keys[name] = value.strip()
    return keys

# Usage: every external resource (credentials, book table, intent definitions)
# is read from one directory. It defaults to the Drive folder this notebook was
# written against and can be redirected with the CHATBOT_DATA_DIR environment
# variable, so the paths no longer have to be edited in three places.
DATA_DIR = os.environ.get("CHATBOT_DATA_DIR", "/content/drive/MyDrive/OtherDS")

keys = load_keys(os.path.join(DATA_DIR, "keys.txt"))

# Fail early with a readable message instead of a bare KeyError on first lookup.
missing_keys = [name for name in ("OPENAI_API_KEY", "HF_TOKEN") if name not in keys]
if missing_keys:
    raise KeyError(f"keys.txt is missing required entries: {', '.join(missing_keys)}")

OPENAI_API_KEY = keys["OPENAI_API_KEY"]
HF_TOKEN = keys["HF_TOKEN"]

# One client per backend; both are reused by the classifier functions.
client = openai.OpenAI(api_key=OPENAI_API_KEY)
hf_model_id = "microsoft/Phi-3-mini-4k-instruct"
hf_client = InferenceClient(hf_model_id, token=HF_TOKEN)


# --- DATA LOADING ---
# df_books: table searched by title; intents: list stored under the 'intents'
# key of intents.json.
df_books = pd.read_csv(os.path.join(DATA_DIR, "archive", "data.csv"))
with open(os.path.join(DATA_DIR, "archive", "intents.json"), 'r', encoding='utf-8') as f:
    intents = json.load(f)['intents']

# --- PROMPT BUILDER ---
def build_intent_list_prompt(intents):
    """Compose the instruction text that precedes the user's message.

    The prompt lists every intent tag followed by its example patterns, then
    instructs the model to reply with exactly one of those tags. It ends with
    a ``User input:`` line so the caller can append the utterance.

    Args:
        intents: list of dicts, each holding a ``'tag'`` string and a
            ``'patterns'`` list of strings.

    Returns:
        The prompt prefix as one string.
    """
    example_lines = []
    for intent in intents:
        joined_patterns = ', '.join(intent['patterns'])
        example_lines.append(f"{intent['tag']}: {joined_patterns}")
    tag_patterns = "\n".join(example_lines)

    tag_list = ", ".join(intent['tag'] for intent in intents)

    return f"""You are an intent classifier bot. Possible intents and their example user inputs:

{tag_patterns}

Your task: For any user input, return ONLY one of the following intent tags: {tag_list}.
Do NOT create new tags. Just return the tag (e.g., greeting, goodbye, book_search, etc).
User input:
"""

# --- INTENT CLASSIFIERS ---
def classify_with_gpt(text):
    """Classify ``text`` into one intent tag with the OpenAI chat model.

    The shared intent prompt is extended with the quoted user text and sent
    to ``gpt-3.5-turbo``. Only the first line of the reply is used.

    Args:
        text: The user's utterance.

    Returns:
        The tag from the first reply line with surrounding whitespace, quotes,
        backticks and full stops removed. If the request fails or the reply is
        empty, a string starting with ``"error:"`` is returned instead (the
        caller checks for that prefix); no exception propagates from the API
        call.
    """
    prompt = build_intent_list_prompt(intents) + f"'{text}'\nIntent tag:"
    try:
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=[
                {"role": "system", "content": "You are an intent classification bot."},
                {"role": "user", "content": prompt}
            ],
            # Classification should be repeatable and the answer is a single
            # short tag, so sample greedily and bound the reply length, in
            # line with the low temperature / token cap of the HF classifier.
            temperature=0,
            max_tokens=16,
        )
        # `content` may be None; treat that like an empty reply.
        content = response.choices[0].message.content or ""
        # Models often echo the tag in quotes or end it with a period, which
        # would defeat the exact tag comparison done by the callers.
        output = content.strip().split('\n')[0].strip(" \t\"'`.")
        if not output:
            return "error: empty response from GPT"
        print("GPT OUTPUT:", output)
        return output
    except Exception as e:
        print("GPT ERROR:", e)
        return f"error: {str(e)}"

def classify_with_hf(text):
    """Classify ``text`` into one intent tag with the Hugging Face client.

    The shared intent prompt plus the quoted user text is passed to
    ``hf_client.text_generation``; the first line of the stripped completion
    is returned.

    Args:
        text: The user's utterance.

    Returns:
        The first line of the completion, or a string starting with
        ``"error:"`` that carries the exception message and the formatted
        traceback when the call fails.
    """
    prompt_prefix = build_intent_list_prompt(intents)
    prompt = prompt_prefix + f"'{text}'\nIntent tag:"
    try:
        generated = hf_client.text_generation(prompt, max_new_tokens=8, temperature=0.1)
        # Keep only what precedes the first newline of the trimmed completion.
        first_line, _, _ = generated.strip().partition('\n')
        print("HF OUTPUT:", first_line)
        return first_line
    except Exception as err:
        import traceback
        trace_text = traceback.format_exc()
        print("HF ERROR:", err)
        print(trace_text)
        return f"error: {str(err)}\n\n{trace_text}"



# --- FUZZY BOOK SEARCH ---
def _format_book_rows(rows):
    """Render each row of ``rows`` as a Title/Author/Description paragraph."""
    def field(row, column):
        # Missing columns and missing cells (NaN/None) are both shown as '-'.
        value = row.get(column, '-')
        try:
            is_missing = bool(pd.isna(value))
        except (TypeError, ValueError):
            is_missing = False
        return '-' if is_missing else value

    return "".join(
        f"Title: {field(row, 'title')}\n"
        f"Author: {field(row, 'author')}\n"
        f"Description: {field(row, 'description')}\n\n"
        for _, row in rows.iterrows()
    )


def find_book_info(user_input, df_books):
    """Look up books whose title resembles ``user_input``.

    Two strategies are tried in order:

    1. Fuzzy matching of the whole input against every title with
       ``difflib.get_close_matches`` (up to 3 titles, cutoff 0.3), compared
       case-insensitively.
    2. Keyword fallback: every word longer than 3 characters, with
       surrounding punctuation removed, is searched as a case-insensitive
       substring of the titles.

    Args:
        user_input: Free-text request typed by the user.
        df_books: DataFrame with a ``'title'`` column; ``'author'`` and
            ``'description'`` are shown when present.

    Returns:
        One ``Title/Author/Description`` paragraph per matching row, or
        ``"No matching book found in data.csv."`` when nothing matches or
        the input is empty.
    """
    no_match = "No matching book found in data.csv."
    query = (user_input or "").strip()
    if not query:
        return no_match

    titles_lower = df_books['title'].astype(str).str.lower()

    matches = difflib.get_close_matches(query.lower(), titles_lower.tolist(), n=3, cutoff=0.3)
    if matches:
        return _format_book_rows(df_books[titles_lower.isin(matches)])

    # Strip punctuation so that "Rings." or "Dick?" still match the title text.
    punctuation = ".,;:!?\"'()[]{}"
    keywords = [
        word
        for word in (part.strip(punctuation).lower() for part in query.split())
        if len(word) > 3
    ]
    if keywords:
        mask = titles_lower.apply(
            lambda title: any(kw in title for kw in keywords)
        ).astype(bool)
        filtered = df_books[mask]
        if not filtered.empty:
            return _format_book_rows(filtered)
    return no_match

def format_book_responses(responses):
    """Turn a list of book-response dicts into a numbered text listing.

    Args:
        responses: list of dicts; the keys ``'Book'``, ``'Feedback'`` and
            ``'Rate'`` are read, and ``'-'`` is shown for any that is absent.

    Returns:
        One three-line entry per dict, numbered from 1 and each followed by
        a blank line; an empty string for an empty list.
    """
    return "".join(
        f"#{position} - Title: {entry.get('Book', '-')}\n"
        f"    Feedback: {entry.get('Feedback', '-')}\n"
        f"    Goodreads Rate: {entry.get('Rate', '-')}\n\n"
        for position, entry in enumerate(responses, start=1)
    )

# --- MAIN CHATBOT FUNCTION ---
def chatbot_fn(user_input, model_choice):
    """Answer one user message: classify its intent, then build the reply.

    Args:
        user_input: Text typed by the user.
        model_choice: ``"GPT-3.5-Turbo"`` selects the OpenAI classifier; any
            other value selects the Hugging Face classifier.

    Returns:
        A text reply. Depending on the outcome this is a prompt to type
        something (blank input), a ``MODEL ERROR`` report (classifier
        failure), book search results (book intents), the first canned
        response of the matching intent, or a note that no response or no
        intent matched.
    """
    # Do not spend a model call on a blank message.
    if not user_input or not user_input.strip():
        return "Please enter a question or a book request."

    if model_choice == "GPT-3.5-Turbo":
        intent = classify_with_gpt(user_input)
    else:
        intent = classify_with_hf(user_input)
    if intent.startswith("error:"):
        return f"MODEL ERROR:\n{intent}"

    # Normalise once so the book-intent check and the tag comparison below
    # agree on whitespace and case.
    intent = intent.strip()
    intent_key = intent.lower()

    # Book lookup intents are answered from the book table.
    if intent_key in ['book_search', 'info', 'book_info']:
        result = find_book_info(user_input, df_books)
        return f"Intent: {intent}\nResults:\n{result}"

    # direct tag match
    for i in intents:
        if i['tag'].lower().strip() == intent_key:
            responses = i.get('responses')
            # An empty list has no first element; treat it like a missing one.
            if isinstance(responses, list) and responses:
                if isinstance(responses[0], dict):
                    return f"Intent: {intent}\n\n{format_book_responses(responses)}"
                return f"Intent: {intent}\nResponse: {responses[0]}"
            return f"Intent: {intent}\nNo response found."
    return f"Intent: {intent}\nNo matching intent in intents.json."

# --- METRIC EVALUATION ---
# Hand-labelled evaluation set: each entry is (user utterance, expected intent tag).
# eval_interface passes this list to evaluate_model once per classifier.
# NOTE(review): the labels presumably correspond to tags defined in intents.json;
# a label that is not a real tag can never be predicted correctly - confirm.
test_examples = [
    ("Can you recommend a book?", "book_search"),
    ("Tell me about Lord of the Rings.", "book_info"),
    ("Thanks!", "thanks"),
    ("Hello", "greeting"),
    ("Goodbye!", "goodbye"),
    ("Suggest me something to read", "book_search"),
    ("I want to know about Moby Dick", "book_info"),
    ("Thank you", "thanks"),
    ("Hi there!", "greeting"),
    ("See you later", "goodbye"),
]

def evaluate_model(classifier_fn, test_examples):
    """Score an intent classifier against labelled examples.

    Every question is passed to ``classifier_fn``; prediction and expected
    label are lower-cased and stripped before being compared through the
    weighted precision, recall and F1 metrics (``zero_division=0``).

    Args:
        classifier_fn: Callable taking a question string and returning the
            predicted tag as a string.
        test_examples: Iterable of ``(question, true_label)`` pairs.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    y_true, y_pred = [], []
    for question, true_label in test_examples:
        predicted = classifier_fn(question)
        y_pred.append(predicted.lower().strip())
        y_true.append(true_label.lower().strip())

    # The three metrics share one call signature, so compute them in order.
    metric_fns = (precision_score, recall_score, f1_score)
    return tuple(
        metric(y_true, y_pred, average='weighted', zero_division=0)
        for metric in metric_fns
    )

def eval_interface():
    """Evaluate both classifiers on ``test_examples`` and report the scores.

    Returns:
        A text block with precision, recall and F1 (two decimals each) for
        the GPT classifier followed by the same figures for the Hugging Face
        classifier.
    """
    def score_lines(precision, recall, f1):
        # Three metric lines, without a trailing newline.
        return "Precision: {:.2f}\nRecall: {:.2f}\nF1: {:.2f}".format(precision, recall, f1)

    gpt_precision, gpt_recall, gpt_f1 = evaluate_model(classify_with_gpt, test_examples)
    hf_precision, hf_recall, hf_f1 = evaluate_model(classify_with_hf, test_examples)

    sections = [
        "**GPT-3.5-Turbo:**\n" + score_lines(gpt_precision, gpt_recall, gpt_f1),
        "**OpenSource LLM:**\n" + score_lines(hf_precision, hf_recall, hf_f1),
    ]
    return "\n\n".join(sections)

# --- GRADIO INTERFACE ---
# Two-tab Gradio app: one tab to chat with the bot, one to compare both classifiers.
with gr.Blocks() as demo:
    gr.Markdown("## Book Recommendation Chatbot (GPT & OpenSource LLM)")
    # NOTE(review): the description names Mistral / Llama-3 as examples, while
    # hf_model_id in the configuration is a Phi-3 model - confirm the wording.
    gr.Markdown(
        "Intent classification and book info lookup with your own dataset!<br>"
        "Select GPT-3.5-Turbo (OpenAI) or OpenSource LLM (HuggingFace, e.g. Mistral, Llama-3, etc)."
    )

    with gr.Tab("Chatbot Demo"):
        input_box = gr.Textbox(label="Ask your question or request a book:")
        # chatbot_fn treats every choice other than "GPT-3.5-Turbo" as the Hugging Face model.
        model_radio = gr.Radio(choices=["GPT-3.5-Turbo", "OpenSource LLM (HuggingFace)"], value="GPT-3.5-Turbo", label="Model")
        output_box = gr.Textbox(label="Output", lines=10)
        send_btn = gr.Button("Get Response")
        # The button sends the question and the selected model to chatbot_fn and shows its reply.
        send_btn.click(fn=chatbot_fn, inputs=[input_box, model_radio], outputs=output_box)

    with gr.Tab("Model Evaluation"):
        eval_output = gr.Textbox(label="Model Comparison", lines=8)
        eval_btn = gr.Button("Run Evaluation")
        # eval_interface takes no inputs; it runs both classifiers over test_examples.
        eval_btn.click(fn=eval_interface, inputs=[], outputs=eval_output)

# share=True asks Gradio for a temporary public URL in addition to the local server.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fec651d70de13e4192.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


