# In [None]:
import streamlit as st
import nltk
import pandas as pd
from nltk import word_tokenize, pos_tag, ne_chunk
import nltk
# Fetch the NLTK resources this script relies on for word_tokenize, pos_tag
# and ne_chunk. The download order is the same as before.
for _resource in (
    'averaged_perceptron_tagger',
    'punkt',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_resource)

def load_medquad_dataset(csv_path="/Users/cansarma/Desktop/Ollama/Task 3/MedQuAD-master/QA-TestSet-LiveQA-Med-Qrels-2479-Answers/All-2479-Answers-retrieved-from-MedQuAD.csv",
                          txt_path="/Users/cansarma/Desktop/Ollama/Task 3/MedQuAD-master/QA-TestSet-LiveQA-Med-Qrels-2479-Answers/All-qrels_LiveQAMed2017-TestQuestions_2479_Judged-Answers.txt"):
    """Build question/answer records from a CSV of answers and a text file.

    The i-th value of the CSV's ``Answer`` column is paired with the i-th
    line of the text file; the line is stored under ``'question'``. Pairing
    stops at the shorter of the two inputs. Rows whose answer cell is empty
    are skipped, but still consume their line so later rows stay aligned.

    NOTE(review): the default ``txt_path`` points at a "qrels" file; confirm
    that its lines really are the question text intended for display.

    Args:
        csv_path: Path to a CSV file containing an ``Answer`` column
            (surrounding whitespace in header names is ignored).
        txt_path: Path to a UTF-8 text file read line by line.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts with
        surrounding whitespace stripped from both values.

    Raises:
        KeyError: If the CSV has no ``Answer`` column.
    """
    data = pd.read_csv(csv_path)
    # Header cells may carry stray whitespace; normalise before the lookup.
    data.columns = data.columns.str.strip()

    print("Columns in CSV:", data.columns.tolist())

    if 'Answer' not in data.columns:
        raise KeyError(
            f"'Answer' column not found in {csv_path}; "
            f"available columns: {data.columns.tolist()}"
        )

    # Explicit encoding so decoding does not depend on the platform default.
    with open(txt_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    records = []
    # zip() stops at the shorter input, matching the old index-bound check.
    for answer, line in zip(data['Answer'], lines):
        # Empty CSV cells are read as NaN (a float), which has no .strip().
        if pd.isna(answer):
            continue
        records.append({
            'question': line.strip(),
            'answer': str(answer).strip(),
        })

    return records

def search_medquad_data(query, data, n=5):
    """Return up to *n* records whose question or answer contains *query*.

    Matching is a case-insensitive substring test, applied in the order the
    records appear in *data*; the scan stops as soon as *n* hits are found.

    Args:
        query: Search text. Surrounding whitespace is ignored.
        data: Iterable of dicts with string ``'question'`` and ``'answer'``
            values.
        n: Maximum number of records to return.

    Returns:
        A list of the matching records (the same dict objects as in *data*).
        Empty when the query is blank or *n* is not positive.
    """
    # Normalise once instead of lowercasing the query twice per record.
    needle = query.strip().lower()

    # An empty needle is a substring of every string, and a non-positive
    # limit used to let one hit through because the check ran after append.
    if not needle or n <= 0:
        return []

    results = []
    for entry in data:
        if needle in entry['question'].lower() or needle in entry['answer'].lower():
            results.append(entry)
            if len(results) >= n:
                break
    return results

def display_answers(results):
    """Write the search hits to the Streamlit page.

    Each hit is shown as a numbered question heading (numbering starts at 1),
    its answer, and a separator line. When *results* is empty a single
    "no answers" message is written instead.

    Args:
        results: Sequence of dicts with ``'question'`` and ``'answer'`` keys.
    """
    if not results:
        st.write("No relevant answers found.")
        return

    for number, hit in enumerate(results, start=1):
        st.write(f"### Q{number}: {hit['question']}")
        st.write(f"**Answer**: {hit['answer']}")
        st.write("---")

def recognize_medical_entities(text):
    """Extract named-entity chunks from *text* using NLTK.

    The text is tokenised, POS-tagged and passed to ``ne_chunk``; every
    top-level subtree of the result becomes one entity. Labels are whatever
    ``ne_chunk`` assigns; nothing here is specific to medical vocabulary.

    Args:
        text: The raw input string.

    Returns:
        A list of ``(entity_text, label)`` tuples in order of appearance,
        where ``entity_text`` is the chunk's words joined by single spaces.
    """
    chunked = ne_chunk(pos_tag(word_tokenize(text)))
    # Plain (word, tag) tuples are tokens outside any entity; only Tree
    # nodes represent recognised chunks.
    return [
        (" ".join(token for token, _tag in node.leaves()), node.label())
        for node in chunked
        if isinstance(node, nltk.Tree)
    ]

# Streamlit entry point
def main():
    """Render the Medical Q&A page: load data, read a query, show results.

    For a non-empty query the page first lists the entities returned by
    ``recognize_medical_entities`` and then the records returned by
    ``search_medquad_data``.
    """
    st.title("Medical Q&A Chatbot")

    st.write("Loading MedQuAD dataset...")
    # Streamlit re-executes the script on every interaction, so an uncached
    # call would re-read both dataset files each time the user types.
    # Fall back to a direct call on Streamlit versions without cache_data.
    cache_data = getattr(st, "cache_data", None)
    if cache_data is not None:
        loader = cache_data(load_medquad_dataset)
    else:
        loader = load_medquad_dataset
    data = loader()
    st.write("Dataset loaded")

    user_input = st.text_input("Ask a medical question")

    if user_input:
        entities = recognize_medical_entities(user_input)
        if entities:
            st.write("### Recognized Entities:")
            for entity, label in entities:
                st.write(f"- {entity}: {label}")
        else:
            st.write("No entities recognized.")

        st.write("### Searching for relevant answers...")
        results = search_medquad_data(user_input, data)
        display_answers(results)

if __name__ == "__main__":
    main()
