<a href="https://colab.research.google.com/github/croco22/CapstoneProjectTDS/blob/philipp/notebooks/Dashboard_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Extra Task 3: Live Demo

## Imports and Setup

In [1]:
%%capture
!pip install gradio
# !pip install git+https://github.com/openai/whisper.git

import time
import pandas as pd
import gradio as gr
import google.generativeai as genai
# import whisper
from google.colab import userdata
from transformers import pipeline
from datetime import datetime


# Gemini API Setup
# The key is read from the Colab secret named 'GOOGLE_API_KEY' instead of being hardcoded.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)
# Shared model handle; generate_text() sends every prompt through it.
model = genai.GenerativeModel('gemini-2.0-flash-exp')


def generate_text(prompt):
    """Send `prompt` to the module-level Gemini `model` and return its reply.

    Args:
        prompt: Prompt passed to ``model.generate_content``.

    Returns:
        The response text with leading/trailing whitespace stripped.

    Raises:
        RuntimeError: If the API call or reading the response text fails.
            The original exception is chained as ``__cause__``.
    """
    try:
        response = model.generate_content(
            prompt,
            generation_config=genai.GenerationConfig(
                # NOTE(review): 2.0 is a very high sampling temperature for a
                # "keep the original meaning" task -- confirm this is intended.
                temperature=2.0,
            ),
        )
        text = response.text.strip()
    except Exception as e:
        # The previous `exit("...", e)` passed two arguments to exit(), which
        # itself raised a TypeError and hid the real failure. Raising keeps the
        # kernel alive and preserves the underlying error.
        raise RuntimeError(f"Error during API call: {e}") from e

    time.sleep(5)  # Avoid exceeding API limits
    return text

## Load data from the provided questionnaires

In [2]:
# One DataFrame per questionnaire file; they are stacked into a single frame below.
dfs = []

for q in range(1, 6):
    url = f'https://raw.githubusercontent.com/croco22/CapstoneProjectTDS/refs/heads/main/questionnaires/questionnaire{q}.json'
    df = pd.read_json(url)

    # Reduce every option record to just its 'option' value
    df['options'] = df['options'].apply(lambda records: [record['option'] for record in records])

    # Options of TEXT / NUMBER / DATE questions are irrelevant or do not
    # contribute meaningfully to the dataset, so they are blanked out
    free_form = df['type'].isin(['TEXT', 'NUMBER', 'DATE'])
    df.loc[free_form, 'options'] = None

    # Tag each row with the questionnaire it came from
    df = df.assign(questionnaire=f"Questionnaire {q}")

    dfs.append(df)

df = pd.concat(dfs, ignore_index=True)

## Rephrase questions if necessary

In [3]:
def rephrase_question(text):
    """Ask the language model to turn `text` into a clear, well-formed question.

    Args:
        text: The original questionnaire statement or question.

    Returns:
        Whatever `generate_text` returns for the rephrasing prompt.
    """
    instruction = f"""Reformulate the following statement into a clear, concise,
        and grammatically correct question that maintains its original meaning.
        If the text is already a question, preserve its intent without altering
        content or facts. The statement is: '{text}'.
        Return the generated question without additional explanations,
        comments, or text.
    """
    rephrased = generate_text(instruction)
    return rephrased


# Run every original question through the model-backed rephraser
df['rephrased_question'] = df['question'].map(rephrase_question)

# Keep only the columns the demo works with
df = df.loc[:, ['questionnaire', 'rephrased_question', 'options']]

df.head()

Unnamed: 0,questionnaire,rephrased_question,options
0,Questionnaire 1,Do you consent to data processing?,"[Yes, No]"
1,Questionnaire 1,What is the customer group?,"[End User, Wholesaler, Distributor, Consultant..."
2,Questionnaire 1,Which products are you interested in?,"[MY-SYSTEM, Notion, JTS, JS EcoLine, AKW100, A..."
3,Questionnaire 1,What kind of follow-up is planned?,"[Email, Phone, Schedule a Visit, No action]"
4,Questionnaire 1,Who should be included in the follow-up commun...,"[Stephan Maier, Joachim Wagner, Erik Schneider..."


## Gradio application

In [4]:
# Question-answering pipeline loaded from the "distilbert-base-uncased-distilled-squad"
# checkpoint; find_question_and_answer() calls it once per candidate question.
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

def find_question_and_answer(sentence, dataframe):
    """Find the question that `sentence` answers with the highest QA score.

    The module-level `qa_pipeline` is run once per entry of the
    "rephrased_question" column, with that entry as the question and
    `sentence` as the context. The highest-scoring result wins.

    Args:
        sentence: Free-text message used as the QA context.
        dataframe: DataFrame providing a "rephrased_question" column.

    Returns:
        A ``(best_question, best_answer)`` tuple. Both are ``None`` when
        `sentence` is empty or blank, when `dataframe` has no rows, or when
        no result scored above 0.
    """
    # Nothing to extract an answer from; skip the pipeline entirely.
    # NOTE(review): the pipeline is expected to reject an empty context -- confirm.
    if not sentence or not sentence.strip():
        return None, None

    best_question = None
    best_answer = None
    highest_score = 0

    for question in dataframe["rephrased_question"]:
        # Run extractive question answering against the user's message
        result = qa_pipeline(question=question, context=sentence)

        if result['score'] > highest_score:
            highest_score = result['score']
            best_question = question
            best_answer = result['answer']

    return best_question, best_answer

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [9]:
# Questions of the active chat session that have not been answered yet
UNANSWERED_QUESTIONS = []


def set_response():
    """Build the assistant's next message from the questions that are still open.

    Returns:
        A prompt listing every entry of `UNANSWERED_QUESTIONS` on its own
        line, or a completion notice when that list is empty.
    """
    if UNANSWERED_QUESTIONS:
        return "Please answer the following questions:\n" + '\n'.join(UNANSWERED_QUESTIONS)

    return "Thank you! You have completed all the questions in this questionnaire. Please submit your responses by clicking the 'Send' button at the bottom."


def evaluate_message(message):
    """Match a user message to an open question and record the extracted answer.

    Only questions currently listed in `UNANSWERED_QUESTIONS` are considered.
    When a match is found, the answer is written to the "answer" column of the
    module-level `df` and the question is removed from the open list.

    Args:
        message: The user's chat message text.
    """
    # Restrict the search to questions that are still open, so a message can
    # never be matched to an already-answered or unselected question.
    open_questions = df[df['rephrased_question'].isin(UNANSWERED_QUESTIONS)]
    question, answer = find_question_and_answer(message, open_questions)

    if question is None:
        # No confident match; leave all state untouched.
        return

    # Use a single .loc assignment: the former df[mask]["answer"] = ... wrote
    # to a temporary copy, so the answer was never stored.
    df.loc[df['rephrased_question'] == question, 'answer'] = answer

    # list.remove raises ValueError for a missing item, so check first.
    if question in UNANSWERED_QUESTIONS:
        UNANSWERED_QUESTIONS.remove(question)


def add_message(history, message):
    """Append the user's message to the chat and evaluate it.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts.
        message: Value of the multimodal textbox; its "text" entry is used.

    Returns:
        ``(history, textbox_update)``. The textbox is cleared after every
        submission and hidden once no unanswered questions remain.
    """
    text = message["text"]
    # Ignore missing or whitespace-only input; there is nothing to evaluate.
    if text is not None and text.strip():
        history.append({"role": "user", "content": text})
        evaluate_message(text)
        if len(UNANSWERED_QUESTIONS) == 0:
            # Previously a bare expression with no effect (missing `return`):
            # hide the input once every question has been answered.
            return history, gr.update(value=None, visible=False)
    return history, gr.MultimodalTextbox(value=None)


def bot(history: list):
    """Stream the assistant's reply into the chat one character at a time.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts; a new
            assistant entry is appended and filled in place.

    Yields:
        The same `history` list after each added character.
    """
    reply = set_response()
    assistant_entry = {"role": "assistant", "content": ""}
    history.append(assistant_entry)
    for char in reply:
        assistant_entry["content"] += char
        time.sleep(0.05)  # short pause between characters
        yield history


def start_chat(dropdown, history):
    """Start a chat session for the selected questionnaires.

    Fills the module-level `UNANSWERED_QUESTIONS` with the questions of the
    chosen questionnaires and posts the first assistant message.

    Args:
        dropdown: Selected questionnaire name(s); a single string, a list of
            strings, or None.
        history: Chat history list; None is treated as an empty history.

    Returns:
        Five component updates: two hiding updates, the chatbot update carrying
        the new history, and two showing updates.
    """
    # `isin` needs a list-like; normalise a bare string or None.
    if isinstance(dropdown, str):
        selected = [dropdown]
    else:
        selected = list(dropdown or [])

    # Mutate the shared list in place. A plain assignment here would only bind
    # a function-local name and leave the module-level list empty.
    UNANSWERED_QUESTIONS[:] = df[df['questionnaire'].isin(selected)]['rephrased_question'].tolist()

    if history is None:
        history = []
    history.append({"role": "assistant", "content": set_response()})
    return gr.update(visible=False), gr.update(visible=False), gr.update(value=history, visible=True), gr.update(visible=True), gr.update(visible=True)


def reset_state():
    """Build the five component updates that bring the UI back to its start screen.

    Returns:
        A tuple of updates: two showing updates, an update that empties and
        hides a list-valued component, an update that empties and hides a
        dict-valued component, and one more hiding update.
    """
    first_shown = gr.update(visible=True)
    second_shown = gr.update(visible=True)
    cleared_list = gr.update(value=[], visible=False)
    cleared_dict = gr.update(value={}, visible=False)
    hidden = gr.update(visible=False)
    return first_shown, second_shown, cleared_list, cleared_dict, hidden


def generate_json_download():
    """Write the questionnaire data to a timestamped JSON file.

    The export contains the question text, its options and, once at least one
    answer has been recorded, the "answer" column.

    Returns:
        The name of the JSON file that was written to the working directory.
    """
    current_timestamp = datetime.now()
    formatted_timestamp = current_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"answers_{formatted_timestamp}.json"

    # Include the collected answers when the column exists; it is only created
    # after the first answer has been stored.
    export_columns = ["rephrased_question", "options"]
    if "answer" in df.columns:
        export_columns.append("answer")

    filtered_df = df[export_columns].rename(columns={"rephrased_question": "question"})
    filtered_df.to_json(filename, orient='records', indent=4)

    return filename


# UI layout: a questionnaire picker and start button are shown first; the chat
# window, message input and download button stay hidden until a chat starts.
with gr.Blocks(theme='Nymbo/Nymbo_Theme') as app:
    gr.Markdown("""
        # **SurveyMaster8** - Your Ultimate Questionnaire Chatbot 🤖
        ## by Philipp Landeck<br>
    """)

    dropdown = gr.Dropdown(
        choices=list(df['questionnaire'].unique()),
        multiselect=True,
        label="Choose one or more questionnaires",
        interactive=True,
        visible=True,
        # A multiselect dropdown expects a list of selected values, not a bare string.
        value=[df['questionnaire'].iloc[0]]
    )

    start_button = gr.Button("Start", visible=True)

    chatbot = gr.Chatbot(
        show_label=False,
        type="messages",
        visible=False
    )

    chat_input = gr.MultimodalTextbox(
        interactive=True,
        sources=['microphone'],
        placeholder="Enter message or record voice...",
        show_label=False,
        autofocus=True,
        visible=False
    )

    # NOTE(review): a callable `value` is presumably evaluated when the app
    # loads rather than on click, so the file may not contain answers given
    # afterwards -- confirm against the Gradio DownloadButton documentation.
    clear_and_download = gr.DownloadButton("Send", value=generate_json_download, visible=False)

    # "Start" swaps the picker for the chat widgets.
    start_action = start_button.click(
        start_chat, [dropdown, chatbot], [dropdown, start_button, chatbot, chat_input, clear_and_download]
    )

    # Submitting a message records it first, then streams the assistant reply.
    chat_msg = chat_input.submit(
        add_message, [chatbot, chat_input], [chatbot, chat_input]
    )

    bot_msg = chat_msg.then(bot, chatbot, chatbot)

    # "Send" returns the UI to the start screen.
    clear_and_download.click(
        reset_state, None, [dropdown, start_button, chatbot, chat_input, clear_and_download]
    )

# share=True exposes a public URL; debug=True keeps the cell running so errors and logs are visible.
app.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://75a7ca648b81befeeb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://75a7ca648b81befeeb.gradio.live


