<a href="https://colab.research.google.com/github/etuckerman/AI-Powered_CS-S_Surfing_Community_Assistant/blob/main/KSF_LLM_testing_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q pandas scikit-learn numpy sentence-transformers tqdm langchain langchain_community transformers bitsandbytes accelerate nltk gradio
!pip install --upgrade langchain gradio




In [None]:
# Import necessary packages
import pandas as pd
import re
import numpy as np
import pickle
import os
from tqdm.auto import tqdm
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import LLMChain
from sentence_transformers import SentenceTransformer, util
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from google.colab import files
import nltk
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from langchain_core.runnables.base import RunnableSequence



In [None]:


# Load data
def load_or_upload_file(file_name):
    """Load a CSV from disk, falling back to an interactive Colab upload.

    Args:
        file_name: Path of the CSV expected in the working directory.

    Returns:
        A ``(DataFrame, status_message)`` tuple.

    Raises:
        FileNotFoundError: If the file is missing and the upload dialog is
            closed without providing any file.
    """
    if os.path.exists(file_name):
        return pd.read_csv(file_name), f"File '{file_name}' found and loaded."

    print(f"File '{file_name}' not found. Please upload the file.")
    uploaded = files.upload()
    if not uploaded:
        # Previously this surfaced as a bare StopIteration from next(iter(...)).
        raise FileNotFoundError(
            f"File '{file_name}' not found and no file was uploaded."
        )

    # Prefer the requested name when several files were uploaded; otherwise
    # fall back to the first upload and report the name actually read.
    uploaded_name = file_name if file_name in uploaded else next(iter(uploaded))
    df = pd.read_csv(uploaded_name)
    return df, f"File '{uploaded_name}' uploaded and loaded."


In [None]:

# Load the raw chat log; load_or_upload_file asks for an upload when the CSV is not on disk.
df, message = load_or_upload_file('messages.csv')
print(message)


File 'messages.csv' found and loaded.


In [None]:

# Load the player table the same way as the chat log.
player_df, player_message = load_or_upload_file('playerdata.csv')
print(player_message)


File 'playerdata.csv' found and loaded.


In [None]:

# Data Cleaning
def clean_messages(df):
    """Normalise the chat log's 'Message' column and drop low-value rows.

    Steps: cast to str, strip non-ASCII runs / URLs / @mentions, drop messages
    of two characters or fewer, drop one-word filler replies, then remove
    English stop words and lemmatise the remaining tokens.

    Args:
        df: DataFrame with a 'Message' column. It is not modified.

    Returns:
        A new DataFrame with a fresh 0..n-1 index.
    """
    nltk.download('stopwords')
    nltk.download('wordnet')

    # Work on a copy: the caller's frame stays untouched and the column
    # assignments below never target a filtered view (SettingWithCopyWarning).
    df = df.copy()
    df['Message'] = (
        df['Message']
        .astype(str)
        .apply(lambda x: re.sub(r'[^\x00-\x7F]+|http\S+|@\S+', '', x))
    )
    df = df[df['Message'].str.strip().str.len() > 2]

    short_responses = {'ok', 'lol', 'haha', 'yes', 'no', 'sure', 'right', 'cool', 'nah', 'yep', 'nope', 'yeah', 'k', 'hm'}
    # Strip before comparing so "lol " or " yes" are recognised as filler too.
    normalized = df['Message'].str.strip().str.lower()
    df = df[~normalized.isin(short_responses)].copy()

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    df['Message'] = df['Message'].apply(
        lambda x: ' '.join(
            lemmatizer.lemmatize(word) for word in x.split() if word.lower() not in stop_words
        )
    )

    return df.reset_index(drop=True)

# Clean the raw messages and write the result to 'cleaned_messages.csv'.
df = clean_messages(df)
df.to_csv('cleaned_messages.csv', index=False)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:

# Initialize the SentenceTransformer model on the GPU when one is present and
# fall back to the CPU otherwise, so CPU-only runtimes do not fail here.
model = SentenceTransformer(
    'all-mpnet-base-v2',
    device='cuda' if torch.cuda.is_available() else 'cpu',
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:

def extract_qa_pairs(df, window_size=50):
    """Pair question-like messages with their most similar follow-up message.

    A message counts as a question when it ends with '?' or contains one of
    who/what/where/when/why/how as a whole word. For each question, the next
    ``window_size`` messages are candidates and the one whose embedding has
    the highest cosine similarity to the question is chosen. Pairs whose
    chosen answer itself ends with '?' are dropped.

    Args:
        df: DataFrame with a 'Message' column of strings.
        window_size: How many following messages to consider as answers.

    Returns:
        A list of ``{'Question': ..., 'Answer': ...}`` dicts.
    """
    messages = df['Message'].tolist()
    if len(messages) < 2 or window_size < 1:
        return []

    # Compute embeddings for all messages in one batch.
    message_embeddings = model.encode(messages)

    # Whole-word match: plain substring tests also fired on "show", "whole",
    # "somewhat", etc.
    question_word = re.compile(r'\b(?:who|what|where|when|why|how)\b', re.IGNORECASE)

    qa_pairs = []

    # Walk every message that has at least one follow-up. The window slice is
    # clamped to the end of the log, so questions near the tail are kept
    # instead of being skipped by a `len - window_size` loop bound.
    for i in range(len(messages) - 1):
        message = messages[i]
        if not (message.endswith('?') or question_word.search(message)):
            continue

        window_end = min(i + 1 + window_size, len(messages))
        potential_answers = messages[i + 1:window_end]
        answer_embeddings = message_embeddings[i + 1:window_end]

        similarities = cosine_similarity([message_embeddings[i]], answer_embeddings)[0]
        best_answer = potential_answers[int(np.argmax(similarities))]

        if not best_answer.endswith('?'):
            qa_pairs.append({'Question': message, 'Answer': best_answer})

    return qa_pairs

# Build the QA frame. Naming the columns keeps the Question/Answer schema
# even when no pairs were extracted.
qa_pairs = extract_qa_pairs(df)
messages_qa = pd.DataFrame(qa_pairs, columns=['Question', 'Answer'])
messages_qa.to_csv('qa_pairs.csv', index=False)


In [None]:

def save_or_load_embeddings(df):
    """Attach question/answer embeddings to ``df``, using a pickle cache.

    Adds 'Question_Embedding' and 'Answer_Embedding' columns to ``df`` in
    place. The cache file 'qa_embeddings.pkl' holds two pickled lists
    (questions first, then answers). A cache whose length does not match
    ``df`` is treated as stale and recomputed.

    Args:
        df: DataFrame with 'Question' and 'Answer' columns.
    """
    # Register `progress_apply` on both paths; other cells rely on it even
    # when the embeddings come from the cache.
    tqdm.pandas()

    cache_path = 'qa_embeddings.pkl'
    if os.path.exists(cache_path):
        # NOTE: pickle can execute arbitrary code on load; only use cache
        # files produced by this notebook.
        with open(cache_path, 'rb') as f:
            question_embeddings = pickle.load(f)
            answer_embeddings = pickle.load(f)

        if len(question_embeddings) == len(df) and len(answer_embeddings) == len(df):
            df['Question_Embedding'] = question_embeddings
            df['Answer_Embedding'] = answer_embeddings
            print("QA Embeddings loaded from 'qa_embeddings.pkl'.")
            return

        print(
            f"Cached embeddings in '{cache_path}' do not match the {len(df)} QA pairs; recomputing."
        )

    df['Question_Embedding'] = df['Question'].progress_apply(lambda x: model.encode(x))
    df['Answer_Embedding'] = df['Answer'].progress_apply(lambda x: model.encode(x))

    with open(cache_path, 'wb') as f:
        pickle.dump(df['Question_Embedding'].tolist(), f)
        pickle.dump(df['Answer_Embedding'].tolist(), f)
    print("QA Embeddings computed and saved to 'qa_embeddings.pkl'.")

# Adds the 'Question_Embedding' and 'Answer_Embedding' columns to messages_qa in place.
save_or_load_embeddings(messages_qa)


  0%|          | 0/35356 [00:00<?, ?it/s]

  0%|          | 0/35356 [00:00<?, ?it/s]

QA Embeddings computed and saved to 'qa_embeddings.pkl'.


In [None]:

def find_relevant_qa_pair(user_query, messages_qa):
    """Return the stored (question, answer) pair closest to ``user_query``.

    The score of a pair is the mean of the query's cosine similarity to the
    stored question embedding and to the stored answer embedding. The
    'Question_Similarity', 'Answer_Similarity' and 'Similarity' columns are
    written to ``messages_qa`` as a side effect.

    Args:
        user_query: Free-text query string.
        messages_qa: DataFrame with 'Question', 'Answer',
            'Question_Embedding' and 'Answer_Embedding' columns.

    Returns:
        ``(question, answer)`` of the best-scoring row.

    Raises:
        ValueError: If ``messages_qa`` has no rows.
    """
    if messages_qa.empty:
        raise ValueError("messages_qa is empty; there are no QA pairs to search.")

    query_embedding = np.asarray(model.encode(user_query)).reshape(1, -1)

    # One matrix product per column instead of one cosine_similarity call per
    # row; this also removes the dependency on tqdm's `progress_apply`
    # having been registered beforehand.
    question_matrix = np.vstack(messages_qa['Question_Embedding'].tolist())
    answer_matrix = np.vstack(messages_qa['Answer_Embedding'].tolist())

    messages_qa['Question_Similarity'] = cosine_similarity(query_embedding, question_matrix)[0]
    messages_qa['Answer_Similarity'] = cosine_similarity(query_embedding, answer_matrix)[0]
    messages_qa['Similarity'] = (messages_qa['Question_Similarity'] + messages_qa['Answer_Similarity']) / 2

    best_match = messages_qa.loc[messages_qa['Similarity'].idxmax()]
    return best_match['Question'], best_match['Answer']


In [None]:
def preprocess_player_data(df):
    """Parse the player activity timestamps into pandas datetimes.

    The 'First online' and 'Last online' columns are converted in place and
    the same DataFrame object is returned.
    """
    for timestamp_column in ('First online', 'Last online'):
        df[timestamp_column] = pd.to_datetime(df[timestamp_column])
    return df

# Convert the 'First online' / 'Last online' columns to datetimes.
player_df = preprocess_player_data(player_df)


In [None]:

def get_player_info(player_name):
    """Look up players whose name contains ``player_name`` (case-insensitive).

    Args:
        player_name: Literal text to search for in ``player_df['Name']``.

    Returns:
        A list of row dicts for every matching player, or ``None`` when no
        player matches.
    """
    # regex=False: player names are literal text, so characters such as
    # '+', '[' or '.' must not be interpreted as regex syntax.
    name_matches = player_df['Name'].str.contains(player_name, case=False, na=False, regex=False)
    player_info = player_df[name_matches]
    return player_info.to_dict(orient='records') if not player_info.empty else None


In [None]:

def attach_player_data_to_qa(messages_qa, player_df):
    """Annotate QA pairs with the records of the first player they mention.

    For each row, player names are tried in ``player_df`` order; the first
    name that occurs as a whole word (case-insensitive) in the question or
    the answer decides the row. Its matching player records are stored in
    the 'Player_Info' column. Rows without a mention keep their existing
    'Player_Info' value (``None`` when the column is created here).

    Args:
        messages_qa: DataFrame with 'Question' and 'Answer' columns; the
            'Player_Info' column is added or updated in place.
        player_df: DataFrame with a 'Name' column. It is not modified.

    Returns:
        ``messages_qa`` with the 'Player_Info' column.
    """
    name_series = player_df['Name']
    known = name_series.notna()
    # Missing names become '' instead of the literal string 'nan', which
    # would otherwise be matched as if it were a player called "nan".
    name_text = name_series.where(known, '').astype(str)

    def get_player_info(player_name):
        # Literal (non-regex) containment so names with regex metacharacters
        # only match themselves.
        hits = known & name_text.str.contains(player_name, case=False, regex=False)
        player_info = player_df[hits]
        return player_info.to_dict(orient='records') if not player_info.empty else None

    # Compile each name's whole-word pattern once rather than once per row.
    # Blank names are skipped: their pattern would match every message.
    name_patterns = [
        (name, re.compile(r'\b' + re.escape(name) + r'\b', re.IGNORECASE))
        for name in name_text[known]
        if name.strip()
    ]

    if 'Player_Info' in messages_qa.columns:
        player_info_values = messages_qa['Player_Info'].tolist()
    else:
        player_info_values = [None] * len(messages_qa)

    row_texts = zip(messages_qa['Question'], messages_qa['Answer'])
    for position, (question, answer) in enumerate(row_texts):
        question_text = '' if pd.isna(question) else str(question)
        answer_text = '' if pd.isna(answer) else str(answer)

        # Check for player mentions in both Question and Answer.
        for name, pattern in name_patterns:
            if pattern.search(question_text) or pattern.search(answer_text):
                player_info = get_player_info(name)
                if player_info:
                    player_info_values[position] = player_info
                break

    messages_qa['Player_Info'] = pd.Series(player_info_values, index=messages_qa.index, dtype=object)
    return messages_qa


In [None]:


# Interactive Hugging Face login widget.
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Function to handle loading checkpoints
def load_checkpoints():
    """Read every known checkpoint file that exists in the working directory.

    Returns:
        A dict keyed by file name. CSV files map to DataFrames,
        'qa_embeddings.pkl' maps to a ``(first_object, second_object)`` tuple
        of its two pickled objects, 'huggingface_token.txt' maps to its
        stripped text, and any file that does not exist maps to ``None``.
    """
    def _read_embeddings(path):
        # NOTE: pickle can execute arbitrary code on load; only use files
        # produced by this notebook.
        with open(path, 'rb') as f:
            return pickle.load(f), pickle.load(f)

    def _read_token(path):
        with open(path, 'r') as f:
            return f.read().strip()

    loaders = {
        'DataFrame': pd.read_csv,
        'Embeddings': _read_embeddings,
        'Token': _read_token,
    }

    files_to_load = {
        'cleaned_messages.csv': 'DataFrame',
        'messages.csv': 'DataFrame',
        'playerdata.csv': 'DataFrame',
        'qa_embeddings.pkl': 'Embeddings',
        'qa_pairs.csv': 'DataFrame',
        'huggingface_token.txt': 'Token'
    }

    checkpoints = {}
    for file_name, file_type in files_to_load.items():
        if os.path.exists(file_name):
            checkpoints[file_name] = loaders[file_type](file_name)
        else:
            checkpoints[file_name] = None

    return checkpoints

# Restore whatever checkpoints exist; anything missing keeps its in-memory value.
checkpoints = load_checkpoints()

if checkpoints['cleaned_messages.csv'] is not None:
    df = checkpoints['cleaned_messages.csv']

if checkpoints['playerdata.csv'] is not None:
    # A raw CSV read returns the date columns as text, so re-apply the parsing.
    player_df = preprocess_player_data(checkpoints['playerdata.csv'])

qa_checkpoint = checkpoints['qa_pairs.csv']
embedding_checkpoint = checkpoints['qa_embeddings.pkl']

if embedding_checkpoint is not None:
    question_embeddings, answer_embeddings = embedding_checkpoint

# messages_qa is the Question/Answer frame, so it is restored from
# 'qa_pairs.csv' (not the raw 'messages.csv' chat log) and only together with
# embeddings of matching length, which the retrieval functions require.
if qa_checkpoint is not None and embedding_checkpoint is not None:
    if len(qa_checkpoint) == len(question_embeddings) == len(answer_embeddings):
        messages_qa = qa_checkpoint
        messages_qa['Question_Embedding'] = question_embeddings
        messages_qa['Answer_Embedding'] = answer_embeddings
    else:
        print(
            "Warning: 'qa_pairs.csv' and 'qa_embeddings.pkl' have different lengths; "
            "keeping the in-memory messages_qa."
        )


# Read Hugging Face token from file
def get_huggingface_token(file_path):
    """Return the text of ``file_path`` with surrounding whitespace removed."""
    with open(file_path, 'r') as token_file:
        raw_token = token_file.read()
    return raw_token.strip()

# Set up authentication with the Hugging Face token
def authenticate_huggingface(token):
    """Expose ``token`` to Hugging Face libraries through the environment.

    Args:
        token: Hugging Face access token string.
    """
    # Kept for any code that reads the original variable name.
    os.environ['HUGGINGFACE_TOKEN'] = token
    # `HF_TOKEN` is the name the Hub warning printed during model loading asks for.
    os.environ['HF_TOKEN'] = token

# Path to your Hugging Face token file
token_file_path = 'huggingface_token.txt'

# Reuse the token load_checkpoints() already read; only touch the file again
# if that lookup came back empty, and never fail when the file is absent.
token = checkpoints['huggingface_token.txt']
if token is None and os.path.exists(token_file_path):
    token = get_huggingface_token(token_file_path)

if token:
    authenticate_huggingface(token)
else:
    print(f"No Hugging Face token found in '{token_file_path}'; relying on the interactive login instead.")

# Continue with model loading or other processing


In [None]:

# Load and setup the LLaMA 3.1 model
def setup_llama_model():
    """Load the Llama 3.1 8B Instruct weights and tokenizer.

    Returns:
        ``(model, tokenizer)``; the model is loaded in float16 and placed by
        ``device_map='auto'``.
    """
    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # The former `torch.backends.cuda.sdp_kernel(...)` wrapper is dropped: it
    # only toggles attention kernels while the context is active, no attention
    # runs during weight loading, and it emitted a warning on every load.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # float16 matches the dtype requested in create_pipeline and halves
        # the memory needed compared with the default float32 load.
        torch_dtype=torch.float16,
        # `trust_remote_code` is omitted: NOTE(review) Llama is assumed to be
        # a built-in transformers architecture, so no Hub-side code is needed.
        device_map='auto',
        offload_buffers=True
    )

    return model, tokenizer


In [None]:
# Initialize the model and tokenizer, then wrap them in a text-generation pipeline.
llama_model, tokenizer = setup_llama_model()
hf_pipeline = pipeline('text-generation', model=llama_model, tokenizer=tokenizer)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  self.gen = func(*args, **kwds)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



In [None]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Fixed background text that generate_response inserts into every prompt after
# "Context:". It describes the Counter-Strike Surf game mode and how this
# application combines a retrieved QA pair with the LLM.
FIXED_CONTEXT = (
    "Counter-Strike Surf is a popular custom game mode within the Counter-Strike series, specifically designed for players who enjoy navigating complex maps using a surfing mechanic. "
    "In this mode, players use a combination of skillful movement and precise control to glide along sloped surfaces and ramps, often to achieve high speeds and complete challenging courses. "
    "Surf maps are characterized by their unique layouts, which include angled surfaces, jumps, and obstacles that require careful maneuvering.\n\n"
    "The gameplay of Counter-Strike Surf emphasizes player dexterity and spatial awareness. Players must master the art of 'surfing' to maintain speed and navigate through the map, often aiming to set personal or competitive records.\n\n"
    "In this application, users input a query related to Counter-Strike Surf. The code retrieves the most relevant question and answer (QA) pair from a pre-defined database of QA pairs. "
    "The large language model (LLM) then generates a response to the user's query. The response is based on the relevant QA pair, provided the pair is pertinent and not irrelevant or junk. "
    "The goal is to ensure that the answer generated is useful and contextually accurate based on the retrieved information."
)

# Define the function that will be used by Gradio
def generate_response(user_query):
    """Answer ``user_query`` using the best-matching QA pair as context.

    Args:
        user_query: Text typed by the user.

    Returns:
        Only the newly generated text, stripped of surrounding whitespace.
    """
    # NOTE(review): `model` must be the causal LM here, but the same global
    # name is bound to the SentenceTransformer that find_relevant_qa_pair
    # calls `.encode` on - confirm which object is bound when this runs.
    question, relevant_message = find_relevant_qa_pair(user_query, messages_qa)
    prompt = (f"User Query: {user_query}\n"
              f"Relevant Message: {relevant_message}\n"
              f"Context: {FIXED_CONTEXT}\n"
              f"Answer:")
    # Send the inputs to wherever the model's weights actually live.
    model_input = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **model_input,
            # `max_length` also counts the prompt tokens, and the fixed
            # context by itself spans several paragraphs; budget new tokens.
            max_new_tokens=200,
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9
        )

    # Decode only what was generated, not the echoed prompt.
    prompt_length = model_input['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

def gradio_interface():
    """Build (without launching) the Gradio UI wired to generate_response."""
    query_box = gr.Textbox(label="User Query", placeholder="Enter your query here...")
    return gr.Interface(
        fn=generate_response,
        inputs=[query_box],
        outputs="text",
        title="Counter-Strike Surf QA Chatbot",
        description="Enter your query related to Counter-Strike Surf to get a relevant and contextual response.",
    )

# Initialize the tokenizer and model
# NOTE(review): this rebinds the global `model`, which earlier cells use for
# the SentenceTransformer encoder - confirm that is intended.
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Half precision halves the footprint on GPU; stay in float32 on CPU-only
# runtimes (assumes half-precision CPU inference is unsuitable - TODO confirm).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

# Check GPU memory and load model accordingly
if torch.cuda.is_available():
    # Get the free GPU memory in GB
    free_memory = torch.cuda.mem_get_info()[0] / 1024**3

    # Compare against the model's real size instead of a fixed 10 GB guess.
    memory_threshold = model.get_memory_footprint() / 1024**3

    if free_memory > memory_threshold:
        model.to('cuda')
    else:
        print(f"Warning: Not enough GPU memory to load the model. Free memory: {free_memory:.2f} GB. Required memory: > {memory_threshold:.2f} GB")
        # Consider using a smaller model or different offloading techniques
else:
    print("Warning: No GPU available. Running on CPU.")


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer and model, then move the model to the GPU when one exists.
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name
).to(
    'cuda' if torch.cuda.is_available() else 'cpu'
)


NameError: name 'AutoTokenizer' is not defined

In [None]:

# Load QA pairs from CSV as a list of row dicts.
qa_pairs_df = pd.read_csv('qa_pairs.csv')
qa_pairs = qa_pairs_df.to_dict(orient='records')

# Load QA embeddings from pickle file.
# Only the first pickled object in the file is read here.
with open('qa_embeddings.pkl', 'rb') as f:
    qa_embeddings = pickle.load(f)

# Fixed background text that generate_response inserts into every prompt after
# "Context:". NOTE: this repeats the FIXED_CONTEXT assignment from an earlier cell.
FIXED_CONTEXT = (
    "Counter-Strike Surf is a popular custom game mode within the Counter-Strike series, specifically designed for players who enjoy navigating complex maps using a surfing mechanic. "
    "In this mode, players use a combination of skillful movement and precise control to glide along sloped surfaces and ramps, often to achieve high speeds and complete challenging courses. "
    "Surf maps are characterized by their unique layouts, which include angled surfaces, jumps, and obstacles that require careful maneuvering.\n\n"
    "The gameplay of Counter-Strike Surf emphasizes player dexterity and spatial awareness. Players must master the art of 'surfing' to maintain speed and navigate through the map, often aiming to set personal or competitive records.\n\n"
    "In this application, users input a query related to Counter-Strike Surf. The code retrieves the most relevant question and answer (QA) pair from a pre-defined database of QA pairs. "
    "The large language model (LLM) then generates a response to the user's query. The response is based on the relevant QA pair, provided the pair is pertinent and not irrelevant or junk. "
    "The goal is to ensure that the answer generated is useful and contextually accurate based on the retrieved information."
)

# Define the function that will be used by Gradio
def generate_response(user_query):
    """Answer ``user_query`` using the best-matching QA pair as context.

    Args:
        user_query: Text typed by the user.

    Returns:
        Only the newly generated text, stripped of surrounding whitespace.
    """
    # NOTE(review): this calls find_relevant_qa_pair with two arguments, while
    # a one-argument function of the same name is defined later in this cell;
    # it also needs `model` to be the causal LM. Confirm both bindings.
    question, relevant_message = find_relevant_qa_pair(user_query, messages_qa)
    prompt = (f"User Query: {user_query}\n"
              f"Relevant Message: {relevant_message}\n"
              f"Context: {FIXED_CONTEXT}\n"
              f"Answer:")
    # Send the inputs to wherever the model's weights actually live.
    model_input = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **model_input,
            # `max_length` also counts the prompt tokens, and the fixed
            # context by itself spans several paragraphs; budget new tokens.
            max_new_tokens=200,
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9
        )

    # Decode only what was generated, not the echoed prompt.
    prompt_length = model_input['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

def gradio_interface():
    """Assemble the Gradio text-in / text-out UI around generate_response."""
    interface_options = {
        "fn": generate_response,
        "inputs": [gr.Textbox(label="User Query", placeholder="Enter your query here...")],
        "outputs": "text",
        "title": "Counter-Strike Surf QA Chatbot",
        "description": "Enter your query related to Counter-Strike Surf to get a relevant and contextual response.",
    }
    return gr.Interface(**interface_options)

import numpy as np
import pickle

# Load QA embeddings and pairs
# Only the first pickled object in the file is read here.
with open('qa_embeddings.pkl', 'rb') as f:
    qa_embeddings = pickle.load(f)

# Parse with pandas instead of splitting lines on ',': the header row is
# skipped (so row i lines up with embedding i) and quoted commas inside a
# message no longer split it into extra fields. Rows stay plain lists.
qa_pairs = pd.read_csv('qa_pairs.csv').values.tolist()

def find_relevant_qa_pair(query_embedding):
    """Return the entry of ``qa_pairs`` whose embedding is closest to the query.

    Args:
        query_embedding: 1-D numeric vector for the query.

    Returns:
        ``qa_pairs[i]`` where ``i`` maximises the cosine similarity between
        ``qa_embeddings[i]`` and ``query_embedding``.
    """
    # NOTE(review): this redefines find_relevant_qa_pair with one parameter,
    # while generate_response and chatbot_interface call it as
    # (user_query, messages_qa) - confirm which definition should win.
    embedding_matrix = np.asarray(qa_embeddings, dtype=float)
    query_vector = np.asarray(query_embedding, dtype=float)

    dot_products = embedding_matrix @ query_vector
    norm_products = np.linalg.norm(embedding_matrix, axis=1) * np.linalg.norm(query_vector)

    # A zero-norm vector would give 0/0 = NaN, and argmax picks NaN over any
    # real score; rank such rows last instead.
    similarities = np.full(dot_products.shape, -np.inf)
    np.divide(dot_products, norm_products, out=similarities, where=norm_products > 0)

    # Find the index of the most similar QA pair and return that pair.
    most_similar_idx = int(np.argmax(similarities))
    return qa_pairs[most_similar_idx]



# Build the Gradio interface with gradio_interface() and launch it.
iface = gradio_interface()
iface.launch()


In [None]:
def test_model():
    """Smoke-test generate_response with a placeholder query and print the reply."""
    reply = generate_response("Example query related to Counter-Strike Surf.")
    print(reply)

test_model()


NameError: name 'messages_qa' is not defined

In [None]:

def chatbot_interface(user_query):
    """Answer ``user_query`` with the pipeline, grounded on the retrieved QA pair.

    Args:
        user_query: Text typed by the user.

    Returns:
        The text generated by ``hf_pipeline`` (prompt excluded), or a fixed
        apology when no QA pair is available.
    """
    relevant_qa = find_relevant_qa_pair(user_query, messages_qa)
    if not relevant_qa:
        return "Sorry, I couldn't find an answer."
    question, answer = relevant_qa

    # Include the user's own question: previously the model only saw the
    # retrieved pair and had nothing telling it what was actually asked.
    prompt = (
        f"User Query: {user_query}\n"
        f"Related Question: {question}\n"
        f"Related Answer: {answer}\n"
        f"Answer:"
    )
    # max_new_tokens bounds the reply itself (max_length also counted the
    # prompt); return_full_text=False keeps the prompt out of the output.
    output = hf_pipeline(prompt, max_new_tokens=150, return_full_text=False)
    return output[0]['generated_text'].strip()

# Gradio callback plus app launch
def gradio_interface(user_query):
    """Gradio callback: forward the query text to chatbot_interface."""
    reply = chatbot_interface(user_query)
    return reply

iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://a5f6fa04b304358b67.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
!pip install accelerate
!pip install bitsandbytes

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline

def create_pipeline(model, tokenizer):
    """Wrap ``model`` and ``tokenizer`` in a text-generation pipeline.

    The pipeline is requested with float16, automatic device mapping, a
    500-token ``max_length`` and truncation enabled.
    """
    generation_options = dict(
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,  # half precision for memory efficiency
        device_map="auto",  # let transformers pick the device mapping
        max_length=500,
        truncation=True,
    )
    return pipeline("text-generation", **generation_options)

# Initialize the model and tokenizer
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model in half precision, placed by device_map='auto'. No
# bitsandbytes quantization is configured here. The former
# `torch.backends.cuda.sdp_kernel(...)` wrapper is dropped: no attention runs
# while weights are loaded, and it emitted a warning on every load.
# `trust_remote_code` is omitted: NOTE(review) Llama is assumed to be a
# built-in transformers architecture.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map='auto',
)

pipeline_inst = create_pipeline(model, tokenizer)
llm = HuggingFacePipeline(pipeline=pipeline_inst)



  self.gen = func(*args, **kwds)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

  llm = HuggingFacePipeline(pipeline=pipeline_inst)


In [None]:

# Define the template for generating refined responses.
# The turn markers follow the Llama 3.1 Instruct chat format, matching the
# model loaded in this notebook (the previous `[INST]` markup belongs to a
# different model family). The leading begin-of-text token is left out on the
# assumption that the tokenizer prepends it - TODO confirm.
template = """<|start_header_id|>system<|end_header_id|>

You are a helpful assistant, knowledgeable about Counter-Strike: Source surfing.<|eot_id|><|start_header_id|>user<|end_header_id|>

A user has asked the following question:

{user_query}

Here's a relevant message from the CS:S surfing community:

{most_relevant_message}

Please provide a helpful and informative response to the user's question, considering the relevant message.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""


In [None]:

def generate_refined_answer(user_query, most_relevant_message, player_info=None):
    """Ask the LLM for an answer grounded on a retrieved community message.

    Args:
        user_query: The user's question.
        most_relevant_message: Retrieved message inserted into the prompt.
        player_info: Optional list of player record dicts; when given, every
            ``key: value`` pair is appended under a "Player Information"
            heading.

    Returns:
        The LLM output, optionally followed by the player information.
    """
    prompt = PromptTemplate(template=template, input_variables=["user_query", "most_relevant_message"])
    # Compose with the pipe operator. Passing a list to RunnableSequence
    # raises "Expected a Runnable, callable or dict".
    llm_chain = prompt | llm
    response = llm_chain.invoke({"user_query": user_query, "most_relevant_message": most_relevant_message})

    if player_info:
        player_lines = [f"{key}: {value}" for player in player_info for key, value in player.items()]
        response += "\n\nPlayer Information:\n" + "\n".join(player_lines)

    return response


In [None]:

def find_most_relevant_message(user_query, messages_qa):
    """Return the answer whose stored question is most similar to the query.

    Args:
        user_query: Free-text query string.
        messages_qa: DataFrame with 'Question' and 'Answer' columns and,
            optionally, a precomputed 'Question_Embedding' column.

    Returns:
        The 'Answer' of the best-matching row, or ``None`` when
        ``messages_qa`` has no rows.
    """
    if len(messages_qa) == 0:
        return None

    user_query_embedding = model.encode(user_query, convert_to_tensor=True)

    if 'Question_Embedding' in messages_qa.columns:
        # Reuse the cached embeddings instead of re-encoding every stored
        # question on each query; match the query's dtype and device so the
        # similarity product does not mix CPU and GPU tensors.
        question_embeddings = torch.as_tensor(
            np.vstack(messages_qa['Question_Embedding'].tolist()),
            dtype=user_query_embedding.dtype,
            device=user_query_embedding.device,
        )
    else:
        # No cache available: encode all questions in one batched call.
        question_embeddings = model.encode(
            messages_qa['Question'].tolist(),
            convert_to_tensor=True,
            show_progress_bar=True,
        )

    similarities = util.cos_sim(user_query_embedding, question_embeddings)[0]
    best_position = int(similarities.argmax())
    return messages_qa.iloc[best_position]['Answer']


In [None]:

def chatbot_interface(user_query):
    """Answer a query, attaching player records when a player is mentioned.

    The first name in ``player_df['Name']`` that appears as a whole word
    (case-insensitive) in the query selects the player records to attach.

    Args:
        user_query: Text typed by the user.

    Returns:
        The refined answer, or a fixed apology when no relevant message was
        found.
    """
    player_info = None

    # Drop missing names and coerce the rest to str: re.escape raises a
    # TypeError on NaN / None values.
    for player_name in player_df['Name'].dropna().astype(str):
        if not player_name.strip():
            # A blank name would produce a pattern that matches any query.
            continue
        if re.search(r'\b' + re.escape(player_name) + r'\b', user_query, re.IGNORECASE):
            player_info = get_player_info(player_name)
            break

    most_relevant_message = find_most_relevant_message(user_query, messages_qa)
    if most_relevant_message:
        return generate_refined_answer(user_query, most_relevant_message, player_info)
    else:
        return "Sorry, I couldn't find a relevant answer to your question."


In [None]:

# Example usage: the query reused by the following cells.
user_query = "Is Caff a good surfer?"

In [None]:
# Retrieve the best-matching QA pair for the example query.
question, answer = find_relevant_qa_pair(user_query, messages_qa)

NameError: name 'model' is not defined

In [None]:
# Show the retrieved question and answer.
print(question, answer)

want ask good surfers, ksf people channel asked there. basically wanted know whoelse could/should ask (I guess) anyone know good surfer arent ksf


In [None]:
# Pass the retrieved answer to generate_refined_answer (no player info supplied).
refined_answer = generate_refined_answer(user_query, answer)

TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'list'>

In [None]:

# Show the refined answer.
print(refined_answer)


In [None]:
# Gradio callback plus app launch
def gradio_interface(user_query):
    """Gradio callback: hand the query text to chatbot_interface."""
    reply = chatbot_interface(user_query)
    return reply

iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)
iface.launch()


IMPORTANT: You are using gradio version 3.35.2, however version 4.29.0 is available, please upgrade.
--------
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

