In [1]:
# pip install langchain langchain_community langchain_huggingface langchain-google-genai sentence-transformers pypdf faiss-cpu gradio pywebview

In [2]:
import json
import os
import socket
import threading
import time
from datetime import datetime

import gradio as gr
import webview
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
# Keep a key that is already exported in the environment; the placeholder is
# only a fallback, so a real credential is never overwritten by this cell and
# never has to be written into the notebook itself.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = "YOUR_API"


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Build the embedding model and load the persisted FAISS index from disk.
EMBEDDING_MODEL_NAME = "BAAI/bge-large-en-v1.5"
FAISS_INDEX_DIR = "last_merge"

embedding_model = HuggingFaceBgeEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    encode_kwargs={"normalize_embeddings": True},
)

# NOTE(review): allow_dangerous_deserialization=True is required by this call
# to load the saved index; only point it at index folders you built yourself.
vectorstore = FAISS.load_local(
    FAISS_INDEX_DIR,
    embedding_model,
    allow_dangerous_deserialization=True,
)



# Prompt used by the answer-generation step of the chain.
# It is organised in tagged sections: <system> (role and language), <instructions>
# (numbered behavioural rules), <examples> (few-shot question/answer pairs), then
# the three runtime slots {context}, {chat_history} and {question}.
# The instructions ask the model to end every answer with "---".
template = """
<system>
You are an AI assistant expert in Large Language Models (LLMs) and Natural Language Processing (NLP), designed to answer questions related exclusively to these topics.
You work as the official chatbot of the University of Salerno (UNISA), Department of DIEM 
(Department of Information and Electrical Engineering and Applied Mathematics).
You must answer both organizational questions about the course (such as the recommended book, professors, etc.) and questions regarding the topics covered.
Answer only in english.
</system>

<instructions>
1. Domain and Scope:
   - Only answer questions related to LLMs and NLP.
   - For any query outside this domain, respond humorously that you cannot help because you specialize exclusively in LLMs and NLP.
2. Clear and Unified Responses:
   - Provide a coherent answer that **systematically blends** information from the provided context with your **deep expertise in LLMs/NLP**.
   - **Always enrich** the context with your internal knowledge to add value, technical depth, and didactic clarity, even when the context appears complete.
   - Ensure the integration feels organic by:
     • Using the context as foundation 
     • Expanding concepts with technical details from your knowledge
     • Maintaining a natural flow between sources
   - Structure the answer in logical paragraphs. 
   - At the end of the answer add "---".
3. Contextual Integration:
   - Use the conversation history and provided context to ensure continuity, relevance, and precision.
   - If you find informations in other language, translate them in English.
4. Name Verification and Precision:
   - **Always check that any person's full name appears exactly as provided in the context**.
   - **If a name is not found exactly, reply with: "I have no information about this person. Perhaps you meant... ?" and suggest the two closest alternatives**.
5. Uncertainty and Transparency:
   - If you are not 100% sure about any detail, admit the uncertainty rather than inventing information.
6. Synonym and Variation Handling:
   - Recognize that slight rephrasings (e.g., "What is the book?" vs. "What is the recommended book for the course?, "What is a LLM?" vs "What is a Large Language Model?") refer to the same query.
7. Code Provision Guidelines:
   - Provide code only if it directly pertains to the information or functionalities present in the context.
   - If the requested code falls outside the current context, state that you can only provide code for functionalities already covered.
8. Defending the Answer:
   - If challenged (e.g., "Are you sure?"), reaffirm that your answer is based on both the provided context and established knowledge of LLMs and NLP.
   - Cite the relevant source if necessary.
9. Handling Adversarial Questions:
    - For questions that deviate from LLMs and NLP, respond humorously that you cannot help.
</instructions>

<examples>
Question: "What is a LLM?"
Answer: "A Large Language Model (LLM) is a type of artificial intelligence (AI) model designed to understand, generate, and manipulate human language. These models are trained on vast amounts of text data and use advanced machine learning techniques, particularly deep learning, to perform tasks such as text generation, translation, summarization, question answering, and more. ---"

Question: "What is NLP?"
Answer: "Natural Language Processing (NLP) is a field of artificial intelligence (AI) that focuses on enabling machines to understand, interpret, generate, and manipulate human language. It combines linguistics and machine learning to process text and speech in a way that is both meaningful and useful. ---"

Question: "What's the difference between skip-gram and continuous bag of words?"
Answer: "Skip-gram predicts surrounding words given a target word, while Continuous Bag of Words (CBOW) predicts a target word from its surrounding context. Skip-gram tends to perform better on rare words by generating multiple training samples per word, whereas CBOW is faster and more efficient but may struggle with infrequent words. ---"

Question: "Who is the president of the United States of America?"
Answer: "Sorry, I have an exclusive relationship with the course of LLM and NLP. All other questions give me sudden selective amnesia! ---"
</examples>

<context>
{context}
</context>

<conversation history>
{chat_history}
</conversation history>



<question>
{question}
</question>

<answer>
"""

# Wrap the raw text in a PromptTemplate and declare the three placeholders it contains.
prompt_template = PromptTemplate(template=template, input_variables=["context", "chat_history", "question"])
# Chat model that generates the answers (Gemini 2.0 Flash, temperature 0.3).
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)


In [4]:
# Conversation memory: history is kept under "chat_history"; "question" and
# "answer" are named explicitly as the input and output keys to record.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# Similarity retriever over the FAISS index, returning the top 7 chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 7})

# Retrieval chain with memory; the custom prompt is handed to the
# document-combining step and the retrieved documents are returned with the answer.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt_template},
    return_source_documents=True,
    output_key="answer",
)

# Folder that receives the evaluation log.
os.makedirs("valutazioni", exist_ok=True)

# Persistence of the judge's evaluations.
def save_evaluation(evaluation_data):
    """Append one evaluation record to the JSON log and refresh the averages.

    The log is stored at ``valutazioni/valutazioni.json`` and holds two keys:
    ``"valutazioni"`` (every record saved so far, error records included) and
    ``"media"`` (per-criterion mean over the records without an ``"error"`` key,
    rounded to two decimals).

    Args:
        evaluation_data: dict describing one evaluation. Records containing an
            ``"error"`` key are stored but excluded from the averages.

    Returns:
        The log file path on success, or ``None`` when saving failed (the
        error is printed rather than raised).
    """
    filename = "valutazioni/valutazioni.json"
    criteri = ("pertinenza", "coerenza", "completezza", "chiarezza", "correttezza")

    def _punteggio(valore):
        # Scores come from an LLM: they may arrive as strings ("4") or be
        # missing/malformed. Anything non-numeric counts as 0, matching the
        # default already used for a missing criterion.
        try:
            return float(valore)
        except (TypeError, ValueError):
            return 0

    try:
        # Do not rely on another cell having created the folder.
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        # Read the existing log or start a fresh structure.
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if not isinstance(data, dict) or not isinstance(data.get("valutazioni"), list):
                raise ValueError(f"struttura inattesa in {filename}")
        else:
            data = {
                "valutazioni": [],
                "media": {criterio: 0 for criterio in criteri},
            }

        data["valutazioni"].append(evaluation_data)

        # Averages are computed only over records that are not error records.
        valutazioni_valide = [
            v for v in data["valutazioni"]
            if isinstance(v, dict) and "error" not in v
        ]

        if valutazioni_valide:
            data["media"] = {
                criterio: round(
                    sum(_punteggio(v.get(criterio, 0)) for v in valutazioni_valide)
                    / len(valutazioni_valide),
                    2,
                )
                for criterio in criteri
            }

        # Write to a sibling temp file and swap it in, so a failed or
        # interrupted dump can never leave a truncated log behind.
        tmp_filename = filename + ".tmp"
        try:
            with open(tmp_filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            os.replace(tmp_filename, filename)
        finally:
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)

        return filename
    except Exception as e:
        print(f"Errore nel salvataggio: {str(e)}")
        return None

# LLM-as-judge scoring of a chatbot answer.
def llm_judge(query, context, response):
    """Ask a Gemini judge model to score an answer and log the outcome.

    The judge is prompted (in Italian) to rate the answer from 1 to 5 on five
    criteria and to reply with a JSON object. Only the first 1000 characters
    of ``context`` are included in the prompt.

    Args:
        query: the user's question.
        context: text of the retrieved documents.
        response: the chatbot answer to evaluate.

    Returns:
        The parsed scores enriched with ``timestamp``, ``query`` and
        ``response`` on success. If the judge call or the parsing fails, an
        error record is logged through ``save_evaluation`` and
        ``{"error": "Errore nel parsing della valutazione"}`` is returned;
        failures of the judge are never raised to the caller.
    """
    evaluation_prompt = f"""
    Valuta la risposta del chatbot come esperto NLP/LLM seguendo questi criteri (1-5):
    1. Pertinenza alla domanda: "{query}"
    2. Coerenza col contesto fornito: {context[:1000]}...
    3. Completezza tecnica
    4. Chiarezza espositiva
    5. Correttezza accademica

    Risposta da valutare: "{response}"

    Fornisci i punteggi in formato JSON con questo schema:
    {{"pertinenza": int, "coerenza": int, "completezza": int, 
    "chiarezza": int, "correttezza": int, "commento": "stringa"}}
    """

    # Stays None when the judge call itself fails, so the error record can
    # still be written without touching an undefined variable.
    raw_response = None
    try:
        # The model call lives inside the try block: the evaluation is an
        # auxiliary step and an API/network error must not break the chat.
        judge_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
        evaluation = judge_llm.invoke(evaluation_prompt)
        raw_response = evaluation.content

        # Drop Markdown code fences, then keep only the outermost {...} span
        # so that any prose around the JSON object does not break parsing.
        text = str(raw_response).replace("```json", "").replace("```", "").strip()
        start, end = text.find("{"), text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]

        eval_data = json.loads(text)
        if not isinstance(eval_data, dict):
            raise ValueError("Formato della valutazione non valido: atteso un oggetto JSON")

        eval_data.update({
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "response": response
        })
        save_evaluation(eval_data)
        return eval_data
    except Exception as e:
        error_data = {
            "error": str(e),
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "raw_response": raw_response
        }
        save_evaluation(error_data)
        return {"error": "Errore nel parsing della valutazione"}
    

def ask_question(query):
    """Answer ``query`` with the retrieval chain and score the answer.

    Runs the conversational chain, joins the retrieved documents into one
    context string, asks the judge for an evaluation (printed to stdout) and
    returns the answer without its trailing "---" end marker.

    Args:
        query: the user's question.

    Returns:
        The answer text. When the answer ends with the "---" marker
        (optionally followed by a closing ``</answer>`` tag) the marker is
        removed and the text is stripped; otherwise the answer is returned
        unchanged.
    """
    result = conversation_chain.invoke({"question": query})
    response = result["answer"]

    context = "\n".join(doc.page_content for doc in result["source_documents"])

    # The evaluation is best-effort: a failing judge must not cost the user
    # the answer that has already been generated.
    try:
        evaluation = llm_judge(query, context, response)
    except Exception as e:
        evaluation = {"error": str(e)}

    print(f"\nValutazione Gemini: {evaluation}\n")

    # Remove the end marker only when it really is at the end. Splitting on
    # the first "---" would cut the answer at any Markdown table separator
    # row or horizontal rule that appears inside it.
    end_marker = "---"
    closing_tag = "</answer>"  # the prompt ends with an opening <answer> tag
    tail = response.rstrip()
    if tail.endswith(closing_tag):
        tail = tail[:-len(closing_tag)].rstrip()
    if tail.endswith(end_marker):
        return tail.rstrip("-").strip()
    return response


  memory = ConversationBufferMemory(


In [5]:
# Chat callback wired into the Gradio ChatInterface.
def gradio_chat(message, history):
    """Return the chatbot reply for ``message``.

    ``history`` is part of the callback signature but is not read here; the
    chain built in this notebook is configured with its own memory object.
    """
    return ask_question(message)

# Build the Gradio chat UI around the gradio_chat callback.
EXAMPLE_QUESTIONS = [
    "What is a Large Language Model?",
    "What is Natural Language Processing?",
    "Who are the members of the project group?",
]

chat_interface = gr.ChatInterface(
    fn=gradio_chat,
    title="Chatbot Team 4",
    description="By Raffaele Solimeno, Antonio Russomando, Luca Memoli and Giuseppe Marotta",
    examples=EXAMPLE_QUESTIONS,
    theme="JohnSmith9982/small_and_pretty",
)

# Entry point used as the target of the background server thread.
def start_gradio():
    """Launch the chat interface on 127.0.0.1:7861, with no share link and no browser tab."""
    launch_options = {
        "server_name": "127.0.0.1",
        "server_port": 7861,
        "share": False,
        "inbrowser": False,
    }
    chat_interface.launch(**launch_options)

# Start Gradio in a daemon thread; the desktop window is created from this thread.
server_thread = threading.Thread(target=start_gradio, daemon=True)
server_thread.start()

# Actually wait for the server: poll the port until it accepts a TCP
# connection (at most ~30 s) so the window does not open on a dead URL.
# If the deadline passes the window is opened anyway, as before.
startup_deadline = time.monotonic() + 30
while time.monotonic() < startup_deadline:
    try:
        socket.create_connection(("127.0.0.1", 7861), timeout=1).close()
        break
    except OSError:
        time.sleep(0.2)

# Open a desktop window with pywebview pointing at the local Gradio server.
webview.create_window("Chatbot LLM e NLP", "http://127.0.0.1:7861", width=1200, height=700)
webview.start()



* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\llm\Lib\site-packages\starlette\responses.py", line 259, in __call__
    await wrap(partial(self.listen_for_disconnect, receive))
  File "c:\ProgramData\anaconda3\envs\llm\Lib\site-packages\starlette\responses.py", line 255, in wrap
    await func()
  File "c:\ProgramData\anaconda3\envs\llm\Lib\site-packages\starlette\responses.py", line 232, in listen_for_disconnect
    message = await receive()
              ^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\llm\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 534, in receive
    await self.message_event.wait()
  File "c:\ProgramData\anaconda3\envs\llm\Lib\asyncio\locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 1c98071d250

During handling of the above exception, another exception occurred:

  + Exception Group Traceback (most recent call last):
  |   Fi


Valutazione Gemini: {'pertinenza': 5, 'coerenza': 5, 'completezza': 5, 'chiarezza': 5, 'correttezza': 5, 'commento': "La risposta è eccellente. È pertinente, coerente con il contesto fornito, completa tecnicamente, chiara nell'esposizione e corretta dal punto di vista accademico. L'aggiunta di spiegazioni sui concetti chiave (come zero-shot learning, BPE, positional encoding) aumenta notevolmente il valore della risposta.", 'timestamp': '2025-03-23T16:07:08.553143', 'query': 'Tell me the differences between LLama and GPT', 'response': "The comparison between the LLaMA and GPT models can be broken down into several key aspects:\n\n**Size Range:** LLaMA models come in sizes of 7B, 13B, 30B, and 65B parameters. GPT models range from 117M to 175B+ parameters, including GPT-3. The size of a model often correlates with its capacity to learn and generalize from data; larger models can capture more complex patterns but require more computational resources.\n\n**Training Data:** LLaMA is train