# ETIC Summarizer Interviews

This code implements a Retrieval-Augmented Generation (RAG) system designed to summarize interviews conducted as part of the early-career research within the Einstein Telescope Infrastructure Consortium (ETIC) project. The RAG system is used exclusively for the initial extraction of key concepts. These concepts are then refined and adjusted manually to ensure both accuracy and comprehensibility. **Note**: As with all AI-based applications, the program can make mistakes. Please verify important information.

This tutorial is associated with the submitted article *Early-Career Scientists and Communication Strategy: The ETIC Project Experience*.



In [1]:
import gradio as gr
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
import os

# This custom class filters only the content from the response, excluding metadata
class StrOutputParser:
    """Callable that reduces a model response to its plain-text content.

    Accepted inputs:
      * a ``dict``: the value stored under ``"content"`` is returned, or the
        placeholder ``"No content available"`` when that key is missing;
      * an object exposing a ``content`` attribute (message-like objects):
        that attribute is returned, converted with ``str`` if it is not
        already a string;
      * anything else: ``str(output)``.
    """

    def __call__(self, output):
        if isinstance(output, dict):
            return output.get("content", "No content available")

        # Message-like objects carry their text in ``.content``. Falling back
        # to str() for them would return the object's full string form
        # rather than just the text.
        content = getattr(output, "content", None)
        if content is not None:
            return content if isinstance(content, str) else str(content)

        return str(output)

# Chat model client shared by every summary request. It talks to a server on
# this machine, and the API key is only a placeholder value.
model = ChatOpenAI(
    model_name="mistral",                  # model to query
    base_url="http://localhost:11434/v1",  # endpoint of the local server
    api_key="dummy_key",                   # placeholder, not a real secret
    temperature=0,                         # 0 = no sampling randomness
)

# Directory that holds the English interview transcripts. The default is the
# location used when the notebook was written; set ETIC_INTERVIEWS_DIR to run
# the notebook on another machine without editing every path.
INTERVIEWS_DIR = os.environ.get(
    "ETIC_INTERVIEWS_DIR", "/Users/gius/Desktop/POSTER_polonia/en"
)

# Interviewees, in the order they appear in the dropdown.
INTERVIEWEES = (
    "Alessandro Parisi",
    "Alessandro Santarelli",
    "Andrea Lampis",
    "Angelo Loi",
    "Barbara Garaventa",
    "Diana Lumaca",
    "Elena Licciardello",
    "Emanuele Tofani",
    "Francesco Bianchi",
    "Gabriele Capoccia",
    "Giacomo Galimberti",
    "Hanna Skliarova",
    "Leonardo Lucchesi",
    "Lia Lavezzi",
    "Luca Tabasso",
    "Lucia Trozzo",
    "Luciano Errico",
    "Marco Moro",
    "Maria Cifaldi",
    "Matteo Cagnizi",
    "Matteo Ianni",
    "Michael Caminale",
    "Monica Marzario",
    "Monique Bossi",
    "Nicole Busdon",
    "Paolo Prosperi",
    "Peppe D'Aranno",
    "Stefania Gravina",
    "Stefano Mais",
    "Valeria Milotti",
    "Valerio Scacco",
)

# Dictionary mapping full names to their corresponding file paths.
# Each transcript file is named after the interviewee with spaces and
# punctuation removed, followed by "_en.txt"
# (e.g. "Peppe D'Aranno" -> "PeppeDAranno_en.txt").
document_paths = {
    name: os.path.join(
        INTERVIEWS_DIR,
        "".join(ch for ch in name if ch.isalnum()) + "_en.txt",
    )
    for name in INTERVIEWEES
}

# Function to generate a summary for the selected document
def generate_summary(selected_name):
    """Run the retrieval + generation chain on one interview transcript.

    Args:
        selected_name: Key of ``document_paths`` (the interviewee's full
            name as picked in the dropdown).

    Returns:
        The text produced by the chain for the keyword query, or a short
        explanatory message when the transcript cannot be opened or is empty.
    """
    not_found_message = "Document not found. Please check the file path."

    # Unknown (or missing) name: there is no path to open.
    selected_doc = document_paths.get(selected_name)
    if not selected_doc:
        return not_found_message

    # Open directly rather than testing os.path.exists() first: a path can
    # exist and still not be a readable file (for example a directory), and
    # the file can disappear between the check and the open.
    try:
        with open(selected_doc, 'r', encoding='utf-8') as file:
            content = file.read()
    except OSError:
        return not_found_message

    # A blank transcript has no text to extract keywords from.
    if not content.strip():
        return "Document is empty. Nothing to summarize."

    filename = os.path.basename(selected_doc)  # file name without directories
    doc = Document(page_content=content, metadata={"source": filename})

    # The interviews are short, so the chunk size is set far above their
    # length: the intent is to process each transcript as a whole instead of
    # cutting it into pieces.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000000, chunk_overlap=0)
    splits = text_splitter.split_documents([doc])

    # Embed the text and index it with FAISS; the retriever returns only the
    # single most similar entry (k=1).
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    faiss_index = FAISS.from_documents(splits, embeddings)
    retriever = faiss_index.as_retriever(search_type="similarity", search_kwargs={"k": 1})

    # Prompt template. The whitespace inside the literal is part of the text
    # sent to the model, so it is kept exactly as originally written.
    template = """
        You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
        If you don't know the answer, just say that you don't know. 

        Question: {question} 

        Context: {context} 

        Answer:
        """
    prompt = PromptTemplate.from_template(template)

    def format_documents(docs):
        """Join the retrieved documents' text, separated by blank lines."""
        return "\n\n".join(retrieved.page_content for retrieved in docs)

    # Chain: the query is passed through unchanged as the question and is
    # also used to retrieve the context; both fill the prompt, which goes to
    # the model, whose reply is reduced to text.
    rag_chain = (
        {
            "context": retriever | format_documents,
            "question": RunnablePassthrough()
        }
        | prompt
        | model
        | StrOutputParser()
    )

    # Alternative query kept for reference (prose summary instead of keywords):
    # f"Summarize the content of {filename}, focusing on recent activities in the Einstein Telescope project."
    query_text = f"List 15 keywords {filename} from the interview focusing on recent activities in the Einstein Telescope (ET) project and past experiences."
    return rag_chain.invoke(query_text)

# Gradio front end: one dropdown listing the interviewees, plain-text output
# produced by generate_summary.
interface = gr.Interface(
    title="ETIC Summarizer Interviews",
    description=(
        "Select a name to get a summary of the associated document.\n"
        "As with all generative AI applications, the program can make mistakes. Check important information.\n"
        "The input of this program is manually processed afterward to build the thematic cloud for each interviewee."
    ),
    fn=generate_summary,
    inputs=gr.Dropdown(label="Select a Document", choices=list(document_paths)),
    outputs="text",
)

# Start the app
interface.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Keywords for Each Interviewee
The previous RAG process will help us populate a JSON file, `individual_researcher_keywords.json`, as shown below. The numeric value following each key represents the font size within the word cloud. This process is done manually, taking into account the following considerations:

The font sizes are not proportional to word frequency; instead, they are differentiated to highlight distinct categories of information. Specifically, participants' names are displayed in larger fonts, followed by their affiliated institutions and the challenges they are addressing. Past experiences are represented in smaller fonts. This approach emphasizes current challenges and participants' contributions while still referencing their past experiences. Additionally, concepts that do not fit into the previous groupings but are considered noteworthy are also displayed with smaller font sizes.

In [None]:
import json

# Text shared by every entry whose description still has to be filled in
placeholder = "Add the relevant content by clicking on a word in the cloud and using the email function linked to that word in the box Selected Words."

# Template for one interviewee: "words" maps each keyword to its font-size
# weight in the cloud, "descriptions" maps the same keywords to their text.
data = {
    "words": {
        "Full Name": 4,
        "Affiliation": 3,
        "Experience 1": 2, "Experience 2": 2,
        "Research topic 1": 3, "Research topic 2": 3, "Research topic 3": 3,
        "Other 1": 1, "Other 2": 1,
    },
    "descriptions": {
        "Full Name": "Name of the interviewee.",
        "Affiliation": "Academic institution where she/he works.",
        **dict.fromkeys(
            (
                "Experience 1",
                "Experience 2",
                "Research topic 1",
                "Research topic 2",
                "Research topic 3",
                "Other 1",
                "Other 2",
            ),
            placeholder,
        ),
    },
}

# Write the template to disk as indented, non-ASCII-escaped JSON
with open('individual_researcher_keywords.json', mode='w', encoding='utf-8') as handle:
    json.dump(data, handle, ensure_ascii=False, indent=4)

print("The file 'individual_researcher_keywords.json' has been successfully saved.")


## Keywords of All Interviewees

All identified keywords are collected in a JSON file named `all_researchers_keywords.json`. This file allows users to check if the same keyword has been used by other interviewees. If the keyword appears in other thematic clouds, users can see who else has used it and consider reaching out to explore potential collaborations based on shared interests.

In [None]:
import json

# One record per keyword, pairing it with the interviewee who used it
all_keywords = [
    {"word": keyword, "name": "Full Name"}
    for keyword in (
        "Experience 1",
        "Experience 2",
        "Research topic 1",
        "Research topic 2",
    )
]

# Destination of the shared keyword list
file_name = "all_researchers_keywords.json"

# Dump the records as indented, non-ASCII-escaped JSON
with open(file_name, mode='w', encoding='utf-8') as out:
    json.dump(all_keywords, out, ensure_ascii=False, indent=4)

print(f"File successfully saved to {file_name}")


# ETIC Thematic Clouds
This Python script generates a dynamic word cloud web application using the two files generated in the previous cells `individual_researcher_keywords.json` and `all_researchers_keywords.json`. It creates necessary directories and files for CSS, JavaScript, and HTML, defines the styles and behavior for the application, and processes keywords, descriptions, and media files to visualize them interactively. Users can click on words in the cloud to select them, view descriptions, and access related media. Selected words can be emailed or removed, with functionality to save or reset the word cloud. A local web server is started to host the application, which opens automatically in the default browser for user interaction.           

In [None]:
import webbrowser
import os
import json
import glob

def create_directories_and_files():
    """
    Creates necessary directories ('css' and 'js') and writes default CSS and JavaScript files.
    Ensures the required folder structure and styles/scripts are set up for the web application.
    """

    os.makedirs('css', exist_ok=True)
    os.makedirs('js', exist_ok=True)

    css_content = """
    @import url('https://fonts.googleapis.com/css2?family=Pacifico&display=swap');
    body {
        font-family: Verdana, Arial, Helvetica, sans-serif;
        display: flex;
        flex-direction: column;
        align-items: center;
        background-color: #f0f0f0;
        margin: 0;
        padding: 0;
    }
    nav {
        width: 100%;
        background-color: #007bff;
        padding: 10px 20px;
        color: white;
        text-align: center;
        font-size: 24px;
        font-weight: bold;
    }
    .cloud-text {
        font-family: 'Pacifico', cursive;
        font-size: 48px;
        color: #f0f0f0;
        text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
    }
    .version {
        font-size: 12px;
    }
    #description {
        padding: 10px;
        text-align: center;
        background-color: white;
        border-radius: 10px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
        margin: 20px 15px;
    }
    #controls {
        display: flex;
        justify-content: flex-start;
        align-items: center;
        gap: 20px;
        margin-bottom: 20px;
        flex-wrap: wrap;
    }
    #reset-button, #save-button {
        padding: 10px 20px;
        font-size: 16px;
        background-color: #007bff;
        color: white;
        border: none;
        border-radius: 5px;
        cursor: pointer;
        transition: background-color 0.3s;
    }
    #reset-button:hover, #save-button:hover {
        background-color: #0056b3;
    }
    #search-input {
        padding: 10px;
        font-size: 16px;
        width: 300px;
        border: 1px solid #ccc;
        border-radius: 5px;
    }
    #slider-container {
        display: flex;
        align-items: center;
        gap: 10px;
    }
    #word-size-slider {
        width: 200px;
    }
    #content {
        display: flex;
        flex-direction: row;
        gap: 20px;
        width: 100%;
        max-width: 1200px;
        flex-wrap: wrap;
    }
    #wordcloud-container {
        display: flex;
        flex-direction: column;
        align-items: flex-start;;
        background-color: white;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
        flex-grow: 1;
        max-width: 800px;
        min-width: 300px;
    }
    #selected-words-container {
        width: 200px;
        max-width: 300px;
        padding: 20px;
        border: 1px solid #ccc;
        background-color: white;
        border-radius: 10px;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
        overflow-y: auto;
        max-height: 400px;
    }
    .word {
        cursor: pointer;
        transition: fill 0.5s, transform 0.5s;
        position: relative;
    }
    .word:hover {
        font-weight: bold;
    }
    .highlight {
        font-size: 1.5em !important;
        fill: red !important;
    }
    .highlight-bar {
        fill: red !important;
    }
    .selected {
        text-decoration: line-through;
        pointer-events: none;
        fill: grey !important;
    }
    #description-container {
        margin-top: 10px;
        background-color: #f9f9f9;
        padding: 10px;
        border-top: 1px solid #ccc;
        text-align: center;
        width: 100%;
        box-shadow: 0px -2px 10px rgba(0, 0, 0, 0.1);
        border-radius: 10px;
    }
    #description-text {
        margin: 0;
        padding: 0;
        font-size: 14px;
        color: black;
    }
    .media-content {
        margin-top: 10px;
        display: flex;
        flex-wrap: wrap;
        gap: 10px;
    }
    .media-content img, .media-content video {
        border: 1px solid #ccc;
        border-radius: 5px;
        padding: 5px;
        background-color: white;
    }
    .media-content img:hover, .media-content video:hover {
        border-color: #007bff;
    }
    #selected-words-container div {
        display: flex;
        flex-direction: column;
        align-items: flex-start;
        margin: 5px 0;
        font-size: 16px;
        word-wrap: break-word;
        transition: color 0.5s;
    }
    #selected-words-container div:hover {
        color: red;
    }
    .email-button, .remove-button, .search-button {
        padding: 5px 10px;
        font-size: 14px;
        border: none;
        border-radius: 5px;
        cursor: pointer;
        transition: background-color 0.3s;
        width: 100%;
        box-sizing: border-box;
    }
    .email-button {
        background-color: #28a745;
        color: white;
    }
    .email-button:hover {
        background-color: #218838;
    }
    .remove-button {
        background-color: #dc3545;
        color: white;
    }
    .remove-button:hover {
        background-color: #c82333;
    }
    .search-button {
        background-color: #ffc107;
        color: black;
    }
    .search-button:hover {
        background-color: #e0a800;
    }
    .pushpin {
        font-family: 'Segoe UI Emoji';
        border-radius: 50%;
        cursor: pointer;
        display: flex;
        align-items: center;
        justify-content: center;
        transition: background-color 0.3s, transform 0.3s;
        width: 25px;
        height: 25px;
        border: 2px solid black;
    }
    .pushpin.pinned {
        transform: scale(1.4);
    }
    .pin-group {
        display: flex;
        align-items: center;
        gap: 5px;
    }
    .context-menu {
        position: absolute;
        background-color: white;
        border: 1px solid #ccc;
        box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
        z-index: 1000;
        display: none;
        flex-direction: column;
        gap: 10px;
        padding: 10px;
        border-radius: 5px;
    }
    .context-menu-item {
        padding: 5px 10px;
        cursor: pointer;
    }
    .context-menu-item:hover {
        background-color: #f0f0f0;
    }
    .modal {
        display: none;
        position: fixed;
        z-index: 1001;
        left: 0;
        top: 0;
        width: 100%;
        height: 100%;
        overflow: auto;
        background-color: rgba(0,0,0,0.8);
        justify-content: center;
        align-items: center;
    }
    .modal-content {
        background-color: white;
        margin: 15% auto;
        padding: 20px;
        border: 1px solid #888;
        width: 80%;
        max-width: 700px;
        text-align: center;
        border-radius: 10px;
    }
    .modal-content img, .modal-content video {
        max-width: 100%;
        height: auto;
    }
    .close {
        color: #aaa;
        float: right;
        font-size: 28px;
        font-weight: bold;
    }
    .close:hover,
    .close:focus {
        color: black;
        text-decoration: none;
        cursor: pointer;
    }
    @media (max-width: 768px) {
        #content {
            flex-direction: column;
        }
        #selected-words-container {
            max-height: 500px;
            width: 100%;
        }
        nav {
            font-size: 20px;
        }
    }
    footer {
        background-color: #007bff;
        color: white;
        text-align: center;
        padding: 10px;
        width: 100%;
        font-size: 14px;
        margin-top: 20px;
    }
    #points-badge {
        display: flex;
        justify-content: center;
        align-items: center;
        padding: 10px;
        background-color: gold;
        border-radius: 50%;
        width: 50px;
        height: 50px;
        font-size: 18px;
        font-weight: bold;
        color: black;
        position: absolute;
        top: 10px;
        right: 10px;
    }
    #frequency-chart {
        width: 100%;
        max-width: 600px;
        height: 400px;
        margin-top: 20px;
    }
    """
    with open('css/style.css', 'w') as css_file:
        css_file.write(css_content)

    js_content = """
    document.addEventListener("DOMContentLoaded", function() {
        var allWords = {};
        var words = JSON.parse(document.getElementById('words-data').textContent);
        var descriptions = JSON.parse(document.getElementById('descriptions-data').textContent);
        var predefined_list = JSON.parse(document.getElementById('predefined-list').textContent);
        var mediaFiles = JSON.parse(document.getElementById('media-files').textContent);
        var points = localStorage.getItem("points") ? parseInt(localStorage.getItem("points")) : 0;
        var pointsBadge = document.getElementById("points-badge");
        
        // Custom high-contrast color scheme with 15 colors
        var fill = d3.scaleOrdinal([
            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
            "#393b79", "#637939", "#8c6d31", "#843c39", "#5254a3"
        ]);

        var selectedWords = new Set();
        var pinnedWord = null;
        var isPinned = false; // Flag to track if a word is pinned

        max_freq = Math.max(...Object.values(words));
        min_freq = Math.min(...Object.values(words));
        scale_factor = 20 / Math.log(max_freq - min_freq + 1);
        allWords = Object.fromEntries(Object.entries(words).map(([word, freq]) => [word, 28 + scale_factor * Math.log((freq - min_freq) + 1)]));

        var showContextMenuMessage = sessionStorage.getItem("showContextMenuMessage") !== "false";

        function draw(words) {
            var svg = d3.select("#wordcloud-svg").selectAll("*").remove();
            var width = document.getElementById('wordcloud-container').clientWidth;
            var height = document.getElementById('wordcloud-container').clientHeight;
            svg = d3.select("#wordcloud-svg")
                .attr("width", width)
                .attr("height", height)
                .append("g")
                .attr("transform", "translate(" + width / 2 + "," + height / 2 + ")");

            var text = svg.selectAll("text")
                .data(words)
                .enter().append("text")
                .attr("class", function(d) { return selectedWords.has(d.text) ? "word selected" : "word"; })
                .style("font-size", function(d) { return d.size + "px"; })
                .style("fill", function(d, i) { return fill(i); })
                .attr("text-anchor", "middle")
                .attr("transform", function(d) {
                    return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
                })
                .text(function(d) { return d.text; })
                .on("mouseover", function(event, d) {
                    if (!isPinned) {
                        showDescription(d.text);
                    }
                })
                .on("mouseout", function(event, d) {
                    if (!isPinned) {
                        clearDescription();
                    }
                })
                .on("click", function(event, d) {
                    if (selectedWords.has(d.text)) return;
                    d3.select(this).transition()
                        .duration(500)
                        .attr("transform", "translate(" + (width / 2 + Math.random() * 200 - 100) + "," + (height / 2 + Math.random() * 200 - 100) + ")rotate(0)")
                        .style("opacity", 0)
                        .remove();

                    selectedWords.add(d.text);

                    var wordDiv = document.createElement("div");
                    wordDiv.textContent = d.text;
                    wordDiv.style.color = fill(d.index);

                    var buttonContainer = document.createElement("div");
                    buttonContainer.style.display = "flex";
                    buttonContainer.style.gap = "5px";
                    buttonContainer.style.flexWrap = "wrap";

                    var searchButton = document.createElement("button");
                    searchButton.textContent = "Search";
                    searchButton.className = "search-button";
                    searchButton.style.flex = "1 1 auto";

                    
searchButton.onclick = function() {
    var searchText = d.text.toLowerCase(); // Usa l'intera stringa per la ricerca
    var matches = predefined_list.filter(item => {
        var predefinedText = item.word.toLowerCase(); // Confronta l'intera stringa
        return predefinedText.includes(searchText); // Verifica se la stringa cercata è contenuta
    });

    if (matches.length > 0) {
        var names = matches.map(match => `${match.name} (${match.word})`).join(", ");
        alert(`"${d.text}" The same keyword has been found in your colleagues' word cloud. Names: ${names}`);
    } else {
        alert(`"${d.text}" The keyword has not been found in your colleagues' word cloud.`);
    }
};

                    
                    buttonContainer.appendChild(searchButton);

                    var emailButton = document.createElement("button");
                    emailButton.textContent = "Email";
                    emailButton.className = "email-button";
                    emailButton.style.flex = "1 1 auto";
                    emailButton.onclick = function() {
                        var subject = "Descrizione: " + d.text;
                        window.location.href = 'mailto:giuseppe.greco@pg.infn.it?subject=' + encodeURIComponent(subject);
                        points += 10;  // Aggiungi 10 punti per ogni email inviata
                        updatePointsBadge();
                        localStorage.setItem("points", points);  // Salva i punti in localStorage
                    };
                    buttonContainer.appendChild(emailButton);

                    var removeButton = document.createElement("button");
                    removeButton.textContent = "Remove";
                    removeButton.className = "remove-button";
                    removeButton.style.flex = "1 1 auto";
                    removeButton.onclick = function() {
                        wordDiv.remove();
                        selectedWords.delete(d.text);
                        generateWordCloud();
                    };
                    buttonContainer.appendChild(removeButton);

                    wordDiv.appendChild(buttonContainer);
                    document.getElementById("selected-words-container").appendChild(wordDiv);

                    saveSelectedWords();
                    showDescription(d.text);  // Show description and media
                });

            var pinGroup = svg.selectAll("g.pin-group")
                .data(words)
                .enter().append("g")
                .attr("class", "pin-group")
                .attr("transform", function(d) {
                    return "translate(" + [d.x, d.y] + ")";
                });

            pinGroup.append("foreignObject")
                .attr("width", function(d) { return d.size + "px"; })
                .attr("height", function(d) { return d.size + "px"; })
                .attr("x", function(d) { return d.size / 2 + "px"; })
                .attr("y", function(d) { return -d.size / 2 + "px"; })
                .append("xhtml:div")
                .attr("class", "pushpin")
                .style("background-color", function(d, i) { return fill(i) + "b3"; })  // 70% transparency
                .on("click", function(event, d) {
                    d3.selectAll(".pushpin").classed("pinned", false).html(''); // Reset all pins
                    var currentPin = d3.select(this);
                    if (pinnedWord === d.text) {
                        pinnedWord = null;
                        isPinned = false;
                        clearDescription();
                        currentPin.classed("pinned", false);
                    } else {
                        pinnedWord = d.text;
                        isPinned = true;
                        showDescription(d.text);
                        currentPin.classed("pinned", true);
                        currentPin.html('📌'); // Add pin icon
                    }
                    d3.selectAll(".pushpin").classed("pinned", function(t) {
                        return t.text === pinnedWord;
                    });

                    if (showContextMenuMessage) {
                        var contextMenu = document.getElementById("context-menu");
                        contextMenu.innerHTML = `
                            <div class='context-menu-item'>
                                Click to pin or unpin a word. Pinned words' descriptions are always shown.
                                <button id="hide-context-menu" style="margin-top: 10px;">Don't show again</button>
                            </div>`;
                        contextMenu.style.display = "flex";
                        contextMenu.style.left = event.pageX + "px";
                        contextMenu.style.top = event.pageY + "px";
                        event.stopPropagation();

                        document.getElementById("hide-context-menu").onclick = function() {
                            showContextMenuMessage = false;
                            contextMenu.style.display = "none";
                        };
                    }
                });
        }

        function showDescription(word) {
            if (isPinned && pinnedWord !== word) {
                return;
            }
            var description = descriptions[word] || "No description available";
            var descriptionText = document.getElementById("description-text");
            descriptionText.innerHTML = description;

            var mediaContent = document.createElement("div");
            mediaContent.className = "media-content";

            var files = mediaFiles[word.toLowerCase().replace(/\\s+/g, '_')] || [];
            if (files.length > 0) {
                files.forEach(function(file) {
                    var mediaElement;
                    if (file.endsWith(".mp4")) {
                        mediaElement = document.createElement("video");
                        mediaElement.width = 320;
                        mediaElement.height = 240;
                        mediaElement.controls = true;
                        var source = document.createElement("source");
                        source.src = file;
                        source.type = "video/mp4";
                        mediaElement.appendChild(source);
                    } else if (file.endsWith(".jpg") || file.endsWith(".png")) {
                        mediaElement = document.createElement("img");
                        mediaElement.src = file;
                        mediaElement.width = 320;
                        mediaElement.height = 240;
                        mediaElement.onclick = function() {
                            openModal(file);
                        };
                    }
                    mediaContent.appendChild(mediaElement);
                });
            } else {
                var noMediaMessage = document.createElement("p");
                noMediaMessage.textContent = "No media files found";
                mediaContent.appendChild(noMediaMessage);
            }

            descriptionText.appendChild(mediaContent);
        }

        function openModal(src) {
            var modal = document.getElementById("myModal");
            var modalImg = document.getElementById("modal-content");

            modal.style.display = "flex";
            if (src.endsWith(".mp4")) {
                modalImg.innerHTML = '<video width="100%" height="auto" controls><source src="' + src + '" type="video/mp4"></video>';
            } else {
                modalImg.innerHTML = '<img src="' + src + '" style="width:100%">';
            }

            var span = document.getElementsByClassName("close")[0];
            span.onclick = function() {
                modal.style.display = "none";
            };
        }

        function clearDescription() {
            if (isPinned) {
                return;
            }
            var descriptionText = document.getElementById("description-text");
            descriptionText.innerHTML = "";
        }

        function saveSelectedWords() {
            var selectedWordsArray = Array.from(selectedWords);
            localStorage.setItem("selectedWords", JSON.stringify(selectedWordsArray));
        }

        function loadSelectedWords() {
            var selectedWordsArray = JSON.parse(localStorage.getItem("selectedWords"));
            if (selectedWordsArray) {
                selectedWordsArray.forEach(function(word) {
                    selectedWords.add(word);
                    var wordDiv = document.createElement("div");
                    wordDiv.textContent = word;
                    wordDiv.style.color = fill(selectedWords.size);

                    var buttonContainer = document.createElement("div");
                    buttonContainer.style.display = "flex";
                    buttonContainer.style.gap = "5px";
                    buttonContainer.style.flexWrap = "wrap";

                    var searchButton = document.createElement("button");
                    searchButton.textContent = "Search";
                    searchButton.className = "search-button";
                    searchButton.style.flex = "1 1 auto";
                    searchButton.onclick = function() {
                        var matches = predefined_list.filter(item => item.word === word);
                        if (matches.length > 0) {
                            var names = matches.map(match => match.name).join(", ");
                            alert(word + " found in the keyword list: Names: " + names);
                        } else {
                            alert(word + " not found in the keyword list.");
                        }
                    };
                    buttonContainer.appendChild(searchButton);

                    var emailButton = document.createElement("button");
                    emailButton.textContent = "Email";
                    emailButton.className = "email-button";
                    emailButton.style.flex = "1 1 auto";
                    emailButton.onclick = function() {
                        var subject = "Descrizione: " + word;
                        window.location.href = 'mailto:gius?subject=' + encodeURIComponent(subject);
                        points += 10;  // Aggiungi 10 punti per ogni email inviata
                        updatePointsBadge();
                        localStorage.setItem("points", points);  // Salva i punti in localStorage
                    };
                    buttonContainer.appendChild(emailButton);

                    var removeButton = document.createElement("button");
                    removeButton.textContent = "Remove";
                    removeButton.className = "remove-button";
                    removeButton.style.flex = "1 1 auto";
                    removeButton.onclick = function() {
                        wordDiv.remove();
                        selectedWords.delete(word);
                        generateWordCloud();
                    };
                    buttonContainer.appendChild(removeButton);

                    wordDiv.appendChild(buttonContainer);
                    document.getElementById("selected-words-container").appendChild(wordDiv);
                });
            }
        }

        function updatePointsBadge() {
            pointsBadge.textContent = points;
        }

        function generateWordCloud(sizeMultiplier = 1) {
            var width = document.getElementById('wordcloud-container').clientWidth;
            var height = 600;
            d3.layout.cloud()
                .size([width, height])
                .words(Object.keys(allWords).filter(word => !selectedWords.has(word)).map(function(d) {
                    return {'text': d, 'size': allWords[d] * sizeMultiplier};
                }))
                .padding(10)
                .rotate(function(d) { return (d.size > 50 ? 0 : ~~(Math.random() * 2) * 90); })
                .font("Impact")
                .fontSize(function(d) { return d.size; })
                .on("end", function(words, bounds) {
                    var width = bounds[1].x - bounds[0].x + 20;
                    var height = bounds[1].y - bounds[0].y + 20;
                    d3.select("#wordcloud-svg")
                        .attr("width", width)
                        .attr("height", height)
                        .append("g")
                        .attr("transform", "translate(" + width / 2 + "," + height / 2 + ")");
                    draw(words);
                    generateFrequencyChart();
                })
                .start();
        }

        function generateFrequencyChart() {
            var chartData = Object.keys(allWords).map(function(d) {
                return {'word': d, 'frequency': allWords[d]};
            });
            chartData.sort(function(a, b) {
                return b.frequency - a.frequency;
            });
            
            var svg = d3.select("#frequency-chart").html("").append("svg")
                .attr("width", 600)
                .attr("height", 400)
                .append("g")
                .attr("transform", "translate(50, 20)");

            var x = d3.scaleBand()
                .range([0, 500])
                .padding(0.1)
                .domain(chartData.map(function(d) { return d.word; }));
            var y = d3.scaleLinear()
                .range([300, 0])
                .domain([0, d3.max(chartData, function(d) { return d.frequency; })]);

            svg.append("g")
                .attr("class", "x axis")
                .attr("transform", "translate(0,300)")
                .call(d3.axisBottom(x))
                .selectAll("text")
                .attr("y", 0)
                .attr("x", 9)
                .attr("dy", ".35em")
                .attr("transform", "rotate(45)")
                .style("text-anchor", "start");

            svg.selectAll(".bar")
                .data(chartData)
                .enter().append("rect")
                .attr("class", "bar")
                .attr("id", function(d) { return "bar-" + d.word.replace(/\\s+/g, '-'); })
                .attr("x", function(d) { return x(d.word); })
                .attr("width", x.bandwidth())
                .attr("y", function(d) { return y(d.frequency); })
                .attr("height", function(d) { return 300 - y(d.frequency); })
                .style("fill", function(d, i) { return fill(i); })
                .on("click", function(event, d) {
                    var barElement = d3.select(this);
                    var isHighlighted = barElement.classed("highlight-bar");
                    d3.selectAll("#wordcloud-svg .word").classed("highlight", false).transition().style("font-size", function(d) { return d.size + "px"; }).style("fill", function(d, i) { return fill(i); });
                    d3.selectAll(".bar").classed("highlight-bar", false);
                    if (!isHighlighted) {
                        barElement.classed("highlight-bar", true);
                        d3.selectAll("#wordcloud-svg .word").filter(function(word) { return word.text === d.word; }).classed("highlight", true).transition().style("font-size", function(word) { return word.size * 1.5 + "px"; }).style("fill", "red");
                    }
                });
        }

        document.getElementById("reset-button").onclick = function() {
            document.getElementById("selected-words-container").innerHTML = "<h3>Selected Words</h3>";
            words = {...allWords};
            selectedWords.clear();
            generateWordCloud();
            localStorage.removeItem("selectedWords");
        };

        document.getElementById("save-button").onclick = function() {
            html2canvas(document.querySelector("#wordcloud-container")).then(canvas => {
                var link = document.createElement('a');
                link.download = 'wordcloud.jpg';
                link.href = canvas.toDataURL('image/jpeg');
                link.click();
            });
        };

        document.getElementById("search-input").oninput = function() {
            var searchValue = this.value.toLowerCase();
            var filteredWords = Object.keys(words)
                .filter(word => word.toLowerCase().includes(searchValue))
                .map(word => { return {'text': word, 'size': allWords[word]}; }); // Keep the original size
            d3.layout.cloud()
                .size([600, 400])
                .words(filteredWords)
                .padding(10)
                .rotate(function() { return 0; })
                .font("Impact")
                .fontSize(function(d) { return d.size; }) // Keep the original size
                .on("end", draw)
                .start();
        };

        document.getElementById("word-size-slider").oninput = function() {
            var sizeMultiplier = this.value / 50; // Normalize around 1
            generateWordCloud(sizeMultiplier);
        };

        generateWordCloud();
        loadSelectedWords();
        updatePointsBadge();

        // Hide context menu when clicking anywhere else
        document.addEventListener("click", function() {
            var contextMenu = document.getElementById("context-menu");
            contextMenu.style.display = "none";
        });
    });
    """
    with open('js/scripts.js', 'w') as js_file:
        js_file.write(js_content)

def get_media_files():
    """
    Group the files found in the ``media`` directory by keyword.

    Only entries matching ``media/*.*`` (relative to the current working
    directory) are considered. The keyword of a file is the lower-cased part
    of its base name that precedes the last underscore, e.g.
    ``media/black_hole_1.jpg`` is stored under ``black_hole``. Files whose
    name contains no underscore are grouped under the empty string.

    Returns:
        dict: keyword -> list of file paths, each list in sorted order.
    """
    media_files = {}
    # glob yields entries in filesystem order, which differs between runs and
    # platforms; sorting keeps the order of the media shown for a word stable.
    for media_file in sorted(glob.glob('media/*.*')):
        base_name = os.path.basename(media_file)
        # Everything before the last underscore identifies the word.
        word_key = base_name.rpartition('_')[0].lower()
        print(f"Processing file: {base_name}, extracted key: {word_key}")
        media_files.setdefault(word_key, []).append(media_file)

    return media_files

def _script_safe_json(obj):
    """Serialize *obj* to JSON that can be embedded inside a <script> element."""
    # A literal "</script>" inside the data would close the element early.
    # "\/" is a valid JSON escape for "/", so the parsed value is unchanged.
    return json.dumps(obj).replace("</", "<\\/")


def generate_wordcloud(word_file, predefined_list):
    """
    Generate the interactive word cloud page and open it in a web browser.

    The function creates the supporting directories and files, reads the
    words and their descriptions from ``word_file``, collects the available
    media files, writes ``index.html`` in the current working directory and
    opens that file in the default browser.

    Args:
        word_file: Path to a JSON file with the top-level keys ``words`` and
            ``descriptions``.
        predefined_list: JSON-serializable keyword list embedded in the page
            under the ``predefined-list`` element.

    Errors are reported on standard output instead of being raised: a missing
    file, malformed JSON, or any other exception only prints a message.
    """
    from pathlib import Path

    try:
        create_directories_and_files()

        # The generated page declares UTF-8, so read the data the same way
        # instead of relying on the platform's locale encoding.
        with open(word_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
            words = data['words']
            descriptions = data['descriptions']

        media_files = get_media_files()

        html_content = f"""
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <title>ETIC Thematic Clouds</title>
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <link rel="stylesheet" href="css/style.css">
            <script src="https://d3js.org/d3.v7.min.js"></script>
            <script src="https://cdnjs.cloudflare.com/ajax/libs/d3-cloud/1.2.7/d3.layout.cloud.min.js"></script>
            <script src="https://html2canvas.hertzen.com/dist/html2canvas.min.js"></script>
        </head>
        <body>
            <nav><span class="cloud-text">ETIC Thematic Clouds</span> <span style="font-size: 12px;">Version 0.0.5</span></nav>
            <div id="points-badge">0</div>
            <div id="description">
                <p> ETIC Thematic Clouds is a resource aimed at gathering information, including texts, videos, and images.
                The word cloud is generated through the transcription of the 
                <a href="https://www.youtube.com/playlist?list=PLP9pGhkLJlAXdXbzVvcl4snkAMkgTEjNH" target="_blank" rel="noopener noreferrer">interviews</a>
                conducted by the new hires of the 
                <a href="https://www.einstein-telescope.it/et-in-italia/" target="_blank" rel="noopener noreferrer"> ETIC (Einstein Telescope Infrastructure Consortium)</a>
                project. Users have the option to click on any word in the cloud, which will then be
                moved to a specific table. By pressing the "email" button, users can send additional material related to that word,
                providing a detailed description of the concept and their work. This material will be used to populate
                the contextual menu associated with each word in the cloud, further facilitating exploration and access
                to relevant information. The material will be used by the Communication Office to create new outreach and social media resources.
                </p>




            </div>
            <div id="controls">
                <button id="reset-button">Reset Wordcloud</button>
                <button id="save-button">Save Wordcloud</button>
                <input type="text" id="search-input" placeholder="Search for a word...">
                <div id="slider-container">
                    <label for="word-size-slider">Word Size: </label>
                    <input type="range" id="word-size-slider" min="10" max="100" value="50">
                </div>
            </div>
            <div id="content">
                <div id="wordcloud-container">
                    <svg id="wordcloud-svg"></svg>
                    <div id="description-container"><p id="description-text"></p></div>
                </div>
                <div id="selected-words-container">
                    <h3>Selected Words</h3>
                </div>
               <!-- <div id="frequency-chart"></div> -->
            </div>
            <footer>
                  Developed by Giuseppe Greco - Scientific Assistant of the ETIC Project -
                  <a href="mailto:giuseppe.greco@pg.infn.it" style="color: white; text-decoration: underline;">Contatta</a>
            </footer>
            <div id="myModal" class="modal">
                <div class="modal-content" id="modal-content"></div>
                <span class="close">&times;</span>
            </div>
            <div id="context-menu" class="context-menu"></div>
            <script id="words-data" type="application/json">{_script_safe_json(words)}</script>
            <script id="descriptions-data" type="application/json">{_script_safe_json(descriptions)}</script>
            <script id="predefined-list" type="application/json">{_script_safe_json(predefined_list)}</script>
            <script id="media-files" type="application/json">{_script_safe_json(media_files)}</script>
            <script src="js/scripts.js"></script>
        </body>
        </html>
        """

        # Write with the encoding announced by the <meta charset> tag.
        with open('index.html', 'w', encoding='utf-8') as html_file:
            html_file.write(html_content)

        # as_uri() builds a well-formed file:// URL (percent-encoding, drive
        # letters), which plain string concatenation does not.
        webbrowser.open(Path('index.html').absolute().as_uri())

    except FileNotFoundError:
        print(f"File {word_file} not found. Please check the file path and try again.")
    except json.JSONDecodeError:
        print("Error decoding JSON. Please check the format of the JSON file.")
    except Exception as e:
        # Last-resort boundary: report anything else (e.g. a missing key in
        # the JSON data) without aborting the calling script.
        print(f"An unexpected error occurred: {e}")

def load_predefined_list(file_path):
    """
    Load the keyword list stored as JSON in ``file_path``.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist or does not contain valid JSON (a message is printed in both
        cases).
    """
    try:
        # Decode explicitly as UTF-8 so non-ASCII names and keywords are read
        # the same way on every platform, whatever the locale encoding is.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File {file_path} non trovato.")
    except json.JSONDecodeError:
        print("Errore di decodifica JSON.")
    return []

# Keyword list shared by all researchers, loaded once when this code runs.
# If the file is missing or is not valid JSON, load_predefined_list prints a
# message and this becomes an empty list.
all_researchers_keywords = load_predefined_list('all_researchers_keywords.json')

def start_server():
    """
    Start a local HTTP server to host the web application.

    The current working directory is served on port 8000 and
    ``http://localhost:8000/index.html`` is opened in the default web
    browser. The call blocks while the server is running; a keyboard
    interrupt stops it and prints a confirmation message.

    If the port cannot be bound, a message is printed and the page is still
    opened, in case another server is already listening on that port.
    """
    # Imported locally so this function does not depend on the import block
    # at the top of the file.
    from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer

    url = "http://localhost:8000/index.html"

    # Inform the user that the server is starting
    print("Starting server at http://localhost:8000...")

    # Serve in-process rather than shelling out to "python -m http.server":
    # this does not depend on which "python" is on PATH, and Ctrl-C reaches
    # this function so the handler below actually runs.
    try:
        httpd = ThreadingHTTPServer(("", 8000), SimpleHTTPRequestHandler)
    except OSError as exc:
        print(f"Could not start server on port 8000: {exc}")
        # Something may already be serving on this port; open the page anyway.
        webbrowser.open(url)
        return

    with httpd:
        # The socket is already listening here, so the browser's first request
        # is queued until serve_forever() picks it up instead of being refused.
        webbrowser.open(url)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Handle keyboard interrupt to stop the server gracefully
            print("\nServer stopped.")

# Entry point: build index.html from the per-researcher keyword file (using
# the keyword list loaded above), then serve it over HTTP. start_server()
# blocks while the server is running.
generate_wordcloud('individual_researcher_keywords.json', all_researchers_keywords)
start_server()
