# Imports the libraries and defines the functions needed to create mind maps from PDFs.

In [None]:
"""
This module handles AI-powered mind map generation using the Gemini API.
It includes utilities for retrying API calls, environment variable management,
and Jupyter notebook display enhancements.
"""

# Standard library imports
import os
import time
import datetime
import subprocess
import threading
import itertools

# Third-party imports
from tenacity import retry, stop_after_attempt, wait_exponential
from dotenv import load_dotenv
import google.generativeai as genai

# Jupyter-specific imports
from IPython.display import Image, display, clear_output
from ipywidgets import widgets, IntProgress, HBox, Label, Button, Layout

# Load environment variables from the 'keys.env' file.
# That file is expected to define GOOGLE_API_KEY.
load_dotenv("keys.env")
# os.getenv returns None when the variable is not set; no check is made here.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

# Configure the Gemini client library with the API key read above.
# NOTE(review): a missing or invalid key is presumably only reported once the
# API is actually called - confirm against the client library.
genai.configure(api_key=GOOGLE_API_KEY)

# Create the module-level model handle that run_API uses for every request.
# 'gemini-1.5-flash' was chosen by the author for faster processing.
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Prompt sent to the model together with each uploaded PDF (process_files
# passes it to process_file_with_retry).
# It requests a PlantUML mind map whose topics start with '*' level markers,
# which is the shape run_API filters for and check_criteria validates.
prompt_text = '''
Please create a mind map of this PDF in .puml format with topics and subtopics to be used in plantuml, without indentations.
Ensure that the .puml file contains no more than 90 lines. You are encouraged to fill 60-80 lines.
Maintain the language of the PDF.
The first level should start with '* '. The second level should start with '** ', and so on.
Do not use '-' at the beginning of each topic.
The map should have at least 4 levels.
At the first level, 1 to 10 words are allowed. Try to use this maximum word limit I am giving you to explore concepts.
At the second level, 1 to 10 words are allowed. Try to use this maximum word limit I am giving you to explore concepts.
At the third level, 1 to 20 words are allowed. You are encouraged to form phrases in this level.
At the fourth level, 1 to 20 words are allowed. You are encouraged to form phrases in this level.
Do not include final considerations, complementary materials, bibliographic references, or other topics that do not explicitly explain concepts.
'''

def check_files_in_genai_cloud():
    """
    Report whether the Gemini file storage currently holds any file.

    The listing returned by ``genai.list_files()`` is iterated and the search
    stops at the first truthy entry.

    Returns:
        bool: True if the listing yields at least one truthy file object,
        False otherwise (including an empty listing).

    Raises:
        Any exception raised by ``genai.list_files()`` propagates unchanged.

    Example:
        >>> has_files = check_files_in_genai_cloud()
        >>> print(has_files)
        True
    """
    for remote_file in genai.list_files():
        if remote_file:
            return True
    return False

def remove_files_from_cloud():
    """
    Remove every file currently stored in the Gemini file storage.

    The remote listing is read completely first and each listed file is then
    deleted by its ``name``. Deletion is best effort: a failure on one file is
    reported and the remaining files are still processed.

    Returns:
        None

    Raises:
        Exceptions raised while listing the files (``genai.list_files()``)
        propagate unchanged. Exceptions raised while deleting a single file
        are caught and printed.

    Side Effects:
        - Deletes files from the Gemini file storage.
        - Prints one status line per file to the console.

    Example:
        >>> remove_files_from_cloud()
        The file document1.pdf was removed from the cloud.
        The file image1.jpg was removed from the cloud.
        Error removing file locked_file.txt: Permission denied
    """
    # Snapshot the listing before deleting anything, so that deletions cannot
    # disturb a listing that is still being iterated.
    remote_files = list(genai.list_files())

    for remote_file in remote_files:
        try:
            genai.delete_file(remote_file.name)
        except Exception as e:  # best effort: report the failure and keep going
            print(f"Error removing file {remote_file.display_name}: {e}")
        else:
            print(f"The file {remote_file.display_name} was removed from the cloud.")

def upload_files_to_cloud(inputs_path):
    """
    Upload the PDF files found in a directory to the Gemini file storage.

    Regular files whose name ends in ``.pdf`` (matched case-insensitively) are
    collected from ``inputs_path`` (non-recursively), ordered by
    ``os.path.getctime`` and uploaded one by one. Upload is best effort: a
    failure on one file is reported and the remaining files are still tried.

    Args:
        inputs_path (str): Path of the directory containing the PDF files.

    Returns:
        list: The file objects returned by ``genai.upload_file`` for every
        successful upload, in upload order. Empty when ``inputs_path`` is not
        an existing directory or contains no PDF file.

    Side Effects:
        - Uploads files to the Gemini file storage.
        - Prints one status line per file to the console.
        - Sleeps 1 second after each successful upload.

    Note:
        ``os.path.getctime`` is the creation time on Windows but the time of
        the last metadata change on Unix, so the order is platform dependent.

    Example:
        >>> uploaded = upload_files_to_cloud("/path/to/pdf/directory")
        Uploaded file 'document1.pdf' as: gemini://abc123
        Uploaded file 'document2.pdf' as: gemini://def456
        Error uploading file large_file.pdf: File size exceeds limit
        >>> print(len(uploaded))
        2
    """
    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(inputs_path):
        print(f"Directory {inputs_path} does not exist.")
        return []

    candidates = (os.path.join(inputs_path, name) for name in os.listdir(inputs_path))
    # Accept '.pdf' in any letter case and skip directories named like PDFs.
    files = [
        path for path in candidates
        if path.lower().endswith('.pdf') and os.path.isfile(path)
    ]
    files.sort(key=os.path.getctime)

    uploaded_files = []
    for path in files:
        pdf_name = os.path.basename(path)
        try:
            file = genai.upload_file(path=path, display_name=pdf_name)
            print(f"Uploaded file '{file.display_name}' as: {file.uri}")
            uploaded_files.append(file)
            # Short pause between uploads to avoid hammering the API.
            time.sleep(1)
        except Exception as e:  # best effort: report the failure and keep going
            print(f"Error uploading file {pdf_name}: {e}")

    return uploaded_files

# Execute the prompt using the Gemini API, retrieve the response and ensure the response will be in puml mindmap format
def run_API(file, prompt):
    """
    Send a file and a prompt to the Gemini model and return a PUML mind map.

    The text of the model response is reduced to the lines that start with
    '*' (the PlantUML mind-map level markers) and wrapped between
    '@startmindmap' and '@endmindmap'. Every other line of the response
    (code fences, commentary, blank lines, markers already present) is dropped.

    Args:
        file: The file object sent to the model together with the prompt
            (callers in this file pass objects obtained from the Gemini file
            API).
        prompt (str): The instructions sent to the model.

    Returns:
        tuple: ``(str_puml, filtered_lines)`` where ``filtered_lines`` is the
        list of mind-map lines including the start/end markers and
        ``str_puml`` is those lines joined with '\\n'.

    Raises:
        Exceptions raised by ``model.generate_content`` or by reading
        ``response.text`` propagate unchanged. The content of the mind map is
        not validated here (see ``check_criteria``).
    """
    response = model.generate_content([file, prompt])

    # rstrip() drops trailing whitespace, including the '\r' left behind when
    # the response uses '\r\n' line endings.
    response_lines = [line.rstrip() for line in response.text.split('\n')]

    # Only topic lines are kept; the markers are always added around them, so
    # it does not matter whether the model emitted them itself.
    topic_lines = [line for line in response_lines if line.startswith('*')]
    filtered_lines = ['@startmindmap', *topic_lines, '@endmindmap']

    str_puml = '\n'.join(filtered_lines)
    return str_puml, filtered_lines

# Check if the created map has at least one level 4 subtopic
def check_criteria(lines):
    """
    Tell whether a mind map contains at least one level 4 subtopic.

    A level 4 subtopic is a line that begins with exactly four asterisks
    followed by a space (``"**** "``), as used by the PUML mind map format.

    Args:
        lines (list of str): The lines of a PUML mind map.

    Returns:
        bool: True as soon as one line starting with ``"**** "`` is found,
        False if no line matches (including an empty list).

    Example:
        >>> check_criteria(["@startmindmap", "* Root", "** Level 2",
        ...                 "*** Level 3", "**** Level 4", "@endmindmap"])
        True
        >>> check_criteria(["@startmindmap", "* Root", "** Level 2",
        ...                 "*** Level 3", "@endmindmap"])
        False
    """
    level_four_prefix = "*" * 4 + " "
    for entry in lines:
        if entry.startswith(level_four_prefix):
            return True
    return False

# Retry file processing up to 5 times with exponential backoff, ensuring generated PUML meets criteria
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_file_with_retry(file, prompt):
    """
    Generate a mind map for one file, retrying until it meets the criteria.

    Each attempt calls ``run_API`` and validates the result with
    ``check_criteria``. A result that fails validation raises ``ValueError``,
    which (like any other exception raised during the attempt) triggers a new
    attempt. The decorator allows up to 5 attempts with an exponential wait
    bounded between 4 and 10 seconds.

    Args:
        file: The file object forwarded to ``run_API``.
        prompt (str): The prompt forwarded to ``run_API``.

    Returns:
        tuple: ``(str_puml, lines)`` as produced by ``run_API``.

    Raises:
        NOTE(review): ``reraise`` is not set on the decorator, so once the
        attempts are exhausted tenacity is expected to raise its own
        ``RetryError`` wrapping the last failure - confirm against the
        tenacity documentation.
    """
    generated = run_API(file, prompt)
    puml_text, puml_lines = generated
    if check_criteria(puml_lines):
        return puml_text, puml_lines
    raise ValueError("Generated PUML does not meet criteria")

def generate_mind_map(str_puml, file_name):
    """
    Write a PUML mind map to disk, render it with PlantUML and display it.

    The text is saved as
    ``outputs/<timestamp>_mapa_mental_<file_name without extension>.puml``
    (the ``outputs`` directory is created when missing). ``run_plantuml`` is
    then invoked on that file and, if a PNG with the same base name exists
    afterwards, it is shown in the notebook.

    Args:
        str_puml (str): The PUML mind map source to write.
        file_name (str): Name of the originating file; its extension is
            stripped and the rest becomes part of the output file name.

    Returns:
        None

    Side Effects:
        - Creates the ``outputs`` directory and writes the ``.puml`` file.
        - Runs PlantUML through ``run_plantuml``.
        - Displays the image or prints a diagnostic message.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.splitext(file_name)[0]
    path_puml = os.path.join(output_dir, f"{timestamp}_mapa_mental_{base_name}.puml")
    # Same path with the '.puml' extension swapped for '.png'.
    # NOTE(review): assumes PlantUML writes the PNG next to its input file,
    # using the same base name - the code only checks that the file exists.
    path_png = os.path.splitext(path_puml)[0] + ".png"

    with open(path_puml, "w", encoding="utf-8") as f:
        f.write(str_puml)

    # os.path.join keeps the jar location valid on every platform; a literal
    # backslash separator only works on Windows.
    plantuml_jar_path = os.path.join("plantuml_jar", "plantuml-1.2024.7.jar")

    if not run_plantuml(path_puml, plantuml_jar_path):
        print("Failed to generate the image.")
        return

    if os.path.exists(path_png):
        display(Image(path_png))
    else:
        print("Image not found. Check if PlantUML correctly converted the .puml file to .png.")

def run_plantuml(path_puml, plantuml_jar_path):
    """
    Run PlantUML on a ``.puml`` file through the ``java`` executable.

    The command executed is
    ``java -Dfile.encoding=UTF-8 -jar <plantuml_jar_path> <path_puml>``.

    Args:
        path_puml (str): Path of the PUML file to render.
        plantuml_jar_path (str): Path of the PlantUML jar file.

    Returns:
        bool: True when the command exits with status 0, False when it exits
        with a non-zero status or when the ``java`` process cannot be started.

    Side Effects:
        Prints a success message, or the error together with the captured
        standard output / standard error of the failed command.
    """
    command = ["java", "-Dfile.encoding=UTF-8", "-jar", plantuml_jar_path, path_puml]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error generating image: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False
    except OSError as e:
        # Raised when the 'java' executable itself cannot be launched
        # (for example when it is not installed or not on PATH).
        print(f"Error generating image: {e}")
        return False

    print("Image generated successfully!")
    return True

def spinning_indicator():
    """
    Yield the spinner frames '-', '/', '|' and a backslash in an endless cycle.

    Yields:
        str: The next single-character frame; the sequence never ends.
    """
    frames = ('-', '/', '|', '\\')
    for frame in itertools.cycle(frames):
        yield frame

def update_spinner_controlled(spinner_label, running_event):
    """
    Animate a spinner on a label for as long as an event stays set.

    Args:
        spinner_label: Object whose ``value`` attribute receives the current
            spinner frame (a widget label in this file).
        running_event (threading.Event): The animation continues while this
            event is set and stops once it is cleared.

    Side Effects:
        Updates ``spinner_label.value`` roughly every 0.1 seconds and resets
        it to an empty string before returning.
    """
    frames = spinning_indicator()
    while running_event.is_set():
        spinner_label.value = next(frames)
        time.sleep(0.1)
    # Clear the spinner once the animation has been told to stop.
    spinner_label.value = ''

def process_files(files, pause_seconds=10):
    """
    Generate a mind map for every file while showing progress widgets.

    A progress bar, a spinner and a status label are displayed. Each file is
    processed with ``process_file_with_retry`` and rendered with
    ``generate_mind_map``; a failure on one file is printed and the next file
    is still processed. Between two files the function pauses, counting down
    on the status label.

    Args:
        files (list): File objects exposing ``display_name``, processed in
            the given order.
        pause_seconds (int): Length of the pause between two files, in
            seconds. Defaults to 10; a value of 0 or less disables the pause.

    Returns:
        None

    Side Effects:
        Displays widgets, starts a daemon thread that animates the spinner,
        prints progress messages and sleeps between files.
    """
    total_files = len(files)
    progress = IntProgress(min=0, max=total_files, description='Files:')
    spinner_label = Label(value='-')
    status_label = Label(value='Processing...')

    display(HBox([progress, spinner_label, status_label]))

    # Event acting as the on/off switch of the spinner thread; it starts set.
    spinner_running = threading.Event()
    spinner_running.set()

    spinner_thread = threading.Thread(
        target=update_spinner_controlled,
        args=(spinner_label, spinner_running),
        daemon=True,
    )
    spinner_thread.start()

    try:
        for i, file in enumerate(files):
            status_label.value = f'Processing {file.display_name}...'

            try:
                str_puml, _ = process_file_with_retry(file, prompt_text)
                generate_mind_map(str_puml, file.display_name)
            except Exception as e:  # best effort: report and move on
                print(f"Failed to process {file.display_name}: {e}")

            # Deliberately not in a ``finally`` clause: an interrupt
            # (KeyboardInterrupt) must not be held back by the countdown.
            progress.value = i + 1
            if i == total_files - 1:
                print("All files processed.")
            elif pause_seconds > 0:
                print(f"{pause_seconds}-second suspension initiated...")
                for remaining in range(pause_seconds, 0, -1):
                    status_label.value = (
                        f"{pause_seconds}-second suspension initiated... "
                        f"{remaining} seconds remaining"
                    )
                    time.sleep(1)

        status_label.value = 'Finished!'
    finally:
        # Tell the spinner thread to stop and give it a moment to finish.
        spinner_running.clear()
        spinner_thread.join(timeout=1)

    # NOTE(review): the message mentions a 'pdf' folder, although this
    # function only knows the list of files it was given.
    print("Script finished! All files in the 'pdf' folder have been scanned to generate mind maps.")

def run_workflow(upload_new_files=True):
    """
    Run the whole pipeline: select the files, then generate the mind maps.

    Args:
        upload_new_files (bool): When True, any file already present in the
            cloud storage is removed and the PDFs found in ``inputs_path``
            are uploaded. When False, the files already present in the cloud
            storage are reused.

    Returns:
        None

    Side Effects:
        - Rebinds the module-level ``sorted_files`` to the files selected for
          processing, ordered by ``create_time``.
        - May delete and upload files in the cloud storage.
        - Prints progress messages and calls ``process_files``.
    """
    global sorted_files

    if upload_new_files:
        if check_files_in_genai_cloud():
            remove_files_from_cloud()

        uploaded_files = upload_files_to_cloud(inputs_path)
        sorted_files = sorted(uploaded_files, key=lambda file: file.create_time)

        # Without this guard an empty upload would still run the pipeline and
        # end by reporting that all files were scanned.
        if not sorted_files:
            print("No files were uploaded. Nothing to process.")
            return

        print("Uploaded files:")
    else:
        if not check_files_in_genai_cloud():
            print("No files in the cloud. Please upload new files.")
            return

        sorted_files = sorted(genai.list_files(), key=lambda file: file.create_time)
        print("Using existing files in the cloud:")

    for file in sorted_files:
        print(f"{file.display_name} - {file.create_time}")

    print("Starting map generation...")
    process_files(sorted_files)

# Directory scanned for PDF files when "Upload and Process" is clicked
# (run_workflow reads this module-level name at click time).
inputs_path = "inputs"

# Two buttons drive the workflow: one uploads fresh PDFs and processes them,
# the other reprocesses whatever is already stored in the cloud.
upload_button = Button(description="Upload and Process", layout=Layout(width='150px'))
reprocess_button = Button(description="Reprocess Uploaded Files", layout=Layout(width='200px'))

# The click callbacks receive the clicked button, which is not needed here.
upload_button.on_click(lambda _: run_workflow(upload_new_files=True))
reprocess_button.on_click(lambda _: run_workflow(upload_new_files=False))

# Show both buttons side by side.
display(HBox([upload_button, reprocess_button]))

In [None]:
# Print the docstring of run_API so it can be read in the notebook output.
print(run_API.__doc__)