# Imports the libraries and functions that will be necessary to create mind maps from PDFs.

In [None]:
import os
import time
import datetime
import subprocess
from tenacity import retry, stop_after_attempt, wait_exponential
from dotenv import load_dotenv
import google.generativeai as genai
from IPython.display import Image, display

# Load environment variables from the keys.env file.
load_dotenv("keys.env")
# os.getenv returns None when GOOGLE_API_KEY is not set; no check is made here.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

# Configure the Gemini client and select the model used by run_API.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Prompt text sent to Gemini together with each PDF (process_files passes it
# to process_file_with_retry). It asks for a PlantUML mind map made of
# '*'-prefixed topic lines with at least four levels.
prompt_text = '''
Please create a mind map of this PDF in .puml format with topics and subtopics to be used in plantuml, without indentations.
Ensure that the .puml file contains no more than 90 lines. You are encouraged to fill 60-80 lines.
Maintain the language of the PDF.
The first level should start with '* '. The second level should start with '** ', and so on.
Do not use '-' at the beginning of each topic.
The map should have at least 4 levels.
At the first level, 1 to 10 words are allowed. Try to use this maximum word limit I am giving you to explore concepts.
At the second level, 1 to 10 words are allowed. Try to use this maximum word limit I am giving you to explore concepts.
At the third level, 1 to 20 words are allowed. You are encouraged to form phrases in this level.
At the fourth level, 1 to 20 words are allowed. You are encouraged to form phrases in this level.
Do not include final considerations, complementary materials, bibliographic references, or other topics that do not explicitly explain concepts.
'''

def check_files_in_genai_cloud():
    """Report whether the Genai cloud currently lists any file.

    Returns:
        bool: True as soon as ``genai.list_files()`` yields a truthy entry,
        False when the listing is exhausted without one.
    """
    for remote_file in genai.list_files():
        if remote_file:
            return True
    return False

def remove_files_from_cloud():
    """Delete every file returned by ``genai.list_files()``.

    Deletion is best-effort: a failure for one file is printed and the loop
    moves on to the next file instead of aborting.
    """
    for remote_file in genai.list_files():
        try:
            genai.delete_file(remote_file.name)
            print(f"The file {remote_file.display_name} was removed from the cloud.")
        except Exception as err:
            # Report and continue so one bad file does not block the cleanup.
            print(f"Error removing file {remote_file.display_name}: {err}")

def upload_files_to_cloud(inputs_path):
    """Upload every ``.pdf`` file found directly inside ``inputs_path``.

    Args:
        inputs_path: Path of the directory that holds the PDF files.

    Returns:
        list: The objects returned by ``genai.upload_file`` for each upload
        that succeeded, in upload order. Empty when ``inputs_path`` is not an
        existing directory or contains no ``.pdf`` entries.
    """
    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(inputs_path):
        print(f"Directory {inputs_path} does not exist.")
        return []

    files = [os.path.join(inputs_path, pdf) for pdf in os.listdir(inputs_path) if pdf.endswith('.pdf')]
    # Upload in the order given by os.path.getctime (oldest value first).
    files.sort(key=os.path.getctime)

    uploaded_files = []
    for path in files:
        pdf_name = os.path.basename(path)
        try:
            file = genai.upload_file(path=path, display_name=pdf_name)
            print(f"Uploaded file '{file.display_name}' as: {file.uri}")
            uploaded_files.append(file)
            # Pause between uploads so consecutive files get distinguishable
            # creation times in the cloud.
            time.sleep(1)
        except Exception as e:
            # Best-effort: report the failure and continue with the next PDF.
            print(f"Error uploading file {pdf_name}: {e}")

    return uploaded_files

def run_API(file, prompt):
    """Ask Gemini for a mind map of ``file`` and normalise it into PlantUML lines.

    Args:
        file: Uploaded file object passed to ``model.generate_content``.
        prompt: Instruction text sent together with the file.

    Returns:
        tuple: ``(str_puml, lines)``. ``lines`` is always
        ``['@startmindmap', <topic lines...>, '@endmindmap']`` and ``str_puml``
        is those lines joined with ``'\\n'``.
    """
    response = model.generate_content([file, prompt])

    # Keep only topic lines (those starting with '*'); code fences, blank
    # lines and any markers the model emitted itself are dropped.
    topic_lines = [line for line in response.text.split('\n') if line.startswith('*')]

    # Always add exactly one start and one end marker. Building the list this
    # way keeps '@startmindmap' even when the reply is a single non-topic
    # line, a case where patching the first/last element in place would
    # overwrite the start marker with the end marker.
    lines = ['@startmindmap', *topic_lines, '@endmindmap']

    str_puml = '\n'.join(lines)
    return str_puml, lines

def check_criteria(lines):
    """Tell whether the mind map contains a fourth-level topic.

    Args:
        lines: Iterable of PlantUML lines.

    Returns:
        bool: True if at least one line begins with ``"**** "`` (four
        asterisks followed by a space), False otherwise.
    """
    for entry in lines:
        if entry.startswith("**** "):
            return True
    return False

@retry(wait=wait_exponential(multiplier=1, min=4, max=10), stop=stop_after_attempt(5))
def process_file_with_retry(file, prompt):
    """Generate a mind map for ``file``, retrying until it passes ``check_criteria``.

    The tenacity decorator is configured for at most 5 attempts with an
    exponential wait bounded between 4 and 10 seconds. Any exception raised
    here (including one raised inside ``run_API``) triggers another attempt.

    NOTE(review): ``reraise=True`` is not set, so once the attempts are
    exhausted tenacity is expected to raise its own ``RetryError`` rather
    than the ``ValueError`` below. Confirm this is the message callers want.

    Args:
        file: Uploaded file object forwarded to ``run_API``.
        prompt: Prompt text forwarded to ``run_API``.

    Returns:
        tuple: ``(str_puml, lines)`` exactly as produced by ``run_API``.

    Raises:
        ValueError: When the generated lines do not satisfy ``check_criteria``.
    """
    puml_text, puml_lines = run_API(file, prompt)
    if check_criteria(puml_lines):
        return puml_text, puml_lines
    raise ValueError("Generated PUML does not meet criteria")

def generate_mind_map(str_puml, file_name):
    """Write ``str_puml`` to a timestamped .puml file, render it and show the PNG.

    The .puml file is created in the ``outputs`` directory (created on
    demand). After ``run_plantuml`` succeeds, the PNG is expected next to the
    .puml file with the same stem and is displayed inline.

    Args:
        str_puml: PlantUML source text of the mind map.
        file_name: Name of the source PDF; its extension is stripped and the
            remaining stem becomes part of the output file name.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Derive both paths from one stem instead of slicing the extension off
    # the .puml path.
    output_stem = f"{timestamp}_mapa_mental_{os.path.splitext(file_name)[0]}"
    path_puml = os.path.join(output_dir, output_stem + ".puml")
    path_png = os.path.join(output_dir, output_stem + ".png")

    with open(path_puml, "w", encoding="utf-8") as f:
        f.write(str_puml)

    # Build the jar path with os.path.join so the separator is correct on
    # every platform; a hard-coded backslash only resolves on Windows.
    plantuml_jar_path = os.path.join("plantuml_jar", "plantuml-1.2024.7.jar")
    if run_plantuml(path_puml, plantuml_jar_path):
        if os.path.exists(path_png):
            display(Image(path_png))
        else:
            print("Image not found. Check if PlantUML correctly converted the .puml file to .png.")
    else:
        print("Failed to generate the image.")

def run_plantuml(path_puml, plantuml_jar_path):
    """Render a .puml file by running the PlantUML jar through ``java``.

    Args:
        path_puml: Path of the PlantUML source file to render.
        plantuml_jar_path: Path of the PlantUML jar passed to ``java -jar``.

    Returns:
        bool: True when the process exits with status 0; False when it exits
        with a non-zero status or when ``java`` could not be launched at all.
    """
    command = ["java", "-Dfile.encoding=UTF-8", "-jar", plantuml_jar_path, path_puml]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error generating image: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False
    except OSError as e:
        # Raised before the process starts, e.g. FileNotFoundError when no
        # ``java`` executable is on PATH. Report it like any other failure so
        # the function keeps its boolean contract.
        print(f"Error launching Java: {e}")
        return False

    print("Image generated successfully!")
    return True

def process_files(sorted_files):
    """Generate one mind map per uploaded file, pausing between files.

    For each file the PlantUML text is obtained from
    ``process_file_with_retry`` using the module-level ``prompt_text`` and
    handed to ``generate_mind_map``. A failure for one file is printed and
    the loop continues. A 10-second pause follows every file, successful or
    not.

    Args:
        sorted_files: Iterable of uploaded file objects exposing
            ``display_name``.
    """
    for uploaded in sorted_files:
        try:
            puml_text, _unused_lines = process_file_with_retry(uploaded, prompt_text)
            generate_mind_map(puml_text, uploaded.display_name)
        except Exception as err:
            # Keep going: one failing PDF must not stop the remaining ones.
            print(f"Failed to process {uploaded.display_name}: {err}")
        finally:
            print("10-second suspension initiated...")
            time.sleep(10)

    print("Script finished! All files in the 'pdf' folder have been scanned to generate mind maps.")

if __name__ == "__main__":
    # Directory that holds the PDFs to be uploaded.
    inputs_path = "inputs"

    # Start from an empty cloud: remove whatever is still listed there.
    if check_files_in_genai_cloud():
        remove_files_from_cloud()

    uploaded_files = upload_files_to_cloud(inputs_path)

    # Work on a copy ordered by the cloud-side creation time.
    sorted_files = list(uploaded_files)
    sorted_files.sort(key=lambda file: file.create_time)

    # Show the processing order before starting.
    for file in sorted_files:
        print(f"{file.display_name} - {file.create_time}")

    process_files(sorted_files)

# Initializes the Genai model and uploads PDFs to the Genai cloud
Run this code to send the PDFs from your machine, located in the "inputs" folder of this project, to Google's Genai cloud.

In [None]:
load_dotenv("keys.env")  # loads variables from keys.env file
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

#### CONFIGURES THE LARGE LANGUAGE MODEL (LLM) GEMINI ####
genai.configure(api_key=GOOGLE_API_KEY)  # Configures genai with GOOGLE_API_KEY
model = genai.GenerativeModel(model_name="gemini-1.5-flash")  # Defines the Gemini model.

#### REMOVES ALL FILES THAT WERE UPLOADED TO THE GENAI CLOUD (google.generativeai) ####
# Use the result of the check directly: a bare call discards its return value,
# and testing a name that was never assigned raises NameError.
if check_files_in_genai_cloud():
    remove_files_from_cloud()  # Deletes each listed file, reporting failures per file

#### UPLOADS FILES TO THE GENAI CLOUD (google.generativeai) ####
inputs_path = "inputs"  # Defines the directory where PDF files are located
# Uploads every .pdf in inputs_path ordered by os.path.getctime, waiting
# 1 second between uploads so creation times in the cloud are distinguishable.
upload_files_to_cloud(inputs_path)

uploaded_files = list(genai.list_files())  # creates the list of uploads using the genai.list_files() generator
sorted_files = sorted(uploaded_files, key=lambda file: file.create_time)  # Sorts the list of uploads by the 'create_time' attribute
for file in sorted_files:  # Displays the sorted list of uploads
    print(f"{file.display_name} - {file.create_time}")

# Defines the prompt that will be sent with each PDF to Gemini and executes all necessary functions to generate mind maps in .png format

In [None]:
# Main execution
if __name__ == "__main__":
    # Assumes sorted_files was defined somewhere before this call
    process_files(sorted_files)