In [5]:
import os
import time
import base64
from pathlib import Path
from google.generativeai import GenerativeModel
from google.generativeai import configure
import google.ai.generativelanguage as glm
from dotenv import load_dotenv
import base64

load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables.  "
                     "Make sure you have a .env file with GEMINI_API_KEY=<your_api_key>")

configure(api_key=api_key, )

In [4]:
def encode_pdf(pdf_path):
    """Encodes a PDF file to a base64 string."""
    try:
        with open(pdf_path, "rb") as pdf_file:
            pdf_data = pdf_file.read()
            encoded_string = base64.b64encode(pdf_data).decode("utf-8")
        return encoded_string
    except Exception as e:
        print(f"Error encoding {pdf_path}: {e}")
        return None

In [3]:
def analyze_pdf(pdf_path, encoded_string):
    """Analyzes a PDF file using Gemini to determine if it's superseded, a cancellation notice, or normal and provides a short justification."""
    if not encoded_string:
        return "Encoding Error", None

    try:
        document_part = glm.Part(
            inline_data=glm.Blob(
                mime_type="application/pdf",
                data=base64.b64decode(encoded_string)
            )
        )

        model = GenerativeModel('gemini-1.5-flash')

        prompt = """Analyze the content of this PDF document, which is an airworthiness directive.
        Determine if it has been superseded by a newer AD or if it's still a current, active AD.

        Respond in the following format:
        Classification: [Superseded/Normal]
        Justification: [Short 1-2 sentence explanation of why you classified it as such, pointing to clues in the text].

        For example:
        Classification: Superseded
        Justification: This AD is marked as superseded in many sections of the document. Therefore it is outdated.

        If you are unsure, respond with "Normal" and a suitable justification. Be careful that you don't mark ADs that only supersede other ADs as superseded, when they have not yet been superseded themselves.
        """

        responses = model.generate_content([prompt, document_part], stream=False)
        response_text = responses.text.strip()

        # Extract Classification and Justification
        try:
            classification = response_text.split("Classification:")[1].split("Justification:")[0].strip().replace("-","").strip()
            justification = response_text.split("Justification:")[1].strip()
        except IndexError:  # Handle cases where parsing fails
            print(f"Error parsing Gemini response for {pdf_path}.  Defaulting to Normal.")
            return "Normal", "Unable to definitively determine from the text.  Assuming it is a normal AD."

        # Validation:
        if classification not in ["Superseded", "Normal"]:
            print(f"Unexpected classification from Gemini: {classification}.  Treating as Normal.")
            return "Normal", "Gemini returned an invalid classification; defaulting to Normal."

        return classification, justification

    except Exception as e:
        print(f"Error analyzing {pdf_path}: {e}")
        return "Error", str(e)  # Return error type as justification


In [4]:
def process_directory(directory):
    """Processes all PDF files in a directory and returns a list of results."""
    results = []
    for filename in os.listdir(directory):
        if filename.endswith(".pdf"):
            filepath = os.path.join(directory, filename)
            encoded_string = encode_pdf(filepath)
            if encoded_string:
                classification, justification = analyze_pdf(filepath, encoded_string)
                results.append((filename, classification, justification))
            else:
                results.append((filename, "Encoding Error", None))
    return results

In [5]:
# Example usage
directory_to_process = r"C:\Users\zdrop\PycharmProjects\BRUMBRUMWEEEE\ADs"  # Replace with your directory
analysis_results = process_directory(directory_to_process)

# Print results
for filename, classification, justification in analysis_results:
    print(f"{filename}:")
    print(f"  Classification: {classification}")
    if justification:
        print(f"  Justification: {justification}")
    else:
        print(f"  Justification: (See error message)")

AD_2006-0112R1_1.pdf:
  Classification: Superseded
  Justification: The document itself states that it "revises and replaces EASA AD 2006-0112 dated 15 May 2006," indicating it has been superseded by a later revision.
AD_2006-0129_1.pdf:
  Classification: Superseded
  Justification: The document is clearly marked "SUPERSEDED" across multiple sections.  This indicates that it has been replaced by a later, more current Airworthiness Directive.
AD_2010-0127_1.pdf:
  Classification: Normal
  Justification: The document states "Supersedure: None" indicating that it has not been superseded by another AD at the time of issuance.  There is no mention of supersedure within the body of the document itself.


In [16]:
from pydantic import BaseModel, Field
from google.generativeai import GenerativeModel, GenerationConfig


class Classification(BaseModel):
    superseded: bool = Field(..., description="true means ...")
    canceled: bool = Field(..., description="")
    language: bool = Field(..., description="true means Eng ...")
    reason: str = Field(..., description="Provide a plan how you want to classify the document and explained what will be the foundation of your classification.")

    
#json_schema = Classification.model_json_schema()


#print(json_schema)


encoded_string = encode_pdf(r"C:\Users\zdrop\PycharmProjects\BRUMBRUMWEEEE\ADs\AD_F-1995-129-013_2.pdf")


model = GenerativeModel(generation_config=GenerationConfig(response_schema=Classification,
                                                           response_mime_type="application/json", temperature=0), model_name='gemini-1.5-flash')

responses = model.generate_content(["Classify below", encoded_string],stream=False)

In [17]:
print(responses.text)

{"canceled": false, "language": true, "reason": "The provided text is a PDF file encoded in base64.  To classify it, I would need to decode the base64 string, extract the text content, and then apply a text classification model.  The classification would depend on the content of the PDF; for example, it could be classified as a document, report, form, or other type based on its textual features and structure.", "superseded": false}
