In [1]:
import os
import base64
from google import genai
from dotenv import load_dotenv
import enum
import json

# Load the API key from the .env file
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Set up the Gemini API client
client = genai.Client(api_key=api_key)


class ADClassification(enum.Enum):  # Define Enum
    SUPERSEDED = "Superseded"
    NORMAL = "Normal"
    ERROR = "Error"
    INVALID_JSON = "Invalid JSON"  # Add for JSON errors
    INVALID_CLASSIFICATION = "Invalid Classification"


In [5]:
 # Function to encode the PDF file to base64
def encode_pdf(pdf_path):
    """Encodes a PDF file to a base64 string."""
    try:
        with open(pdf_path, "rb") as pdf_file:
            pdf_data = pdf_file.read()
            encoded_string = base64.b64encode(pdf_data).decode("utf-8")
        return encoded_string
    except Exception as e:
        print(f"Error encoding {pdf_path}: {e}")
        return None

In [7]:
def classify_pdf(file_path, prompt):
    encoded_pdf = encode_pdf(file_path)
    if not encoded_pdf:
        return "Error: Unable to encode PDF"
    contents = f"{prompt}\n{encoded_pdf}"

    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=contents,
        config={
            'response_mime_type': 'application/json',
        },
    )

    try:
        response_json = json.loads(response.text)
        classification_str = response_json.get("classification")
        justification = response_json.get("justification")

        try:
            classification = ADClassification(classification_str)  # Convert to Enum
        except ValueError:
            classification = ADClassification.INVALID_CLASSIFICATION  # Enum for invalid
            justification = "LLM returned an invalid classification string."

        return classification, justification

    except json.JSONDecodeError:
        return ADClassification.INVALID_JSON, "Invalid JSON response from LLM"  # Enum
    except Exception as e:
        return ADClassification.ERROR, f"An error occurred: {e}"  # Enum

In [9]:
def process_pdf_directory(pdf_directory, prompt, classify_pdf_func):
    if not os.path.exists(pdf_directory):
        print(f"Error: Directory '{pdf_directory}' not found.")
        return {}  # Return empty dictionary

    results = {}
    try:
        for filename in os.listdir(pdf_directory):
            if filename.endswith('.pdf'):
                file_path = os.path.join(pdf_directory, filename)
                classification, justification = classify_pdf_func(file_path, prompt)
                results[filename] = {"classification": classification, "justification": justification}

                print(f'{filename}: {classification.name}')
                if justification:
                    print(f'  Justification: {justification}')
                print("-" * 20)

    except Exception as e: # Catch any other error
        print(f"An error occurred during directory processing: {e}")
        return {} # Return empty dictionary

    return results

In [10]:
pdf_directory = r"C:\Users\zdrop\PycharmProjects\BRUMBRUMWEEEE\Sample_of_a_sample"

prompt = """
Classify the following document as 'Superseded' or 'Normal' based on its content. Be careful that you don't mark ADs that only supersede other ADs as superseded, when they have not yet been superseded themselves. Respond in JSON format like this:

```json
{{
  "classification": "[Superseded/Normal]",  # EXACTLY "Superseded" or "Normal"
  "justification": "[Explanation of why the document was classified as such]"
}}
For example:
        AD_2005-01234R1_1.pdf: SUPERSEDED
          Justification: This AD is marked as superseded in many sections of the document. Therefore it is outdated.
"""

results = process_pdf_directory(pdf_directory, prompt, classify_pdf)  

An error occurred during directory processing: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'Resource has been exhausted (e.g. check quota).', 'status': 'RESOURCE_EXHAUSTED'}}


In [2]:
from google import genai
from google.genai import types
import base64

def generate():
  client = genai.Client(
      vertexai=True,
      project="mthesis-450913",
      location="us-central1",
  )


  model = "gemini-2.0-flash-lite-preview-02-05"
  contents = [
    types.Content(
      role="user",
      parts=[
        types.Part.from_text(text="""Telle me valenis day joke on polish.""")
      ]
    )
  ]
  generate_content_config = types.GenerateContentConfig(
    temperature = 1,
    top_p = 0.95,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [types.SafetySetting(
      category="HARM_CATEGORY_HATE_SPEECH",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_DANGEROUS_CONTENT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_HARASSMENT",
      threshold="OFF"
    )],
  )

  for chunk in client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    print(chunk.text, end="")

generate()

RefreshError: ('invalid_grant: Bad Request', {'error': 'invalid_grant', 'error_description': 'Bad Request'})