In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# The explicit %pip magic is used instead of a bare `pip ...` line, which only
# works through IPython's automagic and only when it is the sole line in the cell.
%pip install google-genai python-dotenv

In [1]:
import os
import json
from dotenv import load_dotenv
from datetime import datetime

from google import genai
from google.genai import types

In [2]:
# Load variables from a local .env file (if any) into the process environment,
# then look up the Gemini key there. A missing variable yields None.
load_dotenv()
API_KEY = os.environ.get("GEMINI_API_KEY")

# Single shared client; the upload, generation and delete helpers all use it.
client = genai.Client(api_key=API_KEY)

In [6]:
def upload_file(filepath):
    """Upload a local file through the shared ``client`` and return its handle.

    Args:
        filepath: Path of the file to upload.

    Returns:
        The object returned by ``client.files.upload``. Its ``uri`` is printed
        as a confirmation message.
    """
    remote_file = client.files.upload(file=filepath)
    print(f"✅ File uploaded: {remote_file.uri}")
    return remote_file

In [20]:
# Relative path of the PDF report to process.
file_path = "./reports/sugar-12-24.pdf"

# Step 1: Upload file
# `uploaded_file` is reused by later cells: its `uri` is sent to the model and
# the object itself is passed to the cleanup helper.
uploaded_file = upload_file(file_path)
# Show the full file record returned by the upload.
print(uploaded_file)

✅ File uploaded: https://generativelanguage.googleapis.com/v1beta/files/fhwg1d21po9c
name='files/fhwg1d21po9c' display_name=None mime_type='application/pdf' size_bytes=122135 create_time=datetime.datetime(2025, 6, 18, 19, 21, 31, 794131, tzinfo=TzInfo(UTC)) expiration_time=datetime.datetime(2025, 6, 20, 19, 21, 31, 750681, tzinfo=TzInfo(UTC)) update_time=datetime.datetime(2025, 6, 18, 19, 21, 31, 794131, tzinfo=TzInfo(UTC)) sha256_hash='Nzc0OTIxMjAyZWM4M2ViYmYyMGNlMTc4MTAwMjAxMDQzN2I2YWQ1ZTMwZmI1Mzc5MTg4OWE0M2EzMTU0ZmMxMQ==' uri='https://generativelanguage.googleapis.com/v1beta/files/fhwg1d21po9c' download_uri=None state=<FileState.ACTIVE: 'ACTIVE'> source=<FileSource.UPLOADED: 'UPLOADED'> video_metadata=None error=None


In [8]:
# Load prompt template
def load_prompt(template_path):
    """Return the complete text of the prompt template at ``template_path``.

    The file is opened in text mode and decoded as UTF-8; its contents are
    returned without any further processing.
    """
    with open(template_path, mode="r", encoding="utf-8") as template_file:
        template_text = template_file.read()
    return template_text

# Read the extraction prompt once; it is passed unchanged to the model call.
prompt_template = load_prompt("extraction_prompt.template")

In [21]:
def get_extracted_json(
    prompt,
    uploaded_file_uri,
    model="models/gemini-2.5-flash-preview-05-20",
    temperature=0.3,
    max_output_tokens=20000,
):
    """Send the prompt plus an uploaded file to the model and return its text reply.

    Args:
        prompt: Instruction text sent as the first part of the user message.
        uploaded_file_uri: URI of a previously uploaded file, attached as the
            second part of the user message.
        model: Model identifier passed to ``generate_content``. Defaults to the
            model this notebook was originally written against.
        temperature: Sampling temperature for the generation config.
        max_output_tokens: Upper bound on generated tokens.

    Returns:
        str: The text of the first candidate, with all of its text parts
        concatenated in order.

    Raises:
        ValueError: If the response has no candidates, or the first candidate
            carries no text (for example when the reply was blocked or cut off
            before any text was produced).
    """
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part(text=prompt),
                types.Part(file_data=types.FileData(file_uri=uploaded_file_uri)),
            ],
        )
    ]
    response = client.models.generate_content(
        model=model,
        contents=contents,
        config=types.GenerateContentConfig(
            temperature=temperature,
            max_output_tokens=max_output_tokens,
        ),
    )

    # `candidates`, `content` and `parts` can each be missing on an empty or
    # blocked reply; indexing them blindly would raise an opaque
    # TypeError/IndexError, so each level is checked explicitly.
    candidates = response.candidates or []
    if not candidates:
        feedback = getattr(response, "prompt_feedback", None)
        raise ValueError(f"Model returned no candidates (prompt_feedback={feedback!r})")

    candidate = candidates[0]
    content = candidate.content
    parts = (content.parts if content is not None else None) or []

    # A reply may be split over several parts, so join every text part instead
    # of reading only the first one. Parts flagged as `thought` are skipped
    # (assumption: the SDK marks reasoning-only parts this way — confirm
    # against the SDK reference).
    text = "".join(
        part.text
        for part in parts
        if part.text and not getattr(part, "thought", False)
    )
    if not text:
        finish_reason = getattr(candidate, "finish_reason", None)
        raise ValueError(f"Model returned no text (finish_reason={finish_reason!r})")
    return text

# Send the prompt and the uploaded file's URI to the model; keep the raw text
# reply so a later cell can parse it.
raw_output = get_extracted_json(prompt_template, uploaded_file.uri)

In [10]:
def extract_json_from_markdown(text):
    """Parse the JSON payload out of a model reply.

    The reply may be bare JSON, JSON inside a Markdown code fence (with or
    without a ``json`` tag), or JSON surrounded by explanatory prose.

    Args:
        text: Raw reply text.

    Returns:
        The decoded JSON value (dict, list, or scalar).

    Raises:
        json.JSONDecodeError: If no JSON value can be decoded from ``text``.
    """
    # Primary path: strip a Markdown fence if there is one.
    if "```json" in text:
        json_data = text.split("```json")[1].split("```")[0].strip()
    elif "```" in text:
        json_data = text.split("```")[1].strip()
    else:
        json_data = text.strip()

    try:
        return json.loads(json_data)
    except json.JSONDecodeError as primary_error:
        # Fallback for replies the fence handling cannot cope with: a fence tag
        # other than lower-case "json", prose around unfenced JSON, or string
        # values that themselves contain a fence marker. Decode one JSON value
        # starting at the first opening bracket of the whole reply.
        #
        # Only that first bracket is tried: scanning further could silently
        # return a nested fragment of a truncated document.
        starts = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
        if starts:
            try:
                value, _end = json.JSONDecoder().raw_decode(text, min(starts))
                return value
            except json.JSONDecodeError:
                pass
        raise primary_error

In [22]:
# Parse the model's raw reply (possibly wrapped in a Markdown code fence) into
# Python data.
cleaned_json = extract_json_from_markdown(raw_output)

# Destination file for the parsed report; read by the save cell.
output_json_path = "extracted_report.json"


In [23]:
# Write the parsed report to disk as indented JSON.
output_dir = os.path.dirname(output_json_path)
# A bare filename has an empty directory component, so there is nothing to create.
if output_dir != "":
    os.makedirs(output_dir, exist_ok=True)

with open(output_json_path, mode="w", encoding="utf-8") as report_file:
    json.dump(cleaned_json, report_file, indent=2)

print(f"✅ Extracted JSON saved to {output_json_path}")

✅ Extracted JSON saved to extracted_report.json


In [24]:
# delete file after processing
def delete_file(file):
    """Delete an uploaded file via the shared ``client`` and print a confirmation.

    Args:
        file: Object with a ``name`` attribute identifying the uploaded file.
    """
    remote_name = file.name
    client.files.delete(name=remote_name)
    print(f"File deleted successfully: {remote_name}")

# Clean up the uploaded file now that processing is done. The helper receives
# the whole file object and deletes it by its `name` attribute.
delete_file(uploaded_file)

File deleted successfully: files/fhwg1d21po9c
