### 🧠 Deadline Manager Agent – EY AI Challenge

Modular notebook: OCR, date parsing, working-day calculation, an LLM agent for legal deadlines, and optional calendar integration.

In [None]:
# DEPENDENCIES: Some useful dependencies. They might not be necessary.
!apt-get update && apt-get install -y tesseract-ocr
!pip install --upgrade pytesseract PyPDF2 pillow dateparser python-dateutil holidays transformers huggingface_hub[hf_xet]

In [None]:
# IMPORTS: Some useful libraries. They might not be necessary
import os
from datetime import datetime, timedelta
from dateparser.search import search_dates
import dateparser
from dateutil.relativedelta import relativedelta
import holidays
import pytesseract
from PIL import Image
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

### 🖼️ OCR & PDF Extraction
Functions to read text in images (Tesseract) and PDFs.

In [None]:
def extract_text_from_image(path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        path: Location of the image; passed straight to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` using the
        Portuguese (``'por'``) language data.
    """
    # Image.open is lazy and can leave the file handle open; the context
    # manager guarantees it is released once OCR has finished (or failed).
    with Image.open(path) as img:
        return pytesseract.image_to_string(img, lang='por')

def extract_text_from_pdf(path):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        One string with the pages' text separated by ``"\\n"``. A page whose
        ``extract_text()`` result is falsy contributes an empty string.
    """
    reader = PdfReader(path)
    page_texts = []
    for page in reader.pages:
        content = page.extract_text()
        page_texts.append(content if content else "")
    return "\n".join(page_texts)

### 🧠 Data extraction (NLU)
Extract the first future date from free text using `dateparser.search.search_dates`.

In [None]:
def infer_deadline(text, base_date=None):
    """Return the first date that dateparser finds in a piece of free text.

    Args:
        text: Free text (for example OCR or PDF output) to scan for dates.
        base_date: Reference datetime used to resolve relative expressions;
            defaults to ``datetime.now()``.

    Returns:
        The datetime of the first match reported by ``search_dates``, or
        ``None`` when ``text`` is empty/blank/``None`` or no date is found.
    """
    # Text extraction can legitimately yield nothing; there is nothing to
    # scan in that case, so answer "no date" without invoking the parser.
    if not text or not text.strip():
        return None

    base = base_date or datetime.now()
    res = search_dates(
        text,
        languages=['pt', 'en'],
        settings={
            # Ambiguous dates are resolved towards the future, relative to base.
            'PREFER_DATES_FROM': 'future',
            'RELATIVE_BASE': base,
            # Day-first order for numeric dates such as 05/06/2025.
            'DATE_ORDER': 'DMY',
        },
    )
    # search_dates yields (matched_text, datetime) pairs; keep the first datetime.
    return res[0][1] if res else None

### 📅 Work days calculation (PT)
Add work days to a date, excluding weekends and Portuguese holidays.

In [None]:
def add_working_days(start_date, days):
    """Advance a date by a number of Portuguese working days.

    Steps forward one calendar day at a time from ``start_date`` and counts
    only days that fall on Monday-Friday and are not in
    ``holidays.Portugal()``. The start date itself is never counted.

    Args:
        start_date: Date (or datetime) to start counting from.
        days: Number of working days to add. When it is zero or negative the
            start date is returned unchanged.

    Returns:
        The date reached after ``days`` working days.

    Note:
        Judicial recess periods are not handled here.
    """
    portuguese_holidays = holidays.Portugal()
    one_day = relativedelta(days=1)

    current = start_date
    remaining = days
    while remaining > 0:
        current = current + one_day
        # Weekends (weekday 5 and 6) and public holidays do not count.
        if current.weekday() >= 5 or current in portuguese_holidays:
            continue
        remaining -= 1
    return current

### 📅 Lógica fiscal (PT)
Fiscal rules that may appear.

In [None]:
from datetime import date

# Portuguese month names (lower case) mapped to their calendar number.
MESES_PT = {
    "janeiro": 1,
    "fevereiro": 2,
    "março": 3,
    "abril": 4,
    "maio": 5,
    "junho": 6,
    "julho": 7,
    "agosto": 8,
    "setembro": 9,
    "outubro": 10,
    "novembro": 11,
    "dezembro": 12,
}


def interpretar_obrigacao_fiscal(texto, ano_base=None):
    """Find the month a text refers to and derive the related fiscal dates.

    The text is lower-cased and searched for ``"processar <month>"`` or
    ``"referente a <month>"``, with months tried in calendar order; the first
    month that matches wins.

    Args:
        texto: Free text mentioning the month being processed.
        ano_base: Year the mentioned month belongs to; defaults to the
            current year.

    Returns:
        ``None`` when no month pattern is found, otherwise a dict with:
        ``"irs"`` and ``"tsu"`` (day 20 of the following month),
        ``"iva_entrega"`` and ``"iva_pagamento"`` (day 10 and day 15 of the
        second following month) and ``"mes_base"`` (the matched month name).
    """
    if not ano_base:
        ano_base = datetime.now().year
    normalizado = texto.lower()

    def _dia_apos(mes_origem, meses_adiante, dia):
        # A zero-based month index lets divmod separate the year carry
        # from the resulting month.
        anos_extra, indice = divmod(mes_origem - 1 + meses_adiante, 12)
        return date(ano_base + anos_extra, indice + 1, dia)

    for mes_nome, mes_num in MESES_PT.items():
        gatilhos = (f"processar {mes_nome}", f"referente a {mes_nome}")
        if not any(gatilho in normalizado for gatilho in gatilhos):
            continue

        return {
            "irs": _dia_apos(mes_num, 1, 20),
            "tsu": _dia_apos(mes_num, 1, 20),
            "iva_entrega": _dia_apos(mes_num, 2, 10),
            "iva_pagamento": _dia_apos(mes_num, 2, 15),
            "mes_base": mes_nome,
        }

    return None

### 🤖 Deadline Agent (LLM Free)
Uses a small open-source model (Flan-T5 small) to apply the following rules:
- Modelo 22: up to 31/jul
- IES: 15/apr (current and next year)
- Others: infer via NLP

In [None]:
# Implementation using simple LLM

# The tokenizer and the seq2seq model are both loaded from the same checkpoint.
_FLAN_T5_CHECKPOINT = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(_FLAN_T5_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_FLAN_T5_CHECKPOINT)

def llm_generate(prompt: str, max_length: int = 256) -> str:
    """Generate text for a prompt with the module-level tokenizer and model.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    encoded_prompt = tokenizer(prompt, return_tensors="pt")
    generation_options = {
        "num_beams": 4,
        "early_stopping": True,
        "max_length": max_length,
    }
    sequences = model.generate(encoded_prompt.input_ids, **generation_options)
    best_sequence = sequences[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

def agent_process(text, reference_date=None):
    """Ask the LLM for a deadline and parse its JSON answer.

    Builds a prompt containing the deadline rules (Modelo 22, IES, or
    natural-language inference), the reference date and the request text,
    sends it through ``llm_generate`` and decodes the reply.

    Args:
        text: The request to analyse (e.g. ``'Entregar Modelo 22'``).
        reference_date: Datetime used as "today" in the prompt; defaults to
            ``datetime.now()``.

    Returns:
        ``{'deadline': datetime}`` when the model reply is valid JSON with a
        parseable ``"deadline"`` value, otherwise ``{'error': str}`` with the
        failure reason and the raw model output.
    """
    # `json` is not among the notebook's imports; without it json.loads
    # raised NameError, which the handler below reported as a parse error on
    # every single call.
    import json

    ref = reference_date or datetime.now()

    prompt = f"""
You are a Portuguese legal deadline assistant. Determine the deadline for the request below using these rules:
- "Modelo 22": due by {ref.year}-07-31
- "IES": due by {ref.year}-04-15 if before, else {ref.year+1}-04-15
- Otherwise infer via natural language (e.g. "5 working days from now").
Reference date: {ref.strftime('%Y-%m-%d')}
Input: "{text}"
Return ONLY a JSON object with key "deadline" (ISO8601 date string).
"""

    raw = llm_generate(prompt)

    try:
        obj = json.loads(raw)
        deadline_text = obj['deadline']
        d = dateparser.parse(deadline_text)
    except Exception as e:
        # Best-effort boundary: any failure to decode the model output is
        # reported to the caller instead of being raised.
        return {'error': f'LLM parse error: {e} | raw: {raw}'}

    # dateparser.parse signals "not a date" by returning None rather than
    # raising; surface that as an error instead of a None deadline.
    if d is None:
        return {'error': f'LLM parse error: unrecognised date {deadline_text!r} | raw: {raw}'}
    return {'deadline': d}

In [None]:
# Implementation using Gemini LLM

def config_llm_gemini(temperature:int):
    """Placeholder for a Gemini-backed LLM call; currently returns ``"llm"``.

    ``temperature`` is accepted but not used by this stub.
    """
    # Setup steps for students:
    #   1. Create a Gemini API key at https://aistudio.google.com/app/apikey.
    #   2. List the required packages in requirements.txt:
    #        langchain
    #        langchain-google-genai
    #   3. Install them with:
    #        !pip install -r requirements.txt
    #   4. Follow the official LangChain + Google Generative AI guide:
    #        https://python.langchain.com/docs/integrations/chat/google_generative_ai/
    # Mind the request limits of whichever model is chosen.
    placeholder_response = "llm"  # Should be replaced by the LLM response
    return placeholder_response

### 🔗 Calendar integration (Optional)
Function to create events in external calendar tool

In [None]:
# def create_calendar_event(summary, start, end, timezone='UTC'):
#     pass  # implementar conforme API desejada

### 🧪 Use case examples

In [None]:
# Exemplo OCR:
# img_text = extract_text_from_image('scan.png')
# print(infer_deadline(img_text))

# Exemplo agente:
# print(agent_process('Entregar Modelo 22'))
# print(agent_process('Enviar IES até dia 15 de abril'))

# Working days:
# base = datetime(2025,5,27)
# print(add_working_days(base,5))