In [None]:
import os
import re
from unsloth import FastVisionModel
from models import qwen2vl as qwn
from models import phi4vl as phi
from models import aya8vl as aya
import docdataset as dd
import prompt_templates as pt
from prompt_templates import TextSchema, TableSchema, TitleSchema, ClassSchema
import docfocus as df

In [2]:
cd /root/ucu-mt/src

/root/ucu-mt/src


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
# Fetch the dataset through the project helper and keep only its "train" entry.
dataset = dd.download_dataset()["train"]

## Prompting Open-source Models

In [None]:
# Load the "unsloth/Qwen2.5-VL-7B-Instruct" checkpoint through the project's
# Qwen wrapper, without 4-bit loading.
# Like the other model-loading cells, this rebinds `model`, so whichever
# loader cell ran last decides which model the cells below use.
model = qwn.QwenVL2_LLM(
    model_name="unsloth/Qwen2.5-VL-7B-Instruct",
    max_new_tokens=4096,
    device="cuda",
    load_in_4bit=False,
    use_gradient_checkpointing="unsloth",
)

==((====))==  Unsloth 2025.3.19: Fast Qwen2_5_Vl patching. Transformers: 4.50.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 23.684 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


Loading checkpoint shards: 100%|██████████| 5/5 [00:02<00:00,  2.37it/s]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [6]:
# Load "microsoft/Phi-4-multimodal-instruct" through the project's Phi-4
# wrapper, requesting the "flash_attention_2" attention mechanism.
# This rebinds `model`, replacing whatever an earlier loader cell assigned.
# NOTE(review): max_new_tokens is 1000 here but 4096 in the other loader
# cells -- confirm the difference is intentional.
model = phi.Phi4VisionLLM(
    model_name="microsoft/Phi-4-multimodal-instruct",
    max_new_tokens=1000,
    device="cuda",
    attn_mech="flash_attention_2",
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:10<00:00,  3.60s/it]


In [None]:
# Load "CohereForAI/aya-vision-8b" through the project's Aya wrapper.
# This rebinds `model`, replacing whatever an earlier loader cell assigned.
model = aya.AyaVisionLLM(
    model_name="CohereForAI/aya-vision-8b",
    max_new_tokens=4096,
    device="cuda",
)

In [None]:
# Load the "ansu0122/uadoc-ada-qwen2.5vl" checkpoint with the same Qwen wrapper
# and the same settings as the base Qwen2.5-VL loader cell.
# This rebinds `model`, replacing whatever an earlier loader cell assigned.
model = qwn.QwenVL2_LLM(
    model_name="ansu0122/uadoc-ada-qwen2.5vl",
    max_new_tokens=4096,
    device="cuda",
    load_in_4bit=False,
    use_gradient_checkpointing="unsloth",
)

In [None]:
# Quick single-image check: take the tenth sample from the end of the dataset,
# run it through the currently loaded model with the text prompt, and show
# what comes back.
image = dataset[-10]["image"]

model.set_prompt(pt.get_text_template())

result = model.process_doc_image(image)

# A falsy result is reported as a parsing failure.
if not result:
    print("Failed to parse the response.")
else:
    print("result:", result)

In [None]:
# OCR Text sections
# NOTE(review): `dataset` is rebound to its shuffled result every time a cell
# like this runs -- confirm the resulting sample order is the intended one.
# NOTE(review): the output name is hard-coded to "phi4vl"; adjust it when a
# different model is loaded.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_text_template())
df.ocr_dataset(
    dataset,
    "../results/ocr_text_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
    region_types=["text"],
)

In [None]:
# OCR Doc sections
# Same text prompt as the text-region run, but no `region_types` filter is passed.
# NOTE(review): `dataset` is rebound to its shuffled result on every run, and
# the output name is hard-coded to "phi4vl" -- confirm both are intended.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_text_template())
df.ocr_dataset(
    dataset,
    "../results/ocr_whole_doc_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
)

In [None]:
# Table Extraction Table sections
# Uses the table prompt and restricts the run to regions of type "table".
# NOTE(review): `dataset` is rebound to its shuffled result on every run, and
# the output name is hard-coded to "phi4vl" -- confirm both are intended.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_table_template())
df.ocr_dataset(
    dataset,
    "../results/table_table_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
    region_types=["table"],
)

In [None]:
# Table Extraction Doc sections
# Uses the table prompt; no `region_types` filter is passed.
# NOTE(review): `dataset` is rebound to its shuffled result on every run, and
# the output name is hard-coded to "phi4vl" -- confirm both are intended.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_table_template())
df.ocr_dataset(
    dataset,
    "../results/table_whole_doc_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
)

In [None]:
# Layout Analysis Doc sections
# Uses the title prompt; no `region_types` filter is passed.
# NOTE(review): `dataset` is rebound to its shuffled result on every run, and
# the output name is hard-coded to "phi4vl" -- confirm both are intended.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_title_template())
df.ocr_dataset(
    dataset,
    "../results/layout_whole_doc_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
)

In [None]:
# Classification Doc sections
# Uses the classification prompt; no `region_types` filter is passed.
# NOTE(review): `dataset` is rebound to its shuffled result on every run, and
# the output name is hard-coded to "phi4vl" -- confirm both are intended.
dataset = dataset.shuffle(seed=42)

model.set_prompt(pt.get_class_template())
df.ocr_dataset(
    dataset,
    "../results/class_whole_doc_phi4vl.jsonl",
    ocr_fn=model.process_doc_image,
    chunk_size=2,
    lang="ukr",
)

### Release Memory

In [6]:
import gc

import torch

# Drop the notebook's reference to the loaded model.
del model
# `del` only unbinds the name: objects caught in reference cycles stay alive
# (and keep their CUDA tensors allocated) until the cyclic collector runs, so
# force a collection before asking PyTorch to give cached blocks back.
gc.collect()
# Release the caching allocator's unused blocks so other processes (and
# `nvidia-smi`) see the memory as free.
torch.cuda.empty_cache()

In [9]:
# Shell out to `nvidia-smi` to show the driver's view of GPU memory usage and utilisation.
!nvidia-smi

Thu Mar 27 19:34:16 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.107.02             Driver Version: 550.107.02     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:09:00.0 Off |                  N/A |
| 30%   47C    P5             59W /  350W |     300MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [21]:
# Report PyTorch's own accounting of GPU memory, converting the byte counts
# by dividing by 1024 twice and showing two decimals.
allocated_mb = torch.cuda.memory_allocated() / 1024 / 1024
print(f"Memory allocated: {allocated_mb:.2f} MB")

reserved_mb = torch.cuda.memory_reserved() / 1024 / 1024
print(f"Memory reserved: {reserved_mb:.2f} MB")

Memory allocated: 10.12 MB
Memory reserved: 40.00 MB
