In [1]:
import requests
from pathlib import Path
from PIL import Image
from io import BytesIO
import pandas as pd

### Reading images and labels from API

In [2]:
# Identifier of the document whose samples are requested from the API and
# exported to disk further down in this notebook.
document_id = 5

In [3]:
# URL of the API endpoint queried for the samples of the selected document.
# NOTE(review): the host is hard-coded to localhost:8000 — adjust when the API runs elsewhere.
get_samples_url = f"http://localhost:8000/document_generator/api/documents/{document_id}/get_samples/"

In [4]:
# Request the list of samples that belong to the selected document.
with requests.Session() as session:
    # Without a timeout the cell would block forever if the API stops responding.
    res_samples = session.get(get_samples_url, timeout=30)

# Any status other than HTTP 200 is treated as a failed retrieval.
if res_samples.status_code != 200:
    raise ValueError(f"cannot retrieve samples for document: {document_id}")

# Decoded JSON payload of the response; iterated as a sequence of samples below.
samples = res_samples.json()

In [5]:
# Number of samples returned by the API for this document.
len(samples)

10

In [6]:
# Flat list collecting the boxes of every sample.
all_boxes = []

# A single session is shared by all requests so the connection to the API can
# be reused instead of being re-established for every sample.
with requests.Session() as session:
    for sample in samples:
        sample_id = sample["id"]
        get_sample_boxes_url = f"http://localhost:8000/document_generator/api/sample_documents/{sample_id}/get_boxes/"

        # The timeout keeps the loop from blocking indefinitely on a stalled request.
        res_boxes = session.get(get_sample_boxes_url, timeout=30)

        # Abort on the first sample whose boxes cannot be fetched.
        if res_boxes.status_code != 200:
            raise ValueError(f"cannot retrieve boxes for sample: {sample_id}")

        boxes = res_boxes.json()
        all_boxes.extend(boxes)

In [8]:
# Turn the collected box records into a single table for inspection and export.
all_boxes_df = pd.DataFrame(all_boxes)

In [9]:
# Sanity check: total number of boxes and number of distinct samples they refer to.
n_boxes = len(all_boxes_df)
n_sample_documents = all_boxes_df["sample_document"].nunique()
n_boxes, n_sample_documents

(20, 10)

In [10]:
# Preview the first rows of the boxes table.
all_boxes_df.head()

Unnamed: 0,id,name,label,start_x_norm,start_y_norm,end_x_norm,end_y_norm,sample_document,template_box
0,28,cognome,NZAEXZ,0.081159,0.165049,0.23913,0.184466,113,18
1,29,cognome_b,WYLHGZXXYX,0.728986,0.165049,0.942029,0.183495,113,19
2,30,cognome,VNY,0.081159,0.165049,0.146377,0.184466,114,18
3,31,cognome_b,DJJIWZHCFQP,0.728986,0.165049,0.976812,0.182524,114,19
4,32,cognome,DVUOM,0.081159,0.165049,0.198551,0.184466,115,18


### Saving images and labels

In [25]:
# Labels are written to the per-document folder; images go into its "images" subfolder.
output_path_labels = Path(f"../data/samples/document_{document_id}")
output_path_images = output_path_labels / "images"

In [26]:
# Create both output folders (including missing parents); existing folders are left as they are.
for output_dir in (output_path_labels, output_path_images):
    output_dir.mkdir(parents=True, exist_ok=True)

In [27]:
# Download the image of every sample and store it as a PNG file.
# A dedicated session is opened here so this cell does not depend on a
# (closed) `session` variable left behind by an earlier cell.
with requests.Session() as session:
    for sample in samples:
        # Strip a leading slash so the joined URL never contains "//" in its path.
        image_url = "http://localhost:8000/" + sample["image"].lstrip("/")

        # The timeout keeps the loop from blocking indefinitely on a stalled download.
        res_image = session.get(image_url, timeout=30)

        # Abort on the first image that cannot be downloaded.
        if res_image.status_code != 200:
            raise ValueError(f"could not retrieve the image error: {res_image.status_code}")

        # Single quotes inside the f-string keep it valid on Python versions before 3.12.
        image_path = output_path_images / f"sample_{sample['id']}.png"

        # The context manager releases the image once it has been written.
        with Image.open(BytesIO(res_image.content)) as sample_image:
            sample_image.save(image_path)

In [28]:
# Write the boxes table next to the images, without the DataFrame index column.
labels_csv_path = output_path_labels / "labels.csv"
all_boxes_df.to_csv(labels_csv_path, index=False)