## Data splits ##

In [1]:
import os
import sys
import glob
import logging
import json
import numpy as np
import pandas as pd
from PIL import Image
from pathlib import Path
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
import cv2

import albumentations as alb

# Notebook-level logger, named after the running module; its threshold is set to INFO.
logger = logging.getLogger(name=__name__)
logger.setLevel(level=logging.INFO)

%load_ext autoreload
%autoreload 2
import computervision
from computervision.dentex import Dentex
from computervision.imageproc import ImageData

# Report the package version and the interpreter version, one per line
print(
    f'Package version: {computervision.__version__}',
    f'Python version:  {sys.version}',
    sep='\n',
)

Package version: v0.0.1
Python version:  3.12.3 (main, Jun 18 2025, 17:59:45) [GCC 13.3.0]


In [2]:
# Resolve the data directory below the user's home directory.
# os.path.expanduser('~') uses $HOME on POSIX when it is set (same result as before),
# but unlike os.environ.get('HOME') it never hands None to os.path.join when HOME is unset.
data_dir = os.path.join(os.path.expanduser('~'), 'data')
print(f'data_dir: {data_dir}')

# Directory to store the data
dataset_dir = os.path.join(data_dir, 'dentex')

# After extraction from the .tar.gz archive, the images should be on the local fs (see the first notebook)
xrays_dir = os.path.join(dataset_dir, 'cropped')

# Let's make sure that we have the expected number of images available
expected_images = 634
# glob.glob returns matches in arbitrary order; sort so file_list is identical on every run
file_list = sorted(glob.glob(os.path.join(xrays_dir, '*.png')))
print('Make sure to run the first notebook which downloads the data.')
print(f'Found {len(file_list)} images in {xrays_dir}.')

# We want to be sure that the number of images is correct before we continue
assert expected_images == len(file_list), \
    f'WARNING: expected images ({expected_images}) != images on file system ({len(file_list)})'

data_dir: /app/data
Make sure to run the first notebook which downloads the data.
Found 634 images in /app/data/dentex/cropped.


## Load the annotations ##

In [3]:
# Load the annotation table that is stored next to the cropped images.
parquet_file_name = 'train_quadrant_enumeration_cropped.parquet'
parquet_file = os.path.join(xrays_dir, parquet_file_name)
try:
    df = pd.read_parquet(parquet_file)
except Exception as e:
    print(f'ERROR: Could not load file: \n {e}')
    print('Make sure to run the first notebook which downloads the data.')
    # Re-raise after printing the hints: without this the cell would continue and
    # either fail with an unrelated NameError on `df` or show a stale `df` left
    # in the kernel by an earlier run.
    raise
display(df.head())

Unnamed: 0,bbox,segmentation,height,width,file_name,quadrant,pos,fdi,ada
0,"[778, 102, 103, 376]","[[869, 478, 881, 102, 790, 113, 778, 469]]",847,1770,train_0_cropped.png,1,1,11,8
1,"[705, 107, 85, 377]","[[778, 484, 790, 110, 719, 107, 716, 299, 731,...",847,1770,train_0_cropped.png,1,2,12,7
2,"[643, 69, 85, 368]","[[699, 437, 728, 357, 719, 72, 646, 69, 643, 4...",847,1770,train_0_cropped.png,1,3,13,6
3,"[569, 31, 115, 403]","[[634, 434, 684, 378, 655, 31, 575, 40, 569, 3...",847,1770,train_0_cropped.png,1,4,14,5
4,"[481, 10, 100, 406]","[[549, 416, 581, 378, 578, 10, 493, 31, 490, 2...",847,1770,train_0_cropped.png,1,5,15,4
