In [15]:
import os
import sys
import glob
import logging
import json
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt

# Module-level logger named after the current module (not used within this cell).
logger = logging.getLogger(__name__)

# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up before each cell executes, without a kernel restart.
# NOTE: re-running this cell in a live kernel prints a notice that the
# extension is already loaded.
%load_ext autoreload
%autoreload 2
# Project package and its Dentex class (used below to download the data set).
import computervision
from computervision.dentex import Dentex

# Print version info
print(f'Package version: {computervision.__version__}')
print(f'Python version:  {sys.version}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Package version: v0.0.1
Python version:  3.12.3 (main, Jun 18 2025, 17:59:45) [GCC 13.3.0]


In [7]:
# Root directory for local data, located under the current user's home directory.
# Path.home() is used instead of os.environ.get('HOME'): the HOME variable can be
# unset (e.g. on Windows), in which case .get() returns None and os.path.join()
# raises a TypeError. os.path.join() accepts the Path object and returns a str.
data_dir = os.path.join(Path.home(), 'data')

# Directory to store the data
dataset_dir = os.path.join(data_dir, 'dentex')
# Create the directory (and any missing parents); no error if it already exists
Path(dataset_dir).mkdir(parents=True, exist_ok=True)

## Download the Dentex data set ##

In [9]:
# Public location of the Dentex quadrant-enumeration archive. It is used as the
# fallback when the DT_URL environment variable does not provide a location.
default_url = 'https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz'
# `or` (rather than a .get() default) also covers DT_URL being set but empty,
# so that `url` is never None or '' when it is passed to the download below.
url = os.environ.get('DT_URL') or default_url
print(f'Location of the data set for manual download: \n {url}')
# The Dentex class in src/computervision/dentex.py contains
# short methods for working with the dentex data set
tar_file = Dentex().download(path=dataset_dir, url=url)

Location of the data set for manual download: 
 https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz


dentex-quadrant-enumeration.tar.gz: 1.72GB [00:50, 33.7MB/s]                               


Extracting from .gz archive.


In [13]:
# Number of x-ray images that the extracted archive is expected to contain
expected_images = 634
# The .tar.gz archive is extracted into the data set directory,
# so the x-ray images should now be available on the local file system
xrays_dir = os.path.join(dataset_dir, 'quadrant_enumeration/xrays')
png_pattern = os.path.join(xrays_dir, '*.png')
file_list = glob.glob(png_pattern)
# Stop here if the number of images on disk does not match the expectation
assert expected_images == len(file_list), (
    f'WARNING: expected images ({expected_images}) != images on file system ({len(file_list)})'
)

## Load annotations ##

In [22]:
# The annotations come in a json file
json_file = os.path.join(dataset_dir, 'quadrant_enumeration/train_quadrant_enumeration.json')
# Open with an explicit encoding: without it, open() falls back to the
# platform's locale encoding, which is not guaranteed to be UTF-8.
with open(json_file, 'r', encoding='utf-8') as file:
    annotations_raw = json.load(file)
# Convert every top-level item into a data frame. The raw dictionary keeps its
# own name so that `annotations` always refers to the dict of data frames.
annotations = {k: pd.DataFrame(v) for k, v in annotations_raw.items()}
print(annotations.keys())

dict_keys(['images', 'annotations', 'categories_1', 'categories_2'])
