## Notebook 5: Data loaders

In [6]:
# Imports
import os
import numpy as np
import pandas as pd
import cv2

# Matplotlib for plotting
from matplotlib import pyplot as plt
from matplotlib.pyplot import cm

# PyTorch methods
from torch import nn
from torch.utils.data import DataLoader
import torchxrayvision as xrv

# Albumentations library
import albumentations as alb

# Appearance of the Notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import dentexmodel as dm
from dentexmodel.imageproc import ImageData
from dentexmodel.torchdataset import DatasetFromDF, load_and_process_image

print(f'Project module version: {dm.__version__}')

ModuleNotFoundError: No module named 'torchxrayvision'

In [3]:
# Path settings
# expanduser('~') resolves the home directory even when the HOME environment
# variable is not set (e.g. on Windows), where os.environ['HOME'] would raise
# a KeyError.
home_dir = os.path.expanduser('~')
dentex_dir = os.path.join(home_dir, 'data', 'dentex')
# Disease subset of the dataset and the directory holding its cropped x-rays
data_dir = os.path.join(dentex_dir, 'dentex_disease')
image_dir = os.path.join(data_dir, 'quadrant-enumeration-disease', 'xrays', 'crop')
# Parquet file with the data split, stored directly under dentex_dir
data_file_name = 'dentex_disease_datasplit.parquet'
data_file = os.path.join(dentex_dir, data_file_name)

### Create PyTorch dataset from data frame ###

In [4]:
data_df = pd.read_parquet(data_file)

# Encode the class names as integer labels: names are sorted and numbered
# from 0, and cl_dict is the reverse lookup (integer label -> class name).
cl_names = sorted(data_df['label'].unique())
label_dict = {name: idx for idx, name in enumerate(cl_names)}
cl_dict = {idx: name for name, idx in label_dict.items()}

# np.random.seed() returns None, so the seed value has to be kept in its own
# variable. Passing the integer to sample() ties the shuffle to this seed
# explicitly instead of to whatever state the global NumPy RNG happens to be in.
seed = 123
np.random.seed(seed)

# Add the class labels to the data frame, then shuffle the rows
data_df = (
    data_df
    .assign(cl=data_df['label'].apply(label_dict.get))
    .sample(frac=1, random_state=seed)
    .reset_index(drop=True)
)

# Show the class labels
display(pd.DataFrame(label_dict, index=[0]).iloc[0])

Caries               0
Deep Caries          1
Impacted             2
Periapical Lesion    3
Name: 0, dtype: int64

### PyTorch Model ###

In [5]:
class Model:
    """
    Factory for the classification network used in this notebook.

    Parameters:
        n_outputs (int): Number of output units of the final linear layer.
            Defaults to 2.
    """

    def __init__(self, n_outputs=2):
        self.n_outputs = n_outputs

    def create_model(self):
        """
        Build a ResNet-50 network with a new fully connected head.

        The network is created through torchxrayvision with the
        "resnet50-res512-all" weights. Its inner ``model`` attribute is
        extracted and the ``fc`` layer is replaced by
        Linear(2048 -> 512) -> ReLU -> Linear(512 -> n_outputs).

        Requires ``nn`` (torch.nn) and ``xrv`` (torchxrayvision) to be
        imported at notebook level.

        Returns:
            The network with the replaced ``fc`` head.
        """
        xrv_resnet = xrv.models.ResNet(weights="resnet50-res512-all")
        # Only the wrapped backbone is kept; the xrv wrapper is discarded
        backbone = xrv_resnet.model
        # Swap the existing classifier for a two-layer head with n_outputs units
        backbone.fc = nn.Sequential(
            nn.Linear(in_features=2048, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=self.n_outputs),
        )
        return backbone