In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import ast

### Read data

In [17]:
data = pd.read_csv('data/CLEAN.csv')
triplets = pd.read_csv('data/TRIPLETS.csv')

Check for completeness


In [4]:
exists, total = 0, 0
for image_uuid in data['image_uuid']:
    total += 1
    if os.path.isfile('data/images/' + image_uuid + '.png'):
        exists += 1

print(f'{exists} out of {total} images exist') # Should be 1836 out of 1836

1836 out of 1836 images exist


### Basic statistics

In [6]:
print(f'Total number of images: {len(data["image_uuid"].unique())}')
print(f'Total number of patients: {len(data["patient_hash"].unique())}')
print(f'Image-pre-patient ration: {len(data["image_uuid"].unique()) / len(data["patient_hash"].unique()):.2f}')

Total number of images: 1836
Total number of patients: 264
Image-pre-patient ration: 6.95


In [13]:
print('Modality')
print('--------')
print(f'Number of IR images            : {len(data[data["image_type"] == "Scanning Laser Ophthalmoscope"])} ({len(data[data["image_type"] == "Scanning Laser Ophthalmoscope"]) / len(data) * 100:.2f}%)')
print(f'Number of horizontal OCT images: {len(data[data["image_orientation"] == "horizontal"])} ({len(data[data["image_orientation"] == "horizontal"]) / len(data) * 100:.2f}%)')
print(f'Number of vertical OCT images  : {len(data[data["image_orientation"] == "vertical"])} ({len(data[data["image_orientation"] == "vertical"]) / len(data) * 100:.2f}%)')

Modality
--------
Number of IR images            : 898 (48.91%)
Number of horizontal OCT images: 468 (25.49%)
Number of vertical OCT images  : 470 (25.60%)


In [9]:
print('Laterality')
print('----------')
print(f'Number of left eye images: {len(data[data["laterality"] == "L"])} ({len(data[data["laterality"] == "L"]) / len(data) * 100:.2f}%)')
print(f'Number of right eye images: {len(data[data["laterality"] == "R"])} ({len(data[data["laterality"] == "R"]) / len(data) * 100:.2f}%)')

Laterality
----------
Number of left eye images: 711 (38.73%)
Number of right eye images: 1125 (61.27%)


In [14]:
print('Proliferation')
print('-------------')
print(f'Number of NPDR images: {len(data[data["proliferation"] == "NPDR"])} ({len(data[data["proliferation"] == "NPDR"]) / len(data) * 100:.2f}%)')
print(f'Number of PDR images : {len(data[data["proliferation"] == "PDR"])} ({len(data[data["proliferation"] == "PDR"]) / len(data) * 100:.2f}%)')

Proliferation
-------------
Number of NPDR images: 1247 (67.92%)
Number of PDR images : 589 (32.08%)


In [16]:
print('Combined')
print('--------')
print(f'Left / Horizontal / NPDR : {len(data[(data["laterality"] == "L") & (data["image_orientation"] == "horizontal") & (data["proliferation"] == "NPDR")])}')
print(f'Right / Horizontal / NPDR: {len(data[(data["laterality"] == "R") & (data["image_orientation"] == "horizontal") & (data["proliferation"] == "NPDR")])}')
print(f'Left / Vertical / NPDR   : {len(data[(data["laterality"] == "L") & (data["image_orientation"] == "vertical") & (data["proliferation"] == "NPDR")])}')
print(f'Right / Vertical / NPDR  : {len(data[(data["laterality"] == "R") & (data["image_orientation"] == "vertical") & (data["proliferation"] == "NPDR")])}')
print(f'Left / Fundus / NPDR     : {len(data[(data["laterality"] == "L") & (data["image_type"] == "Scanning Laser Ophthalmoscope") & (data["proliferation"] == "NPDR")])}')
print(f'Right / Fundus / NPDR    : {len(data[(data["laterality"] == "R") & (data["image_type"] == "Scanning Laser Ophthalmoscope") & (data["proliferation"] == "NPDR")])}')
print(f'Left / Horizontal / PDR  : {len(data[(data["laterality"] == "L") & (data["image_orientation"] == "horizontal") & (data["proliferation"] == "PDR")])}')
print(f'Right / Horizontal / PDR : {len(data[(data["laterality"] == "R") & (data["image_orientation"] == "horizontal") & (data["proliferation"] == "PDR")])}')
print(f'Left / Vertical / PDR    : {len(data[(data["laterality"] == "L") & (data["image_orientation"] == "vertical") & (data["proliferation"] == "PDR")])}')
print(f'Right / Vertical / PDR   : {len(data[(data["laterality"] == "R") & (data["image_orientation"] == "vertical") & (data["proliferation"] == "PDR")])}')
print(f'Left / Fundus / PDR      : {len(data[(data["laterality"] == "L") & (data["image_type"] == "Scanning Laser Ophthalmoscope") & (data["proliferation"] == "PDR")])}')
print(f'Right / Fundus / PDR     : {len(data[(data["laterality"] == "R") & (data["image_type"] == "Scanning Laser Ophthalmoscope") & (data["proliferation"] == "PDR")])}')

Combined
--------
Left / Horizontal / NPDR : 117
Right / Horizontal / NPDR: 197
Left / Vertical / NPDR   : 117
Right / Vertical / NPDR  : 198
Left / Fundus / NPDR     : 228
Right / Fundus / NPDR    : 390
Left / Horizontal / PDR  : 63
Right / Horizontal / PDR : 91
Left / Vertical / PDR    : 64
Right / Vertical / PDR   : 91
Left / Fundus / PDR      : 122
Right / Fundus / PDR     : 158


In [18]:
# TRIPLETS
print('Triplets')
print('--------')
print(f'Number of triplets: {len(triplets)}')
print(f'Number of patients: {len(triplets["patient_hash"].unique())}')

Triplets
--------
Number of triplets: 874
Number of patients: 253


In [22]:
print('Combined')
print('--------')
print(f'Left / NPDR: {len(triplets[(triplets["laterality"] == "L") & (triplets["proliferation"] == "NPDR")])} ({len(triplets[(triplets["laterality"] == "L") & (triplets["proliferation"] == "NPDR")]) / len(triplets):.1%})')
print(f'Right / NPDR: {len(triplets[(triplets["laterality"] == "R") & (triplets["proliferation"] == "NPDR")])} ({len(triplets[(triplets["laterality"] == "R") & (triplets["proliferation"] == "NPDR")]) / len(triplets):.1%})')
print(f'Left / PDR: {len(triplets[(triplets["laterality"] == "L") & (triplets["proliferation"] == "PDR")])} ({len(triplets[(triplets["laterality"] == "L") & (triplets["proliferation"] == "PDR")]) / len(triplets):.1%})')
print(f'Right / PDR: {len(triplets[(triplets["laterality"] == "R") & (triplets["proliferation"] == "PDR")])} ({len(triplets[(triplets["laterality"] == "R") & (triplets["proliferation"] == "PDR")]) / len(triplets):.1%})')

Combined
--------
Left / NPDR: 224 (25.6%)
Right / NPDR: 384 (43.9%)
Left / PDR: 116 (13.3%)
Right / PDR: 150 (17.2%)


### Image Analysis

In [85]:
img_path = 'data/images/'
IR = {'shapes': [], 'min_value': 1., 'max_value': 0.}
OCT = {'shapes': [], 'min_value': 1., 'max_value': 0.}

for _, item in combined.iterrows():
    img = plt.imread(img_path + item['image_uuid'] + '.png')
    if item['image_type'] == 'Scanning Laser Ophthalmoscope':
        IR['shapes'].append(img.shape)
        IR['min_value'] = min(IR['min_value'], img.min())
        IR['max_value'] = max(IR['max_value'], img.max())
    elif item['image_type'] == 'Optical Coherence Tomography Scanner':
        OCT['shapes'].append(img.shape)
        OCT['min_value'] = min(OCT['min_value'], img.min())
        OCT['max_value'] = max(OCT['max_value'], img.max())

IR_shape_occurrences = {}
for shapes in set(IR['shapes']):
    IR_shape_occurrences[shapes] = IR['shapes'].count(shapes)
OCT_shape_occurrences = {}
for shapes in set(OCT['shapes']):
    OCT_shape_occurrences[shapes] = OCT['shapes'].count(shapes)


print(f'Shape(s) of IR images: {IR_shape_occurrences}') # TODO: Need to resize to model input size
print(f'Minimum value of IR images: {IR["min_value"]}')
print(f'Maximum value of IR images: {IR["max_value"]}')
print()
print(f'Shape(s) of OCT scans: {OCT_shape_occurrences}')
print(f'Minimum value of OCT scans: {OCT["min_value"]}')
print(f'Maximum value of OCT scans: {OCT["max_value"]}')


Shape(s) of IR images: {(768, 768): 750, (768, 768, 4): 236, (1536, 1536): 4}
Minimum value of IR images: 0.0
Maximum value of IR images: 1.0

Shape(s) of OCT scans: {(496, 768): 750, (496, 768, 4): 236, (496, 1536): 4}
Minimum value of OCT scans: 0.0
Maximum value of OCT scans: 1.0
