# Overview

Load the image index and process the images into a data structure for PyTorch.

In [58]:
import os
import pandas as pd
import torchvision as tv
import torchvision.transforms.functional as TF

In [4]:
# Load the image index; the first CSV column becomes the DataFrame index.
index_df = pd.read_csv(
    "../../data/image_index.csv",
    index_col=0,
)
# Preview the first five rows.
index_df.head(n=5)

## Train, Test, and Validation Data Index

In [15]:
# Rows with data_set == 1 make up the training split. reset_index() (without
# drop=True) keeps the previous index as a column and renumbers rows from 0.
train_df = index_df.loc[index_df["data_set"] == 1].reset_index()

In [16]:
# Rows with data_set == 0 are taken as the test split (presumably 0 is the
# test flag in the index file; verify against how the CSV was built).
# reset_index() keeps the previous index as a column and renumbers rows from 0.
test_df = index_df.loc[index_df["data_set"] == 0].reset_index()

In [17]:
# Rows with data_set == 2 are taken as the validation split (presumably 2 is
# the validation flag in the index file; verify against how the CSV was built).
# reset_index() keeps the previous index as a column and renumbers rows from 0.
val_df = index_df.loc[index_df["data_set"] == 2].reset_index()

## Image Processing

In [18]:
# Take the image path stored in the row labelled 0 of the training index
# to use as a sample.
testpath = train_df.loc[0, "img"]

In [19]:
# Show the sample path that the following cells read.
testpath

'..\\..\\data\\extracted\\chest_xray\\train\\NORMAL\\IM-0115-0001.jpeg'

### Read Image as a PyTorch Tensor

In [22]:
# Decode the sample image file at `testpath` into a tensor.
img_tensor = tv.io.read_image(testpath)

In [28]:
# Show the tensor's shape as a plain Python list.
list(img_tensor.shape)

[1, 1858, 2090]

### Are we dealing with clean data?

In [48]:
# Record the shape of every indexed image, and print the path of each image
# whose first dimension is 3 so the three-channel files can be identified.
tensor_counts = []

for path in index_df.img:
    img = tv.io.read_image(path)
    shape_list = [*img.shape]
    tensor_counts.append(shape_list)
    if shape_list[0] != 3:
        continue
    print(path)

..\..\data\extracted\chest_xray\train\PNEUMONIA\person1010_virus_1695.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1035_bacteria_2969.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1052_virus_1751.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1073_bacteria_3008.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1099_virus_1819.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1104_virus_1826.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1108_virus_1833.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1127_bacteria_3068.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1129_virus_1857.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1131_bacteria_3073.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1132_virus_1863.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1133_virus_1865.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person1142_virus_1892.jpeg
..\..\data\ex

..\..\data\extracted\chest_xray\train\PNEUMONIA\person296_virus_613.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person306_virus_628.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person309_virus_631.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person316_virus_641.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person320_virus_647.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person325_virus_659.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person325_virus_660.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person325_virus_661.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person325_virus_664.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person325_virus_665.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person326_virus_670.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person334_virus_689.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person338_virus_694.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\per

..\..\data\extracted\chest_xray\train\PNEUMONIA\person680_bacteria_2575.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person696_bacteria_2594.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person69_bacteria_338.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person709_bacteria_2608.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person712_virus_1310.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person714_bacteria_2615.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person718_virus_1316.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person719_virus_1338.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person734_virus_1355.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person736_virus_1358.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person739_virus_1361.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person740_bacteria_2643.jpeg
..\..\data\extracted\chest_xray\train\PNEUMONIA\person745_virus_1368.jpeg
..\..\data\extracted\c

The output tensor shape is in CHW order (Channel, Height, Width).

In [38]:
# Inspect the image shapes collected by the scan over `index_df.img`.
tensor_counts

[[1, 1317, 1857],
 [1, 1509, 2111],
 [1, 1837, 2031],
 [1, 1326, 1663],
 [1, 1818, 2053],
 [1, 1443, 1852],
 [1, 1478, 1873],
 [1, 1916, 2144],
 [1, 1945, 2214],
 [1, 1785, 1863],
 [1, 2155, 2444],
 [1, 1238, 1803],
 [1, 2057, 2498],
 [1, 1559, 1984],
 [1, 1432, 1917],
 [1, 1893, 2624],
 [1, 617, 1300],
 [1, 1384, 2022],
 [1, 2458, 2720],
 [1, 1827, 2271],
 [1, 2124, 2522],
 [1, 2139, 2343],
 [1, 1314, 1647],
 [1, 1632, 1949],
 [1, 1953, 2170],
 [1, 1836, 2024],
 [1, 1534, 1658],
 [1, 2329, 2480],
 [1, 1709, 2096],
 [1, 1504, 1741],
 [1, 1793, 2153],
 [1, 2071, 2022],
 [1, 2032, 2297],
 [1, 2382, 2746],
 [1, 1725, 2001],
 [1, 1060, 1762],
 [1, 1882, 2752],
 [1, 1041, 1616],
 [1, 1318, 1754],
 [1, 1761, 2034],
 [1, 1021, 1562],
 [1, 1125, 1572],
 [1, 1226, 1974],
 [1, 1392, 2034],
 [1, 1558, 2060],
 [1, 650, 1322],
 [1, 1068, 1786],
 [1, 1246, 1740],
 [1, 1230, 1774],
 [1, 767, 1260],
 [1, 925, 1240],
 [1, 1187, 1754],
 [1, 1014, 1558],
 [1, 1461, 2006],
 [1, 1357, 1890],
 [1, 2076, 242

The images are not all the same size, so we need to determine the minimum and maximum image dimensions before resizing them.

In [40]:
# Find the smallest and largest value of each dimension across all images.
# zip(*tensor_counts) transposes the list of [C, H, W] shapes into one tuple
# per dimension, so the built-in min()/max() replace hand-written comparisons.
# Raises ValueError if tensor_counts is empty.
channels, heights, widths = zip(*tensor_counts)

min_c, min_h, min_w = min(channels), min(heights), min(widths)
max_c, max_h, max_w = max(channels), max(heights), max(widths)

In [41]:
# Show the per-dimension minima followed by the maxima.
min_c, min_h, min_w, max_c , max_h, max_w

(1, 127, 384, 3, 2713, 2916)

### Convert all images to either a single channel or multiple channels

#### To a single channel

In [60]:
# Sample three-channel (RGB) image used to try out greyscale conversion.
# The path is assembled with os.path.join so the separators match the host OS
# instead of being hard-coded as Windows backslashes; on Windows the resulting
# string is the same as before.
test_rgb_path = os.path.join(
    "..", "..", "data", "extracted", "chest_xray", "train", "PNEUMONIA",
    "person1298_virus_2228.jpeg",
)
test_rgb_img = tv.io.read_image(test_rgb_path)

In [61]:
# Check the channel count of the sample image (first dimension of the shape).
test_rgb_img.shape

torch.Size([3, 503, 714])

In [63]:
# Collapse the RGB channels into one greyscale channel. Passing
# num_output_channels=3 instead would return a three-channel greyscale image.
rgb_grey_img = TF.rgb_to_grayscale(test_rgb_img)

In [64]:
# Confirm the converted image now has a single channel.
rgb_grey_img.shape

torch.Size([1, 503, 714])

### Resize images

In [55]:
# Resize the sample image to a fixed 244 x 244 (height, width); because both
# dimensions are given, the original aspect ratio is not preserved.
# NOTE(review): 244 may be a typo for the common 224 input size — confirm.
new_img = TF.resize(img_tensor, [244, 244])

### Flatten image to a one-dimensional (1-D) tensor

In [25]:
# encode_jpeg returns the JPEG-encoded byte stream as a 1-D uint8 tensor (the
# output further down starts with the JPEG marker bytes 255, 216), not the
# image's pixel values.
# NOTE(review): if a flat vector of pixels is what is wanted here,
# img_tensor.flatten() would give that — confirm the intent.
img_tensor_flat = tv.io.encode_jpeg(img_tensor)

In [26]:
# Length of the encoded result (one dimension).
img_tensor_flat.shape

torch.Size([284473])

In [42]:
# Peek at the encoded values.
img_tensor_flat

tensor([255, 216, 255,  ...,  87, 255, 217], dtype=torch.uint8)