<a href="https://colab.research.google.com/github/carlosfmorenog/CMM536_Data_Loading_Tutorial/blob/main/CMM536_data_loading_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# clone the github repo containing the images
!git clone https://github.com/carlosfmorenog/CMM536_Data_Loading_Tutorial

Cloning into 'CMM536_Data_Loading_Tutorial'...
remote: Enumerating objects: 44, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 44 (delta 10), reused 8 (delta 1), pack-reused 0[K
Receiving objects: 100% (44/44), 768.36 KiB | 8.44 MiB/s, done.
Resolving deltas: 100% (10/10), done.


# Version 1: Loading the images and converting both the 2D (flattened) and 3D lists into numpy arrays

In [None]:
import os
import numpy as np
import cv2
# Size (in pixels) every image is resized to before being stored.
resize_width = 100
resize_height = 100
# Root folder of the dataset; the sub-folder an image lives in becomes its label.
path = '/content/CMM536_Data_Loading_Tutorial/data'
images = []  # List to append the images in 2D, thus resulting in a 3D array.
imagesflattened = []  # List to append the images in 1D (flattened), thus resulting in a 2D array.
target = []  # List to append the target (vector of labels)

for root, dirs, files in os.walk(path):
    # The label only depends on the folder, so compute it once per folder:
    # it is the folder path relative to `path` with the separators removed
    # (e.g. '<path>/mask' -> 'mask'; nested folder names get concatenated).
    label = root.replace(path, '').replace('\\', '').replace('/', '')
    for file in files:
        # Flag 0 asks OpenCV for a single-channel (greyscale) image.
        img = cv2.imread(os.path.join(root, file), 0)
        # cv2.imread does not raise on unreadable or non-image files: it
        # returns None, so invalid files are detected explicitly here.
        if img is None:
            print("Invalid file "+file+" skipped.")
            continue
        # Resizing is left unguarded on purpose: a failure at this point means
        # a bad resize_width/resize_height, not an invalid file.
        img = cv2.resize(img, (resize_width, resize_height))
        images.append(img)
        # Append the flattened image to the pixel repo
        imagesflattened.append(img.flatten())
        # Append the folder where the image is to the target list
        target.append(label)

# Convert the lists into numpy arrays
images = np.array(images)
imagesflattened_numpyarray = np.array(imagesflattened)
print("Shape of the 3D image numpy array ", images.shape)
print("Sample of the 2D flattened numpy array")
print(imagesflattened_numpyarray)
print("List of class labels")
print(target)


Shape of the 3D image numpy array  (6, 100, 100)
Sample of the 2D flattened numpy array
[[185 190 197 ... 146 146 146]
 [  8   9  11 ...  12   6 132]
 [190 190 191 ...  38  37  38]
 [109 119 112 ...  12  14  34]
 [101 101 101 ... 178 176 182]
 [ 59  65  66 ... 127 132 111]]
List of class labels
['nomask', 'nomask', 'mask', 'mask', 'mask', 'mask']


# Version 2: Loading the images and converting the 2D (flattened) list into a Pandas Dataframe

The 3D list is still converted into a numpy array since you cannot convert a 3D list into a 2D table!

In [None]:
import os
import numpy as np
import cv2
from pandas import DataFrame

# I used a smaller resize factor to render the Pandas DataFrame better
resize_width = 25
resize_height = 25
# Root folder of the dataset; the sub-folder an image lives in becomes its label.
path = '/content/CMM536_Data_Loading_Tutorial/data'
images = []  # List to append the images in 2D, thus resulting in a 3D array.
imagesflattened = []  # List to append the images in 1D (flattened), thus resulting in a 2D Pandas DataFrame.
target = []  # List to append the target (vector of labels)

for root, dirs, files in os.walk(path):
    # The label only depends on the folder, so compute it once per folder:
    # it is the folder path relative to `path` with the separators removed
    # (e.g. '<path>/mask' -> 'mask'; nested folder names get concatenated).
    label = root.replace(path, '').replace('\\', '').replace('/', '')
    for file in files:
        # Flag 0 asks OpenCV for a single-channel (greyscale) image.
        img = cv2.imread(os.path.join(root, file), 0)
        # cv2.imread does not raise on unreadable or non-image files: it
        # returns None, so invalid files are detected explicitly here.
        if img is None:
            print("Invalid file "+file+" skipped.")
            continue
        # Resizing is left unguarded on purpose: a failure at this point means
        # a bad resize_width/resize_height, not an invalid file.
        img = cv2.resize(img, (resize_width, resize_height))
        images.append(img)
        # Append the flattened image to the pixel repo
        imagesflattened.append(img.flatten())
        # Append the folder where the image is to the target list
        target.append(label)

# convert the 3D list into numpy array
images = np.array(images)
print("Shape of the 3D image numpy array ", images.shape)
# Convert the 2D list into pandas dataframe (one row per image, one column per pixel)
imagesflattened_pandas = DataFrame(imagesflattened)
# Add the class as the last column of the dataframe. The label list is assigned
# directly: it has one entry per row, so no intermediate DataFrame is needed
# and the assignment also works when no image was loaded.
imagesflattened_pandas['class'] = target
print("Sample of the 2D flattened Pandas DataFrame")
imagesflattened_pandas

Shape of the 3D image numpy array  (6, 25, 25)
Sample of the 2D flattened Pandas DataFrame


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,616,617,618,619,620,621,622,623,624,class
0,206,155,149,153,153,153,151,153,152,153,...,147,153,142,149,148,146,146,146,146,nomask
1,12,20,8,10,17,9,10,13,14,14,...,7,8,8,4,9,8,10,9,145,nomask
2,190,192,190,192,192,190,193,192,198,146,...,25,39,71,79,67,52,45,44,41,mask
3,137,40,80,92,161,123,135,128,194,72,...,19,19,118,15,18,15,30,24,12,mask
4,97,95,92,160,153,78,165,198,118,96,...,190,191,187,178,176,181,185,181,182,mask
5,63,83,49,108,188,11,28,109,49,126,...,120,101,133,179,178,136,127,117,128,mask


The values in the data structures are different because in the `numpy array` the images are resized to $100 \times 100$ while the `pandas dataframe` contains $25 \times 25$ flattened images (plus the **class/label/target** in the last column).