In [1]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [2]:
import shutil
import os
import splitfolders
import re
import pandas as pd

In [3]:
# Make Google Drive available inside the Colab runtime under /content/gdrive.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive')

Mounted at /content/gdrive


In [29]:
# Project folder on the mounted Drive. Kept absolute (matching the mount point) so the
# cells below do not depend on the notebook's current working directory.
# The trailing slash is required: later cells build paths by string concatenation.
root_path = '/content/gdrive/MyDrive/Explainable_Wound_Classification/'

In [30]:
# Read the per-image wound-stage probability table; rows are keyed by the 'Image' column.
probabilities_csv = root_path + 'Cropped_Images_Wound_Stage_Probabilities.csv'
labels_df = pd.read_csv(probabilities_csv, index_col='Image')
labels_df.head()

Unnamed: 0_level_0,hemostasis,inflammatory,proliferative,maturation
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Day 8_A8-4-L.png,0.181818,0.090909,0.545455,0.181818
Day 4_A8-3-R.png,0.090909,0.909091,0.0,0.0
Day 14_Y8-4-L.png,0.0,0.0,0.090909,0.909091
Day 7_Y8-4-L.png,0.0,0.0,0.454545,0.545455
Day 2_A8-1-L.png,0.181818,0.727273,0.090909,0.0


In [31]:
#get most probable label for each image
# Only the four stage-probability columns take part in the arg-max. Running idxmax over
# the whole frame breaks as soon as non-numeric columns (Label, Age, Side) exist, i.e.
# whenever this cell is re-run after the cells below it.
stage_columns = ['hemostasis', 'inflammatory', 'proliferative', 'maturation']
labels_df['Label'] = labels_df[stage_columns].idxmax(axis='columns')
labels_df.head()

Unnamed: 0_level_0,hemostasis,inflammatory,proliferative,maturation,Label
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Day 8_A8-4-L.png,0.181818,0.090909,0.545455,0.181818,proliferative
Day 4_A8-3-R.png,0.090909,0.909091,0.0,0.0,inflammatory
Day 14_Y8-4-L.png,0.0,0.0,0.090909,0.909091,maturation
Day 7_Y8-4-L.png,0.0,0.0,0.454545,0.545455,maturation
Day 2_A8-1-L.png,0.181818,0.727273,0.090909,0.0,inflammatory


In [34]:
#get properties of images from name

# File names look like 'Day 8_A8-4-L.png'. The four captured groups are used below as
# Day, Age (Y or A), Mouse and Side (L or R). Raw string so '\d' is not treated as a
# (deprecated) string escape.
image_name_re = re.compile(r'^Day (\d+)_(Y|A)8-(\d)-(L|R)')


def _image_props(name):
    """Return the (day, age, mouse, side) strings captured from an image file name.

    Raises:
        ValueError: if the name does not follow the expected pattern, so the offending
            file is reported instead of failing with an AttributeError on None.
    """
    match = image_name_re.match(name)
    if match is None:
        raise ValueError(f'Image name does not match the expected pattern: {name!r}')
    return match.groups()


props = labels_df.index.map(_image_props)

labels_df['Day'] = props.map(lambda x: int(x[0]))
labels_df['Age'] = props.map(lambda x: x[1])
labels_df['Mouse'] = props.map(lambda x: int(x[2]))
labels_df['Side'] = props.map(lambda x: x[3])

labels_df.head()

Unnamed: 0_level_0,hemostasis,inflammatory,proliferative,maturation,Label,Day,Age,Mouse,Side
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Day 8_A8-4-L.png,0.181818,0.090909,0.545455,0.181818,proliferative,8,A,4,L
Day 4_A8-3-R.png,0.090909,0.909091,0.0,0.0,inflammatory,4,A,3,R
Day 14_Y8-4-L.png,0.0,0.0,0.090909,0.909091,maturation,14,Y,4,L
Day 7_Y8-4-L.png,0.0,0.0,0.454545,0.545455,maturation,7,Y,4,L
Day 2_A8-1-L.png,0.181818,0.727273,0.090909,0.0,inflammatory,2,A,1,L


In [44]:
# Mean imaging day of the images assigned to each label.
day_by_label = labels_df.groupby('Label')['Day']
day_by_label.mean()

Label
hemostasis        1.411765
inflammatory      4.821429
maturation       12.645161
proliferative     9.127907
Name: Day, dtype: float64

In [45]:
# Mean imaging day per label, reported separately for each age group.
age_sections = [('Age: Aged', 'A'), ('\n\nAge: Young', 'Y')]

for heading, age_code in age_sections:
    in_age_group = labels_df['Age'] == age_code
    mean_day = labels_df[in_age_group].groupby('Label')['Day'].mean()
    print(heading)
    print(mean_day)

Age: Aged
Label
hemostasis        1.217391
inflammatory      5.000000
maturation       13.133333
proliferative    10.277778
Name: Day, dtype: float64


Age: Young
Label
hemostasis        1.571429
inflammatory      4.500000
maturation       12.489362
proliferative     7.187500
Name: Day, dtype: float64


In [49]:
#separate images into folders based on label

# Wounds held out as the separate test set, written as (Age, Mouse, Side).
# Every image of a held-out wound goes to test/; everything else goes to Labeled_images/.
held_out_wounds = {('Y', 4, 'L'), ('A', 1, 'R')}

cropped_dir = os.path.join(root_path, 'Circle_Cropped_images')
train_val_dir = os.path.join(root_path, 'Labeled_images')
held_out_dir = os.path.join(root_path, 'test')  # for our separate test images

os.makedirs(train_val_dir, exist_ok=True)
os.makedirs(held_out_dir, exist_ok=True)

# One sub-folder per label under both destinations.
for label in labels_df['Label'].unique():
    os.makedirs(os.path.join(train_val_dir, label), exist_ok=True)
    os.makedirs(os.path.join(held_out_dir, label), exist_ok=True)

# Single pass over the table instead of re-filtering the frame once per label.
image_rows = zip(
    labels_df.index,
    labels_df['Label'],
    labels_df['Age'],
    labels_df['Mouse'],
    labels_df['Side'],
)
for image_name, label, age, mouse, side in image_rows:
    is_held_out = (age, int(mouse), side) in held_out_wounds
    destination_root = held_out_dir if is_held_out else train_val_dir
    shutil.copy(
        os.path.join(cropped_dir, image_name),
        os.path.join(destination_root, label),
    )

In [50]:
# Divide the labeled images 80/20 into train and val under Split_Labeled_images,
# using a fixed seed.
labeled_images_dir = root_path + 'Labeled_images'
split_output_dir = root_path + 'Split_Labeled_images'
splitfolders.ratio(labeled_images_dir, output=split_output_dir, seed=1492, ratio=(0.8, 0.2))

Copying files: 223 files [00:02, 84.65 files/s]


In [51]:
# Move the held-out test images next to the train/val splits.
# Paths are built from root_path so this cell follows the project folder, rather than
# repeating a hard-coded absolute path.
test_src = os.path.join(root_path, 'test')
test_dst = os.path.join(root_path, 'Split_Labeled_images', 'test')

if os.path.isdir(test_src):
    # Moving into an existing directory would nest it as test/test on a re-run,
    # so drop the stale copy first.
    if os.path.isdir(test_dst):
        shutil.rmtree(test_dst)
    shutil.move(test_src, test_dst)

In [52]:
# Remove the intermediate Labeled_images folder; its files were copied into
# Split_Labeled_images by the split step. Built from root_path so it always targets
# the same project folder as the cells above.
intermediate_dir = os.path.join(root_path, 'Labeled_images')
if os.path.isdir(intermediate_dir):
    shutil.rmtree(intermediate_dir)