In [1]:
import os
import pandas as pd
import shutil
import imageio
import imgaug as ia
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img

from imgaug import augmenters as iaa

In [2]:
def bcet(img, Gmin=0, Gmax=255, Gmean=110):
    """Apply the Balance Contrast Enhancement Technique (BCET) to an image.

    Pixel values are remapped with the parabola ``y = a * (x - b)**2 + c``
    whose coefficients are chosen so that the input minimum maps to ``Gmin``,
    the input maximum maps to ``Gmax`` and the output mean equals ``Gmean``.
    Statistics are computed over the whole array (all channels together).

    Parameters
    ----------
    img : array-like
        Input image (any numeric dtype, any shape).
    Gmin, Gmax, Gmean : number, optional
        Target minimum, maximum and mean of the output image.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``img``. Values are rounded
        and clipped to ``[0, 255]`` before the cast.
    """
    # Work in float64: squaring a uint8 array in place wraps modulo 256 and
    # silently corrupts the mean-square statistic.
    pixels = np.asarray(img, dtype=np.float64)

    def to_uint8(values):
        # Clip before casting so out-of-range values saturate instead of wrapping.
        return np.clip(np.rint(values), 0, 255).astype(np.uint8)

    if pixels.size == 0:
        return to_uint8(pixels)

    Lmin = pixels.min()                 # minimum of input image
    Lmax = pixels.max()                 # maximum of input image
    Lmean = pixels.mean()               # mean of input image
    LMssum = np.mean(pixels * pixels)   # mean of squares of input image

    # A constant image has no contrast to balance; return it unchanged.
    if Lmax == Lmin:
        return to_uint8(pixels)

    bnum = Lmax * Lmax * (Gmean - Gmin) - LMssum * (Gmax - Gmin) + Lmin * Lmin * (Gmax - Gmean)
    bden = 2 * (Lmax * (Gmean - Gmin) - Lmean * (Gmax - Gmin) + Lmin * (Gmax - Gmean))

    # When either denominator vanishes the parabola is undefined. Fall back to
    # a linear stretch, which still maps Lmin -> Gmin and Lmax -> Gmax.
    stretch_den = None
    if bden != 0:
        b = bnum / bden
        stretch_den = (Lmax - Lmin) * (Lmax + Lmin - 2 * b)
    if not stretch_den:
        linear = Gmin + (pixels - Lmin) * (Gmax - Gmin) / (Lmax - Lmin)
        return to_uint8(linear)

    a = (Gmax - Gmin) / stretch_den
    c = Gmin - a * (Lmin - b) ** 2

    y = a * (pixels - b) ** 2 + c  # parabolic transfer function
    return to_uint8(y)

## Input

In [3]:
# Inputs for the data-cleaning step.
# `data` is the path of the raw severity-score CSV and `raw_image_dir` is the
# folder the image filenames in that CSV are resolved against.
data = "annotations/covid-severity-scores.csv"
raw_image_dir = "images"

# Image filenames that are filtered out of the score table during cleaning.
error_file = [
    "ciaa199.pdf-001-a.png",
    "ciaa199.pdf-001-b.png",
    "ciaa199.pdf-001-c.png",
    "6C94A287-C059-46A0-8600-AFB95F4727B7.jpeg",
    "53EC07C9-5CC6-4BE4-9B6F-D7B0D72AAA7E.jpeg",
    "covid-19-rapidly-progressive-acute-respiratory-distress-syndrome-ards-admission.jpg",
    "covid-19-pneumonia-28.png",
    "1.CXRCTThoraximagesofCOVID-19fromSingapore.pdf-002-fig3b.png",
    "1.CXRCTThoraximagesofCOVID-19fromSingapore.pdf-003-fig4b.png",
    "85E52EB3-56E9-4D67-82DA-DEA247C82886.jpeg",
    "all14238-fig-0001-m-b.jpg",
    "all14238-fig-0001-m-c.jpg",
    "B2D20576-00B7-4519-A415-72DE29C90C34.jpeg",
    "covid-19-pneumonia-12.jpg",
    "covid-19-pneumonia-rapidly-progressive-3-days.jpg",
    "covid-19-rapidly-progressive-acute-respiratory-distress-syndrome-ards-day-1.jpg",
    "figure1-5e71be566aa8714a04de3386-98-left.jpg",
    "lancet-case2a.jpg",
    "lancet-case2b.jpg",
    "nejmoa2001191_f3-PA.jpeg",
    "nejmoa2001191_f4.jpeg",
]

In [4]:
# Inputs for the augmentation step.
# Each entry names one output variant; the strings are compared verbatim when
# the augmenter is selected and are embedded in the generated filenames, so
# their spelling (including "Transalte") must stay exactly as written.
function_names = [
    "Gamma",
    "CLAHE",
    "BCET",
    "original",
    "TransalteY_U",
    "TransalteY_D",
    "TransalteX_U",
    "TransalteX_D",
    "Rotate_P",
    "Rotate_N",
]

# Root folder for generated images and the name of the CSV written into it.
output_dir = "original_post_processed"
output_csv_name = "augmented_covid-severity-scores.csv"

## Raw data cleaning
Prepare the following inputs before running this section:
* `data` -> path to the raw `covid-severity-scores.csv`
* `error_file` -> list of image filenames that must be excluded from the dataset
* `raw_image_dir` -> folder containing the original (raw) images

In [5]:
# Load the raw score sheet and show its first rows for a quick sanity check.
raw_scores = pd.read_csv(data)
print(raw_scores.head())

# The first five rows hold the description text and the real column header,
# so the per-image records start at position 5.
df = raw_scores.iloc[5:, :]

# Data cleaning: drop every record whose first column (the image filename)
# appears in the exclusion list.
is_error_file = df.iloc[:, 0].isin(error_file)
df_cleaned = df[~is_error_file]

  ## COVID-19 Pneumonia Severity Scores (Mean of 3 expert raters)  \
0  ## License: CC BY-SA Creative Commons Attribut...                
1  ## Citation: Joseph Paul Cohen et al. Predicti...                
2  ## geographic_extent_mean: The extent of lung ...                
3  ## opacity_mean: The degree of opacity. The to...                
4                                           filename                

        Unnamed: 1    Unnamed: 2  
0              NaN           NaN  
1              NaN           NaN  
2              NaN           NaN  
3              NaN           NaN  
4  geographic_mean  opacity_mean  


## data augmentation
Prepare the following inputs before running this section:
* `function_names` -> the augmentation functions to apply (one or more)
* `output_dir` -> folder where the generated images and the CSV are written

In [6]:
# Build the augmented dataset:
#   1. copy the cleaned originals into <output_dir>/original and <output_dir>/total,
#   2. write one augmented image per (image, augmentation) pair into
#      <output_dir>/<function_name> and <output_dir>/total,
#   3. collect one label row per generated image in `output_csv`.
label_columns = ["name", "geographic_mean", "opacity_mean"]

# Start from the cleaned table so the CSV only lists images that are actually
# exported (excluded files are never copied to the output folders).
output_csv = df_cleaned.copy()
output_csv.columns = label_columns

total_dir = os.path.join(output_dir, "total")
original_dir = os.path.join(output_dir, "original")
for directory in (output_dir, total_dir, original_dir):
    os.makedirs(directory, exist_ok=True)

# The untouched originals are always exported, so an "original" entry in
# `function_names` needs no further work in the loop below.
for filename in df_cleaned.iloc[:, 0].tolist():
    source_path = os.path.join(raw_image_dir, filename)
    shutil.copy(source_path, original_dir)
    shutil.copy(source_path, total_dir)

# Name -> callable taking `image=` and returning the augmented image.
# NOTE: "Gamma" samples its exponent randomly on every call; seed imgaug
# beforehand if reproducible output is required.
augmenters = {
    "CLAHE": iaa.CLAHE(),
    "Gamma": iaa.GammaContrast((0.5, 2.0)),
    "TransalteY_U": iaa.TranslateY(percent=0.1),
    "TransalteY_D": iaa.TranslateY(percent=-0.1),
    "TransalteX_U": iaa.TranslateX(percent=0.1),
    "TransalteX_D": iaa.TranslateX(percent=-0.1),
    "Rotate_P": iaa.Rotate(5),
    "Rotate_N": iaa.Rotate(-5),
    "BCET": lambda image: bcet(image),
}

# Fail fast on a misspelled name instead of silently reusing the augmenter
# selected for the previous name.
unknown_names = [
    name for name in function_names
    if name != "original" and name not in augmenters
]
if unknown_names:
    raise ValueError(f"Unknown augmentation name(s): {unknown_names}")

augmented_rows = []
for function_name in function_names:
    save_dir = os.path.join(output_dir, function_name)
    os.makedirs(save_dir, exist_ok=True)
    if function_name == "original":
        continue  # already copied above

    augment = augmenters[function_name]
    for filename, geographic_mean, opacity_mean in df_cleaned.itertuples(index=False, name=None):
        stem, ext = os.path.splitext(filename)
        new_name = f"{stem}_{function_name}{ext}"
        new_path = os.path.join(save_dir, new_name)

        image = imageio.imread(os.path.join(raw_image_dir, filename))
        imageio.imsave(new_path, augment(image=image))
        shutil.copy(new_path, total_dir)

        # The augmented image inherits the scores of its source image.
        augmented_rows.append((new_name, geographic_mean, opacity_mean))

# Append all new rows in one step; growing a DataFrame row by row is slow, and
# `DataFrame.append` returned a new frame rather than modifying in place.
if augmented_rows:
    output_csv = pd.concat(
        [output_csv, pd.DataFrame(augmented_rows, columns=label_columns)],
        ignore_index=True,
    )

In [7]:
# Write the label table next to the generated images.
output_csv_path = os.path.join(output_dir, output_csv_name)
output_csv.to_csv(output_csv_path)