In [44]:
import io
from PIL import Image
from pathlib import Path
import pandas as pd

from transforms.constants import LEVELS, PERTURBATIONS

from google.cloud import storage
# Name of the Google Cloud Storage bucket that the source image blobs are read from.
BUCKET_NAME = 'dados_brutos_chexphoto'

# Create the client and resolve the bucket once, up front, so that
# process_perturbation can reuse the module-level `bucket` for every download.
client = storage.Client()
bucket = client.get_bucket(BUCKET_NAME)

In [45]:
# Load the table that drives the run. As the preview below shows, each row lists a
# source_path, split, transformation, id and destination_path.
metadata = pd.read_parquet('Data/metadata.parquet')
metadata.head(10)

Unnamed: 0,source_path,split,transformation,id,destination_path
0,CheXphoto-v1.0/train/natural/iphone/patient002...,train,identity,0,Data/processed_data/train/identity/0.jpg
1,CheXphoto-v1.0/train/natural/iphone/patient002...,train,identity,1,Data/processed_data/train/identity/1.jpg
2,CheXphoto-v1.0/train/natural/iphone/patient005...,train,identity,2,Data/processed_data/train/identity/2.jpg
3,CheXphoto-v1.0/train/natural/iphone/patient005...,train,identity,3,Data/processed_data/train/identity/3.jpg
4,CheXphoto-v1.0/train/natural/iphone/patient007...,train,identity,4,Data/processed_data/train/identity/4.jpg
5,CheXphoto-v1.0/train/natural/iphone/patient007...,train,identity,5,Data/processed_data/train/identity/5.jpg
6,CheXphoto-v1.0/train/natural/iphone/patient015...,train,identity,6,Data/processed_data/train/identity/6.jpg
7,CheXphoto-v1.0/train/natural/iphone/patient015...,train,identity,7,Data/processed_data/train/identity/7.jpg
8,CheXphoto-v1.0/train/natural/iphone/patient015...,train,identity,8,Data/processed_data/train/identity/8.jpg
9,CheXphoto-v1.0/train/natural/iphone/patient020...,train,identity,9,Data/processed_data/train/identity/9.jpg


In [46]:
def apply_perturbation(perturbation, level, src_img):
    """Apply the specified perturbation to src_img.

    The perturbation is looked up by name in the PERTURBATIONS mapping and
    invoked as ``PERTURBATIONS[perturbation](level, src_img)``.

    Args:
        perturbation (str): name of perturbation to be applied
        level (int): degree of perturbation (from 1 to 4)
        src_img (Image): the image to perturb

    Returns:
        (Image): the perturbed image

    Raises:
        NotImplementedError: if perturbation is not a key of PERTURBATIONS

    """
    if perturbation not in PERTURBATIONS:
        # Name the offending value: when this runs over thousands of metadata
        # rows, a bare exception gives no hint about which name was wrong.
        raise NotImplementedError(
            'Unknown perturbation: {!r}'.format(perturbation))
    return PERTURBATIONS[perturbation](level, src_img)

def process_perturbation(path, perturbation, level, split, dst_path):
    """Download one image from the bucket, perturb it and save it locally.

    Nothing is downloaded or written when dst_path already exists as a file,
    so a re-run only processes the images that are still missing.

    Args:
        path (str): name of the source blob in the module-level ``bucket``
        perturbation (str): perturbation name, forwarded to apply_perturbation
        level (int): degree of perturbation, forwarded to apply_perturbation
        split (str): accepted for interface compatibility; not used here
        dst_path (str or Path): file the perturbed image is saved to; its
            parent directories are created if needed

    Raises:
        FileNotFoundError: if the bucket has no blob named ``path``

    """
    dst_path = Path(dst_path)
    dst_path.parent.mkdir(parents=True, exist_ok=True)

    if dst_path.is_file():
        # Already generated by an earlier run; skip the download.
        return

    # get_blob returns None for a missing object, so check before downloading
    # instead of failing with an AttributeError on None.
    blob = bucket.get_blob(path)
    if blob is None:
        raise FileNotFoundError(
            'Blob not found in bucket: {!r}'.format(path))

    # Decode the downloaded bytes in memory; nothing but the result hits disk.
    src_img = Image.open(io.BytesIO(blob.download_as_string()))
    dst_img = apply_perturbation(perturbation, level, src_img)
    dst_img.save(dst_path)

In [47]:
def transform(row, level=3):
    """Generate the perturbed image described by one metadata row.

    Args:
        row: a metadata row exposing the attributes source_path, split,
            transformation and destination_path
        level (int): degree of perturbation passed to process_perturbation;
            defaults to 3, the value this notebook has always used

    """
    process_perturbation(path=row.source_path,
                         split=row.split,
                         perturbation=row.transformation,
                         level=level,
                         dst_path=row.destination_path)

In [48]:
# Run transform on every metadata row for its side effect (the image saved at
# row.destination_path); transform returns nothing, hence the Series of None below.
metadata.apply(transform, axis=1)

0        None
1        None
2        None
3        None
4        None
         ... 
19300    None
19301    None
19302    None
19303    None
19304    None
Length: 19305, dtype: object

In [None]:
## Originally marked as not working: submit() passed four positional arguments
## (source_path, split, transformation, destination_path) to
## process_perturbation(path, perturbation, level, split, dst_path), so every
## task failed and the failures were never inspected. Both are fixed below.
## NOTE(review): a process pool also needs process_perturbation and the shared
## `bucket` to be usable inside the worker processes - confirm on the target platform.

from tqdm import tqdm
import concurrent.futures

# generate the images using parallel processing
with concurrent.futures.ProcessPoolExecutor() as executor:
    # Keyword arguments bind each value to the intended parameter; level=3
    # matches the level used by the sequential transform cell.
    results = [executor.submit(process_perturbation,
                               path=row.source_path,
                               perturbation=row.transformation,
                               level=3,
                               split=row.split,
                               dst_path=row.destination_path) for index, row in metadata.iterrows()]

    # Collect worker exceptions instead of discarding them, without aborting
    # the remaining tasks.
    failures = []
    for f in tqdm(concurrent.futures.as_completed(results), total=len(metadata)):
        error = f.exception()
        if error is not None:
            failures.append(error)

if failures:
    print('{} of {} tasks failed; first error: {!r}'.format(
        len(failures), len(results), failures[0]))
    
