In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from multiprocessing import Pool
import pandas as pd

# Root folder that holds every dataset used by this notebook.
path = "datasets/"

# Folder with the per-band Landsat 8 GeoTIFF files (one file per band per scene).
landsat8_train = path + "landsat8_train/train/"

# Output folder for the stacked arrays written by save_image.
dst = path + "landsat8_oneimage/"
# To wipe the results of a previous run first: os.system("rm " + dst + "/*")
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(dst, exist_ok = True)

# Example input filename: 'Landsat8_QA_PIXEL_-54.48_-3.33_2013_03_27.tiff'

def get_name_landsat(row, band):
    """Build the GeoTIFF filename of one band of one Landsat 8 scene.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing 'lon', 'lat'
            and 'date'; 'date' must provide .year, .month and .day.
        band: Band identifier placed in the name, e.g. "SR_B1" or "QA_PIXEL".

    Returns:
        str such as 'Landsat8_QA_PIXEL_-54.48_-3.33_2013_03_27.tiff'
        (longitude first, then latitude, both with two decimals).
    """
    lon, lat = row['lon'], row['lat']
    when = row['date']
    return f"Landsat8_{band}_{lon:.2f}_{lat:.2f}_{when.year}_{when.month:02d}_{when.day:02d}.tiff"

# Keep exactly one file name per scene: its QA_PIXEL file. Selecting by name
# (rather than taking the first ninth of the sorted listing) does not depend on
# every scene having the same number of band files or on how band names sort.
images = np.array(
    sorted(
        name
        for name in os.listdir(landsat8_train)
        if name.startswith("Landsat8_QA_PIXEL_")
    ),
    dtype = str,
)

def get_dict(name):
    """Parse the coordinates and acquisition date out of a Landsat filename.

    The expected layout is ``Landsat8_<band>_<lon>_<lat>_<yyyy>_<mm>_<dd>.<ext>``
    (the layout produced by ``get_name_landsat``). Fields are read from the end
    of the name, so the result does not depend on how many underscores the band
    part contains.

    Args:
        name: File name (not a full path) to parse.

    Returns:
        dict with keys "date" (pd.Timestamp), "lat" (float) and "lon" (float).
    """
    tokens = name.split("_")

    # Longitude comes before latitude in the file name.
    lon = float(tokens[-5])
    lat = float(tokens[-4])

    year = int(tokens[-3])
    month = int(tokens[-2])
    # The last token still carries the file extension, e.g. "27.tiff".
    day = int(tokens[-1].split('.')[0])

    return {"date": pd.Timestamp(year, month, day), "lat": lat, "lon": lon}

# Parse every selected file name with a pool of 16 worker processes;
# each name yields one {"date", "lat", "lon"} record.
with Pool(processes = 16) as p:
    data = p.map(get_dict, images)

# One row per parsed file name; the bare expression displays the frame.
landsat_df = pd.DataFrame(data)
landsat_df

Unnamed: 0,date,lat,lon
0,2013-03-27,-3.33,-54.48
1,2013-05-20,-3.33,-54.48
2,2013-06-05,-3.33,-54.48
3,2013-06-21,-3.33,-54.48
4,2013-07-07,-3.33,-54.48
...,...,...,...
233220,2021-08-14,-4.39,-55.20
233221,2021-09-15,-4.39,-55.20
233222,2021-10-01,-4.39,-55.20
233223,2021-10-17,-4.39,-55.20


In [5]:
# Helper functions for loading Landsat band images and QA masks

def load_image(img):
    """Read one image file into a NumPy array of 16-bit signed integers.

    Args:
        img: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        np.ndarray of dtype int16 holding the pixel values.
    """
    # A context manager releases the file handle as soon as the pixels are
    # copied, instead of leaving one open file per call.
    with Image.open(img) as handle:
        pixels = np.array(handle, dtype = np.int16)

    # NOTE(review): int16 tops out at 32767; if the band files store larger
    # unsigned values they would not fit in this dtype. Confirm the value range.

    # Optional rescaling, currently disabled:
    #img -= 10000
    #img /= 35000

    return pixels

def load_qa(img):
    """Load a QA image and reduce it to a 0/1 mask.

    A pixel gets 1 when the five lowest bits of its QA value are all zero and
    0 otherwise. Presumably those bits are the fill/cloud-related flags
    described at the link below. TODO confirm for the collection in use.

    Args:
        img: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        np.ndarray of dtype int16 containing only 0s and 1s.
    """
    # https://www.usgs.gov/landsat-missions/landsat-collection-1-level-1-quality-assessment-band
    with Image.open(img) as handle:
        raw = np.array(handle)
    low_bits_unset = (raw & 0b11111) == 0
    return low_bits_unset.astype(np.int16)

def get_qa_proportion(lr):
    """Return the fraction of pixels flagged in a scene's QA mask.

    Args:
        lr: An ``(index, row)`` pair as yielded by ``DataFrame.iterrows()``;
            only the row (element 1) is used.

    Returns:
        Mean of ``mask == 0`` over the mask from ``load_qa``, i.e. the share
        of pixels with at least one of the five lowest QA bits set.
    """
    row = lr[1]
    qa_path = landsat8_train + get_name_landsat(row, "QA_PIXEL")
    mask = load_qa(qa_path)
    return (mask == 0).mean()

def get_image_and_qa(lr):
    """Stack the seven SR bands and the QA mask of one scene into one array.

    Args:
        lr: A single row (not an ``(index, row)`` pair) exposing 'lon', 'lat'
            and 'date', as required by ``get_name_landsat``.

    Returns:
        np.ndarray whose last axis has 8 channels: SR_B1..SR_B7 in order,
        followed by the 0/1 mask from ``load_qa``.
    """
    band_names = [ "SR_B1", "SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7" ]
    layers = [
        load_image(landsat8_train + get_name_landsat(lr, band))
        for band in band_names
    ]
    # Bands become the trailing axis.
    img = np.stack(layers, axis = -1)

    qa = load_qa(landsat8_train + get_name_landsat(lr, "QA_PIXEL"))
    # Give the 2-D mask a trailing channel axis so it can be appended.
    qa_channel = qa[:, :, np.newaxis]

    return np.concatenate([img, qa_channel], axis = -1)

In [6]:
# Iterate over the annotated deforestation images
def save_image(j):
    """Build the stacked array of one scene and write it to ``dst`` as .npy.

    Args:
        j: An ``(index, row)`` pair as yielded by ``DataFrame.iterrows()``;
            only the row (element 1) is used.

    The output file is named ``Labeled_<lat>_<lon>_<yyyy>_<mm>_<dd>.npy``.
    Latitude comes first here, unlike in the input file names.
    """
    row = j[1]
    stacked = get_image_and_qa(row)
    lat, lon = row['lat'], row['lon']
    when = row['date']
    name = f"Labeled_{lat:.2f}_{lon:.2f}_{when.year}_{when.month:02d}_{when.day:02d}.npy"
    np.save(dst + name, stacked)

# Write one stacked .npy file per row of landsat_df; tqdm needs the explicit
# total because iterrows() is a generator without a length.
for indexed_row in tqdm(landsat_df.iterrows(), total = len(landsat_df)):
    save_image(indexed_row)

100%|██████████| 233225/233225 [9:08:15<00:00,  7.09it/s]   


In [22]:
import h5py
import os
import numpy as np
from tqdm import tqdm
from PIL import Image

# Path to the directory containing the files you want to store
file_directory = 'datasets/deforestation_train_resized/train/'
# Sorted so the datasets are created in a deterministic order
files = sorted(os.listdir(file_directory))

# Path to the HDF5 file
hdf5_path = 'deforestation.hdf5'

# Create a new HDF5 file (mode 'w' replaces any existing file) holding one
# dataset per input image, keyed by the image's file name.
with h5py.File(hdf5_path, 'w') as f:
    for filename in tqdm(files):
        file_path = os.path.join(file_directory, filename)
        if not os.path.isfile(file_path):
            continue  # skip sub-directories and other non-file entries

        # Close each image as soon as its pixels are copied, so the loop does
        # not accumulate open file handles.
        with Image.open(file_path) as label_image:
            # The source is a bool array (per the original note); stored as uint8.
            label_array = np.array(label_image, dtype=np.uint8)
        f.create_dataset(filename, data=label_array)

100%|██████████| 17215/17215 [01:53<00:00, 151.83it/s]


In [14]:
# Sanity check of the labeled HDF5 file: number of datasets, the first ten
# dataset names, and the shape of one sample array.
with h5py.File('datasets/with_labels.hdf5', 'r') as f:
    keys = list(f.keys())
    print(len(keys))
    print(keys[:10])

    # [:] reads the whole dataset into memory as a NumPy array.
    img = f['Labeled_-3.33_-54.50_2016_08_01_0.00.npy'][:]
    print(img.shape)

17215
['Labeled_-3.33_-54.50_2016_08_01_0.00.npy', 'Labeled_-3.33_-54.50_2017_07_01_0.01.npy', 'Labeled_-3.33_-54.50_2017_08_01_0.01.npy', 'Labeled_-3.33_-54.50_2018_06_01_0.00.npy', 'Labeled_-3.33_-54.50_2018_08_01_0.00.npy', 'Labeled_-3.33_-54.50_2019_07_01_0.00.npy', 'Labeled_-3.33_-54.50_2019_08_01_0.00.npy', 'Labeled_-3.33_-54.50_2020_06_01_0.00.npy', 'Labeled_-3.33_-54.50_2020_08_01_0.00.npy', 'Labeled_-3.33_-54.50_2021_05_01_0.00.npy']
<class 'numpy.ndarray'>
(25, 85, 85)
