<a href="https://colab.research.google.com/github/adampotton/MDM3-Rep-3/blob/main/aerial_data_load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
from google.colab import files, drive
drive.mount('/content/drive')
import os
import numpy as np
from tifffile import imread
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
folder_path = '/content/drive/MyDrive/Aerial Data'

def load_tif_files(folder_path):
    tif_arrays = []
    labels = []
    problematic_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in tqdm(files, desc="Loading TIF files"):
            if file.endswith('.tif'):
                file_path = os.path.join(root, file)
                try:
                    img = Image.open(file_path)
                    img_array = np.array(img)
                    tif_arrays.append(img_array)
                    label = '_'.join(file.split('_')[:2])
                    labels.append(label)
                except Exception as e:
                    problematic_files.append(file_path)
                    print(f"Error loading {file_path}: {e}")
    if problematic_files:
        print("The following files couldn't be loaded properly:")
        for file_path in problematic_files:
            print(file_path)
    return tif_arrays, labels

tif_arrays, labels = load_tif_files(folder_path)
print(labels)

Loading TIF files: 0it [00:00, ?it/s]
Loading TIF files: 100%|██████████| 269/269 [00:10<00:00, 24.54it/s]
Loading TIF files: 100%|██████████| 578/578 [00:18<00:00, 31.84it/s]
Loading TIF files: 100%|██████████| 282/282 [00:10<00:00, 27.53it/s]
Loading TIF files: 100%|██████████| 98/98 [00:03<00:00, 27.62it/s]
Loading TIF files:  31%|███▏      | 10/32 [00:00<00:00, 94.44it/s]

Error loading /content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_2_50192_WEFL_NLF.tif: cannot identify image file '/content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_2_50192_WEFL_NLF.tif'


Loading TIF files: 100%|██████████| 32/32 [00:01<00:00, 21.16it/s]


Error loading /content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_9_51740_WEFL_NLF.tif: cannot identify image file '/content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_9_51740_WEFL_NLF.tif'


Loading TIF files: 100%|██████████| 136/136 [00:13<00:00, 10.12it/s]
Loading TIF files: 100%|██████████| 174/174 [00:09<00:00, 19.16it/s]
Loading TIF files: 100%|██████████| 50/50 [00:03<00:00, 16.35it/s]
Loading TIF files: 100%|██████████| 659/659 [00:42<00:00, 15.35it/s]
Loading TIF files: 100%|██████████| 263/263 [00:09<00:00, 28.56it/s]
Loading TIF files: 100%|██████████| 645/645 [00:35<00:00, 18.39it/s]
Loading TIF files: 100%|██████████| 282/282 [00:19<00:00, 14.54it/s]
Loading TIF files: 100%|██████████| 345/345 [00:15<00:00, 22.05it/s]
Loading TIF files: 100%|██████████| 260/260 [00:07<00:00, 35.84it/s]
Loading TIF files: 100%|██████████| 184/184 [00:10<00:00, 17.41it/s]
Loading TIF files: 100%|██████████| 21/21 [00:00<00:00, 42.70it/s] 
Loading TIF files: 100%|██████████| 46/46 [00:00<00:00, 126.29it/s]
Loading TIF files: 100%|██████████| 30/30 [00:00<00:00, 54.67it/s]
Loading TIF files: 100%|██████████| 256/256 [00:12<00:00, 20.50it/s]

The following files couldn't be loaded properly:
/content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_2_50192_WEFL_NLF.tif
/content/drive/MyDrive/Aerial Data/aerial_pinus_nigra/Pinus_nigra_9_51740_WEFL_NLF.tif
['Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', 'Pseudotsuga_menziesii', '




In [None]:
arrays_np = np.array(tif_arrays)
labels_np = np.array(labels)

train_images, rest_images, train_labels, rest_labels = train_test_split(
    arrays_np, labels_np, test_size=0.2, random_state=2, stratify=labels_np
)

test_images, validation_images, test_labels, validation_labels = train_test_split(
    rest_images, rest_labels, test_size=0.5, random_state=2, stratify=rest_labels
)


np.save('/content/drive/My Drive/Aerial Data/train_images', train_images)
np.save('/content/drive/My Drive/Aerial Data/train_labels', train_labels)
np.save('/content/drive/My Drive/Aerial Data/test_images', test_images)
np.save('/content/drive/My Drive/Aerial Data/test_labels', test_labels)
np.save('/content/drive/My Drive/Aerial Data/val_images', validation_images)
np.save('/content/drive/My Drive/Aerial Data/val_labels', validation_labels)