<a href="https://colab.research.google.com/github/adampotton/MDM3-Rep-3/blob/main/aerial_data_load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#from google.colab import files, drive
#drive.mount('/content/drive')
import os
import numpy as np
from tifffile import imread
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm


In [3]:
folder_path = r"C:\Users\bench\OneDrive\Documents\EMAT Year 3\MDM3\Phase C\ratio_adjusted_aerial_dataset"

def load_tif_files(folder_path):
    tif_arrays = []
    labels = []
    numbers = []
    problematic_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in tqdm(files, desc="Loading TIF files"):
            if file.endswith('.tif'):
                file_path = os.path.join(root, file)
                try:
                    img = Image.open(file_path)
                    img_array = np.array(img)
                    tif_arrays.append(img_array)
                    label = '_'.join(file.split('_')[:2])
                    number = file.split('_')[3]
                    labels.append(label)
                    numbers.append(number)
                except Exception as e:
                    problematic_files.append(file_path)
                    print(f"Error loading {file_path}: {e}")
    if problematic_files:
        print("The following files couldn't be loaded properly:")
        for file_path in problematic_files:
            print(file_path)
    return tif_arrays, labels, numbers

tif_arrays, labels, numbers = load_tif_files(folder_path)
print(labels)
print(numbers)

Loading TIF files: 100%|██████████| 6/6 [00:00<?, ?it/s]
Loading TIF files: 100%|██████████| 385/385 [00:02<00:00, 190.06it/s]
Loading TIF files: 100%|██████████| 775/775 [00:05<00:00, 132.62it/s]
Loading TIF files: 100%|██████████| 488/488 [00:03<00:00, 132.99it/s]
Loading TIF files: 100%|██████████| 543/543 [00:03<00:00, 137.00it/s]
Loading TIF files: 100%|██████████| 2819/2819 [00:09<00:00, 288.19it/s]
Loading TIF files: 100%|██████████| 848/848 [00:06<00:00, 129.96it/s]
Loading TIF files: 100%|██████████| 344/344 [00:01<00:00, 274.87it/s]
Loading TIF files: 100%|██████████| 372/372 [00:02<00:00, 146.98it/s]
Loading TIF files: 100%|██████████| 2712/2712 [00:19<00:00, 138.08it/s]
Loading TIF files: 100%|██████████| 185/185 [00:00<00:00, 434.79it/s]
Loading TIF files: 100%|██████████| 264/264 [00:01<00:00, 139.45it/s]
Loading TIF files: 100%|██████████| 3948/3948 [00:28<00:00, 137.84it/s]
Loading TIF files: 100%|██████████| 180/180 [00:01<00:00, 135.87it/s]
Loading TIF files: 100%|███

['Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abies_alba', 'Abie




# Saving numpy arrrays without train test split

In [5]:
arrays_np = np.array(tif_arrays)
labels_np = np.array(labels)
numbers_np = np.array(numbers)
numbers_np = numbers_np.astype(int)

np.save(os.path.join(folder_path, 'aerial_99_images'), arrays_np)
np.save(os.path.join(folder_path, 'aerial_99_labels'), labels_np)
np.save(os.path.join(folder_path, 'aerial_99_numbers'), numbers_np)

# Saving numpy arrays with train test split

In [4]:
arrays_np = np.array(tif_arrays)
labels_np = np.array(labels)

train_images, rest_images, train_labels, rest_labels = train_test_split(
    arrays_np, labels_np, test_size=0.2, random_state=2, stratify=labels_np
)

test_images, validation_images, test_labels, validation_labels = train_test_split(
    rest_images, rest_labels, test_size=0.5, random_state=2, stratify=rest_labels
)

np.save(os.path.join(folder_path, 'train_images'), train_images)
np.save(os.path.join(folder_path, 'train_labels'), train_labels)
np.save(os.path.join(folder_path, 'val_images'), validation_images)
np.save(os.path.join(folder_path, 'val_labels'), validation_labels)
np.save(os.path.join(folder_path, 'test_images'), test_images)
np.save(os.path.join(folder_path, 'test_labels'), test_labels)


#np.save('/content/drive/My Drive/Aerial Data/train_images', train_images)
#np.save('/content/drive/My Drive/Aerial Data/train_labels', train_labels)
#np.save('/content/drive/My Drive/Aerial Data/test_images', test_images)
#np.save('/content/drive/My Drive/Aerial Data/test_labels', test_labels)
#np.save('/content/drive/My Drive/Aerial Data/val_images', validation_images)
#np.save('/content/drive/My Drive/Aerial Data/val_labels', validation_labels)