In [None]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from collections import defaultdict
import tifffile as tif
from torch.utils.data import Dataset, DataLoader, Sampler, BatchSampler
from sklearn.model_selection import train_test_split
import geopandas as gpd
from shapely.geometry import box
import rasterio
from rasterio.features import rasterize



In [None]:
# Directory that is searched for the image tiles (*.tif); relative to the
# notebook's working directory.
Train_images_path = 'Train_Data'
# Directory that is searched for the polygon files (*.geojson); relative to the
# notebook's working directory.
Train_polygons_path = 'build_poly'  

def Data_mapping(images, masks):
    """Pair every image tile with a polygon file by sorted position.

    Args:
        images (str): Directory searched for ``*.tif`` files.
        masks (str): Directory searched for ``*.geojson`` files.

    Returns:
        list[dict]: One ``{'image': path, 'polygons': path}`` dict per pair,
        ordered by sorted path.

    Raises:
        ValueError: If the two directories contain a different number of
            matching files.
    """
    tif_paths = glob.glob(os.path.join(images, '*.tif'))
    tif_paths.sort()
    geojson_paths = glob.glob(os.path.join(masks, '*.geojson'))
    geojson_paths.sort()

    if len(tif_paths) != len(geojson_paths):
        raise ValueError("Number of image files and mask files do not match")

    # Pairing is purely positional: the i-th sorted image goes with the i-th
    # sorted polygon file. File names themselves are never compared.
    linked_files = []
    for tif_path, geojson_path in zip(tif_paths, geojson_paths):
        linked_files.append({'image': tif_path, 'polygons': geojson_path})
    return linked_files

DATA_list = Data_mapping(Train_images_path, Train_polygons_path)
# Spot-check one image/polygon pairing. Index 500 is used when it exists; the
# lookup is clamped to the last entry (and skipped for an empty list) so the
# cell still runs on datasets with 500 or fewer pairs.
DATA_list[min(500, len(DATA_list) - 1)] if DATA_list else None


In [None]:
def process_image_and_mask(data_entry):
    """Load one image tile and rasterize its polygons into a binary mask.

    Args:
        data_entry (dict): Mapping with the raster path under ``'image'`` and
            the polygon file path under ``'polygons'``.

    Returns:
        tuple: ``(image, mask)``. ``image`` is a float32 array with the band
        axis moved last, divided by its maximum value (left untouched when
        that maximum is 0). ``mask`` is a uint8 array of shape
        ``(height, width)`` holding 1 inside polygons and 0 elsewhere; it is
        all zeros when the polygon file has no usable geometry.
    """
    polygons = gpd.read_file(data_entry['polygons'])

    # Open the raster once and take both the georeferencing and the pixels.
    with rasterio.open(data_entry['image']) as src:
        image_shape = (src.height, src.width)
        transform = src.transform
        raster_crs = src.crs
        image = src.read().astype(np.float32)

    # Reproject only when there is something to burn, and drop null/empty
    # geometries so they never reach rasterize().
    shapes = []
    if len(polygons) > 0:
        polygons = polygons.to_crs(raster_crs)
        shapes = [
            (geom, 1)
            for geom in polygons.geometry
            if geom is not None and not geom.is_empty
        ]

    if shapes:
        mask = rasterize(
            shapes,
            out_shape=image_shape,
            transform=transform,
            fill=0,
            dtype='uint8'
        )
    else:
        # A tile with no polygons is a valid all-background sample.
        mask = np.zeros(image_shape, dtype=np.uint8)

    # Normalisation by the tile maximum; skipped for an all-zero tile, where
    # the division would be 0/0 and fill the image with NaN.
    peak = image.max()
    if peak != 0:
        image /= peak

    return image.transpose(1, 2, 0), mask

def process_dataset(data_list):
    """Run ``process_image_and_mask`` over every entry of ``data_list``.

    Args:
        data_list (iterable): Entries accepted by ``process_image_and_mask``.

    Returns:
        tuple: ``(images, masks)`` — two lists in input order, one element
        per entry.
    """
    # Process each entry exactly once, then split the (image, mask) pairs.
    pairs = [process_image_and_mask(entry) for entry in data_list]
    images = [image for image, _ in pairs]
    masks = [mask for _, mask in pairs]
    return images, masks


# Load every image and rasterize its mask; both result lists are kept in
# memory for the cells below.
data_image, data_mask = process_dataset(DATA_list)

In [None]:
def resize_images(images, target_size=512):
    """Resize every image to a square of ``target_size`` pixels per side.

    Args:
        images (list): Image arrays accepted by ``cv2.resize``.
        target_size (int): Output width and height in pixels.

    Returns:
        list: The resized images, in input order.
    """
    output_size = (target_size, target_size)
    resized_images = []
    for img in images:
        # No interpolation flag is passed, so cv2.resize applies its default.
        resized_images.append(cv2.resize(img, output_size))
    return resized_images

def resize_masks(masks, target_size=512):
    """Resize every mask to a square of ``target_size`` pixels per side.

    Args:
        masks (list): Mask arrays accepted by ``cv2.resize``.
        target_size (int): Output width and height in pixels.

    Returns:
        list: The resized masks, in input order.
    """
    output_size = (target_size, target_size)
    resized_masks = []
    for mask in masks:
        # Nearest-neighbour sampling copies existing pixel values instead of
        # blending them, so the resized mask keeps the original label values.
        resized_masks.append(
            cv2.resize(mask, output_size, interpolation=cv2.INTER_NEAREST)
        )
    return resized_masks

# Rebind data_image / data_mask to the resized lists (default target_size=512);
# the original-size arrays are no longer referenced by these names afterwards.
data_image = resize_images(data_image)
data_mask = resize_masks(data_mask)

# Sanity check on the first sample; indexing [0] assumes at least one sample.
print("Image shape after resizing:", data_image[0].shape)

print("Mask shape after resizing:", data_mask[0].shape)

In [None]:
# Convert the lists of resized arrays into single NumPy arrays:
# x1 holds the images, x2 holds the masks.
x1=np.array(data_image)
x2=np.array(data_mask)

In [None]:
# Write both arrays to .npy files in the current working directory
# (x1.npy: images, x2.npy: masks).
np.save('x1.npy', x1)
np.save('x2.npy', x2)