In [None]:
ndvi_image_path = "/mnt/data1tb/SAR2NDVI/new_dataset/train/2015-10-22_NDVI.tif"
vvvh_image_path = "/mnt/data1tb/SAR2NDVI/new_dataset/train/2016-04-26_VHVV.tif"

In [6]:
# Function to 
import glob

list_path = glob.glob('/mnt/data1tb/SAR2NDVI/new_dataset/train/*_VHVV.tif')

In [7]:
list_path

['/mnt/data1tb/SAR2NDVI/new_dataset/train/2021-10-25_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2024-05-17_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2021-01-18_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2022-12-24_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2024-04-02_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2018-01-19_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2022-04-05_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2019-09-21_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2023-03-09_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2023-03-14_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2019-10-01_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2018-03-20_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2023-10-05_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2017-12-20_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/train/2022-04-10_VHVV.tif',
 '/mnt/data1tb/SAR2NDVI/new_dataset/trai

In [2]:
import rasterio
from tqdm import tqdm

def save_vvvh_bands(input_image_path, output_vv_path, output_vh_path):
    # Open the multi-band image
    with rasterio.open(input_image_path) as src:
        # Read each band separately
        vv_band = src.read(1)  # Band 1 (VV)
        vh_band = src.read(2)  # Band 2 (VH)
        
        # Save the VV band
        with rasterio.open(
            output_vv_path,
            'w',
            driver='GTiff',
            height=vv_band.shape[0],
            width=vv_band.shape[1],
            count=1,
            dtype=vv_band.dtype,
            crs=src.crs,
            transform=src.transform,
        ) as vv_out:
            vv_out.write(vv_band, 1)
        
        # Save the VH band
        with rasterio.open(
            output_vh_path,
            'w',
            driver='GTiff',
            height=vh_band.shape[0],
            width=vh_band.shape[1],
            count=1,
            dtype=vh_band.dtype,
            crs=src.crs,
            transform=src.transform,
        ) as vh_out:
            vh_out.write(vh_band, 1)


        

In [60]:
import os
import glob
import shutil
import random
from datetime import datetime
import rasterio

# Paths
mask_input_folder = "/mnt/data1tb/SAR2NDVI/new_dataset/SAR2NDVI_SAMPLES_MASK"
nonmask_input_folder = "/mnt/data1tb/SAR2NDVI/new_dataset/SAR2NDVI_SAMPLES_NONMASK"
output_folder = "/mnt/data1tb/SAR2NDVI/temp"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get all dates from the file names
ndvi_files = glob.glob(os.path.join(mask_input_folder, "*_NDVI.tif"))
vvvh_files = glob.glob(os.path.join(mask_input_folder, "*_VHVV.tif"))

# Extract dates and organize by type
dates = sorted(set([os.path.basename(f).split("_")[0]for f in ndvi_files]))
random.shuffle(dates)

# Split data into train, test, and validation (80%, 10%, 10% split)
split_train = int(0.8 * len(dates))
split_val = int(0.9 * len(dates))
train_dates, val_dates, test_dates = dates[:split_train], dates[split_train:split_val], dates[split_val:]

# Mapping for set types
dataset_split = {
    "raw_002": train_dates,
    "raw_003": val_dates,
    "raw_001": test_dates
}

# Create folders and copy files
for set_name, set_dates in dataset_split.items():
    for date in set_dates:
        date_new_format = date.replace("-", "")
        # Create subfolders for each date under NDVI and VV-VH directories
        ndvi_folder = os.path.join(output_folder, "sentinel2", set_name)
        vvvh_folder = os.path.join(output_folder, "dataset", set_name, date.replace("-", ""))
        os.makedirs(ndvi_folder, exist_ok=True)
        os.makedirs(vvvh_folder, exist_ok=True)

        # Find and move NDVI image
        ndvi_image = os.path.join(mask_input_folder, f"{date}_NDVI.tif")
        if os.path.exists(ndvi_image):
            shutil.copy(ndvi_image, os.path.join(ndvi_folder, f"{date_new_format}_ndvi_crop.tif"))
            shutil.copy(ndvi_image, os.path.join(vvvh_folder, f"{date_new_format}_ndvi_crop.tif"))
            
        nonmask_ndvi_image = os.path.join(nonmask_input_folder, f"{date}_NDVI.tif")
        if os.path.exists(ndvi_image):
            shutil.copy(nonmask_ndvi_image, os.path.join(ndvi_folder, f"{date_new_format}_ndvi.tif"))
            shutil.copy(nonmask_ndvi_image, os.path.join(vvvh_folder, f"{date_new_format}_ndvi.tif"))

        # Find and separate VV and VH bands from VV-VH image
        vvvh_image = os.path.join(mask_input_folder, f"{date}_VHVV.tif")
        if os.path.exists(vvvh_image):
            with rasterio.open(vvvh_image) as src:
                vv_band = src.read(1)  # Band 1 (VV)
                vh_band = src.read(2)  # Band 2 (VH)
                
                # Save VV band
                vv_path = os.path.join(vvvh_folder, f"{date_new_format}_VV_crop.tif")
                with rasterio.open(
                    vv_path,
                    "w",
                    driver="GTiff",
                    height=vv_band.shape[0],
                    width=vv_band.shape[1],
                    count=1,
                    dtype=vv_band.dtype,
                    crs=src.crs,
                    transform=src.transform,
                ) as vv_out:
                    vv_out.write(vv_band, 1)

                # Save VH band
                vh_path = os.path.join(vvvh_folder, f"{date_new_format}_VH_crop.tif")
                with rasterio.open(
                    vh_path,
                    "w",
                    driver="GTiff",
                    height=vh_band.shape[0],
                    width=vh_band.shape[1],
                    count=1,
                    dtype=vh_band.dtype,
                    crs=src.crs,
                    transform=src.transform,
                ) as vh_out:
                    vh_out.write(vh_band, 1)
    
        vvvh_image = os.path.join(nonmask_input_folder, f"{date}_VHVV.tif")
        if os.path.exists(vvvh_image):
            with rasterio.open(vvvh_image) as src:
                vv_band = src.read(1)  # Band 1 (VV)
                vh_band = src.read(2)  # Band 2 (VH)
                
                # Save VV band
                vv_path = os.path.join(vvvh_folder, f"{date_new_format}_VV.tif")
                with rasterio.open(
                    vv_path,
                    "w",
                    driver="GTiff",
                    height=vv_band.shape[0],
                    width=vv_band.shape[1],
                    count=1,
                    dtype=vv_band.dtype,
                    crs=src.crs,
                    transform=src.transform,
                ) as vv_out:
                    vv_out.write(vv_band, 1)

                # Save VH band
                vh_path = os.path.join(vvvh_folder, f"{date_new_format}_VH.tif")
                with rasterio.open(
                    vh_path,
                    "w",
                    driver="GTiff",
                    height=vh_band.shape[0],
                    width=vh_band.shape[1],
                    count=1,
                    dtype=vh_band.dtype,
                    crs=src.crs,
                    transform=src.transform,
                ) as vh_out:
                    vh_out.write(vh_band, 1)

print("Dataset organized successfully.")

Dataset organized successfully.


In [44]:
import os
import numpy as np
import rasterio
from scipy.ndimage import generic_filter

def replace_nan_with_mean(arr):
    """Replace NaN values with the global mean, and handle fully NaN arrays."""
    # Check if entire array is NaN, which would make np.nanmean return NaN
    if np.isnan(arr).all():
        arr.fill(0)  # Fill with 0 or any other default value for fully NaN arrays
    else:
        mean_value = np.nanmean(arr)
        arr[~np.isnan(arr)] = mean_value
    return arr

def nanmean_filter(values):
    """Neighborhood filter function to replace NaN values with local mean."""
    # Calculate mean of non-NaN neighbors
    if np.isnan(values).all():
        return np.nan  # If all neighbors are NaN, return NaN to handle separately
    else:
        return np.nanmean(values)

def process_image(image_path, is_ndvi=False):
    with rasterio.open(image_path) as src:
        image = src.read()
        profile = src.profile
    
    # Replace NaN values using neighborhood filtering
    image_list = []
    for i in range(image.shape[0]):
        image1 = image[i]
        if np.isnan(image[i]).any():
            # Apply a neighborhood filter to replace NaNs with mean of neighbors
            image1 = generic_filter(image[i], nanmean_filter, size=10)
        
        # Fallback: If still NaN, apply global mean replacement
        for image_axis in range(image1.shape[0]):
            mean_value = np.nanmean(image1[image_axis])
            image1[image_axis][~np.isnan(image1[image_axis])] = mean_value

        # For NDVI images, replace cropped (0) pixels with -100
        if is_ndvi:
            image1[image1 == 0] = -100
        
        image_list.append(image1)
        
    image = np.stack(image_list, axis=0)
        
    # Save the modified image
    with rasterio.open(image_path, 'w', **profile) as dst:
        dst.write(image)

def process_folders(ndvi_folder, vvvh_folder):
    for folder in [ndvi_folder, vvvh_folder]:
        for file_name in os.listdir(folder):
            if file_name.endswith("_ndvi.tif"):
                # NDVI image processing
                process_image(os.path.join(folder, file_name), is_ndvi=True)
            elif file_name.endswith("_VHVV.tif"):
                # VV/VH image processing
                process_image(os.path.join(folder, file_name), is_ndvi=False)

# Example usage
non_crop_folder = '/mnt/data1tb/SAR2NDVI/new_dataset/SAR2NDVI_SAMPLES_NONMASK'
crop_folder = '/mnt/data1tb/SAR2NDVI/new_dataset/SAR2NDVI_SAMPLES_MASK'
process_folders(non_crop_folder, crop_folder)


In [56]:
import os
import numpy as np
import rasterio

def check_for_nan(folder):
    nan_files = []  # List to store files with NaN values
    for file_name in os.listdir(folder):
        if file_name.endswith("NDVI.tif"):
            file_path = os.path.join(folder, file_name)
            with rasterio.open(file_path) as src:
                image = src.read()  # Read all bands as a numpy array
                if np.isnan(image).any():
                    nan_files.append(file_name)
                    print(f"NaN values found in {file_path}")
    return nan_files

# Example usage
list_files = check_for_nan("/mnt/data1tb/SAR2NDVI/new_dataset/SAR2NDVI_SAMPLES_MASK")


NotADirectoryError: [Errno 20] Not a directory: '/mnt/data1tb/SAR2NDVI/temp/dataset/raw_001/20160916/20160916_VH_crop.tif'

In [47]:
len(list_files)

0

In [42]:
for file in list_files:
    if file.endswith("VHVV.tif"):
        print(file)

In [58]:
file = "/mnt/data1tb/SAR2NDVI/temp/dataset/raw_001/20160916/20160916_VH_crop.tif"
with rasterio.open(file) as src:
    ndvi = src.read()
print(ndvi[0][1])

[-15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271965 -15.00271965 -15.00271965 -15.00271965 -15.00271965
 -15.00271