In [2]:
from osgeo import gdal
import numpy as np

In [3]:
# Per-band statistics for each sensor. Every array is reshaped to (bands, 1, 1)
# so that it broadcasts against a (bands, height, width) image.
# Only the *_min / *_max arrays are used by the visible cells; *_mean / *_std
# are defined here but not referenced in this part of the notebook.

# MODIS: 6 bands
MODIS_min = np.array([20, 412, 2, 35, 65, 2]).reshape(-1, 1, 1)
MODIS_max = np.array([7113, 7766, 6484, 6788, 6564, 4735]).reshape(-1, 1, 1)
MODIS_mean = np.array(
    [860.19945305, 2475.44628988, 582.47434822, 887.45916135, 1735.29294439, 1034.11846855]
).reshape(-1, 1, 1)
MODIS_std = np.array(
    [396.83556466, 671.3033849, 394.82372062, 378.09109307, 455.40488832, 364.96818188]
).reshape(-1, 1, 1)

# S1: 2 bands
S1_min = np.array([-48, -50]).reshape(-1, 1, 1)
S1_max = np.array([1, 1]).reshape(-1, 1, 1)
S1_mean = np.array([-9.6334679, -16.76533283]).reshape(-1, 1, 1)
S1_std = np.array([4.03766336, 4.28410922]).reshape(-1, 1, 1)

# S2: 8 bands
S2_min = np.array([87, 210, 129, 250, 99, 54, 61, 46]).reshape(-1, 1, 1)
S2_max = np.array([6884.04, 6996.04, 6944, 7146, 8000.08, 8464, 7320, 7938]).reshape(-1, 1, 1)
S2_mean = np.array(
    [921.53919652, 1138.64855809, 1094.08537426, 1483.95639562,
     2556.91823607, 2629.38505853, 1974.45128672, 1468.42488102]
).reshape(-1, 1, 1)
S2_std = np.array(
    [536.32987193, 540.95303603, 590.69740503, 583.05947553,
     1058.82295588, 1071.90393996, 765.12367657, 706.8396577]
).reshape(-1, 1, 1)

In [4]:
def tif2npy(tif_path, output_path, image_class):
    """Convert a raster file into a min-max normalised float32 ``.npy`` array.

    The raster is read in full with GDAL, a fixed subset of bands is selected
    according to ``image_class``, the values are scaled with the module-level
    ``*_min`` / ``*_max`` constants and the result is written with ``np.save``.

    Band indices (0-based) kept per class:
        * ``"MODIS"``: 0, 1, 2, 3, 5, 6
        * ``"S1"``: 0, 1 (values are first clipped to [-50, 1])
        * ``"S2"`` / ``"ref"``: 1, 2, 3, 4, 7, 8, 10, 11 (values are first
          clipped to each band's 1st-99th percentile of this image)

    Args:
        tif_path: Path of the raster to read.
        output_path: Destination ``.npy`` path; its parent directory is
            created when missing.
        image_class: One of ``"MODIS"``, ``"S1"``, ``"S2"`` or ``"ref"``.

    Returns:
        True when the array was written, False when GDAL could not open
        ``tif_path``.

    Raises:
        ValueError: If ``image_class`` is not one of the supported names.
    """
    import os  # local import: this cell only imports gdal and numpy

    ds = gdal.Open(tif_path)
    if ds is None:
        # Previously execution continued and crashed on ds.ReadAsArray.
        print("No such file !")
        return False

    image = ds.ReadAsArray(0, 0, ds.RasterXSize, ds.RasterYSize)
    # Dropping the reference lets GDAL close the dataset.
    ds = None

    if image_class == "MODIS":
        image = image[[0, 1, 2, 3, 5, 6], :, :]
        image = (image - MODIS_min) / (MODIS_max - MODIS_min)
    elif image_class == "S1":
        image = image[[0, 1], :, :]
        # Clip to the valid value range.
        np.clip(image, -50, 1, out=image)
        image = (image - S1_min) / (S1_max - S1_min)
    elif image_class in ("S2", "ref"):
        # The original test `image_class == "S2" or "ref"` was always true, so
        # any unknown class was silently processed as S2.
        image = image[[1, 2, 3, 4, 7, 8, 10, 11], :, :]
        # Use the 1st/99th percentiles of each band to bound the data range.
        percentiles_1 = np.percentile(image, 1, axis=(1, 2))
        percentiles_99 = np.percentile(image, 99, axis=(1, 2))
        image = np.clip(image, percentiles_1[:, np.newaxis, np.newaxis], percentiles_99[:, np.newaxis, np.newaxis])
        image = (image - S2_min) / (S2_max - S2_min)
    else:
        raise ValueError(
            f"Unknown image_class {image_class!r}; expected 'MODIS', 'S1', 'S2' or 'ref'"
        )

    # np.save does not create missing directories.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # S2: (8, 250, 250), S1: (2, 250, 250), MODIS: (6, 5, 5)
    np.save(output_path, image.astype(np.float32))

    print("Successfully convert", tif_path, "to", output_path)

    return True

In [5]:
import os

def get_files_by_type(folder_path, file_type):
    """Recursively collect the files under ``folder_path`` with a given suffix.

    Args:
        folder_path: Directory that is walked with ``os.walk``.
        file_type: Suffix a file name must end with (e.g. ``".tif"``).

    Returns:
        A list of paths, each built by joining the containing directory and
        the file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.endswith(file_type)
    ]

def categorize_files_by_keyword(file_paths, keywords):
    """Group paths by the keywords that occur anywhere in them.

    Args:
        file_paths: Iterable of path strings.
        keywords: Iterable of substrings to look for.

    Returns:
        A dict with one entry per keyword, mapping it to the list of paths
        that contain it (plain substring test). A path containing several
        keywords is listed under each of them; a path containing none is
        dropped.
    """
    buckets = {}
    for kw in keywords:
        buckets[kw] = []

    for path in file_paths:
        for kw in keywords:
            if kw not in path:
                continue
            buckets[kw].append(path)

    return buckets

# Root directory that is searched recursively for the cropped source rasters.
folder_path = r"D:\ENVI\data\NingBo\NB_roi\cropped_data"

# File-name suffix handed to get_files_by_type (compared with str.endswith).
file_type = ".tif"

# Substrings used to bucket the paths; the conversion loop later passes each
# one to tif2npy as image_class.
keywords = ["MODIS", "S1", "S2", "ref"]

# Every file under folder_path whose name ends with file_type.
file_paths = get_files_by_type(folder_path, file_type)

# Mapping keyword -> list of the paths that contain that keyword.
categorized_files = categorize_files_by_keyword(file_paths, keywords)

# for keyword, files in categorized_files.items():
#     print(f"Files with '{keyword}' in the path:")
#     for file in files:
#         print(file)
#     print("\n")


In [6]:
# Convert every categorised raster to .npy, mirroring the source directory
# layout under the dataset root. The keyword doubles as tif2npy's image_class.
for keyword, files in categorized_files.items():
    for file_path in files:
        mirrored_path = file_path.replace(
            r"D:\ENVI\data\NingBo\NB_roi\cropped_data",
            r"D:\Code\MODIS_S1_S2\dataset\SatelliteImages",
        )
        # Swap only the extension: str.replace("tif", "npy") would also rewrite
        # any other "tif" substring inside a directory or file name.
        output_path = os.path.splitext(mirrored_path)[0] + ".npy"
        tif2npy(file_path, output_path, image_class=keyword)

Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2194.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2194.npy
Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2195.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2195.npy
Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2196.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2196.npy
Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2197.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2197.npy
Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2198.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2198.npy
Successfully convert D:\ENVI\data\NingBo\NB_roi\cropped_data\test\MODIS\MODIS_2199.tif to D:\Code\MODIS_S1_S2\dataset\SatelliteImages\test\MODIS\MODIS_2199.npy
Successfully convert D:\ENVI\data\NingBo

In [40]:
import glob
import numpy as np


def calc_statistics(image_paths):
    """Compute per-band min, max, mean and std over a set of saved arrays.

    Each path is loaded with ``np.load`` and the arrays are stacked along a
    new leading axis, so they must all share one shape. The reductions run
    over axes (0, 2, 3) of the stacked array, leaving one value per index of
    axis 1 (the band axis of the loaded 3-D arrays).

    Args:
        image_paths: Iterable of ``.npy`` file paths.

    Returns:
        Tuple ``(min_val, max_val, mean, std)`` of 1-D arrays.

    Raises:
        ValueError: If ``image_paths`` is empty or the loaded arrays do not
            share the same shape.
    """
    images = [np.load(image_path) for image_path in image_paths]
    if not images:
        raise ValueError("calc_statistics() requires at least one image path")

    # Build the array once; handing the Python list to each reduction made
    # numpy convert it to an array four separate times.
    stacked = np.stack(images)
    reduce_axes = (0, 2, 3)

    min_val = stacked.min(axis=reduce_axes)
    max_val = stacked.max(axis=reduce_axes)
    mean = stacked.mean(axis=reduce_axes)
    std = stacked.std(axis=reduce_axes)

    return min_val, max_val, mean, std


# Glob patterns matching the converted training tiles of each sensor.
MODIS_dir = r"D:\Code\MODIS_S1_S2\dataset\SatelliteImages\train\MODIS\MODIS_*.npy"
S1_dir = r"D:\Code\MODIS_S1_S2\dataset\SatelliteImages\train\S1\S1_*.npy"
S2_dir = r"D:\Code\MODIS_S1_S2\dataset\SatelliteImages\train\S2\S2_*.npy"

# glob.glob returns matches in arbitrary order; sorting makes the [:1000]
# subset used below the same on every run and every machine.
MODIS_image_paths = sorted(glob.glob(MODIS_dir))
S1_image_paths = sorted(glob.glob(S1_dir))
S2_image_paths = sorted(glob.glob(S2_dir))


# min_val, max_val, mean, std = calc_statistics(MODIS_image_paths)
# print("MODIS: ")
# print(f"min: {min_val}\nmax: {max_val}\nmean: {mean}\nstd: {std}")

# min_val, max_val, mean, std = calc_statistics(S1_image_paths)
# print("S1: ")
# print(f"min: {min_val}\nmax: {max_val}\nmean: {mean}\nstd: {std}")

# Statistics are computed on the first 1000 S2 tiles only, not the full set.
min_val, max_val, mean, std = calc_statistics(S2_image_paths[:1000])
print("S2: ")
print(f"min: {min_val}\nmax: {max_val}\nmean: {mean}\nstd: {std}")

S2: 
min: [0.         0.         0.         0.         0.0025313  0.00309156
 0.00358176 0.00380005]
max: [0.7946106  0.7571426  0.75290389 0.78175754 0.99290224 0.99239001
 0.77448684 0.71971617]
mean: [0.11742244 0.12955752 0.1355439  0.17225664 0.30778315 0.30436722
 0.26466027 0.18253037]
std: [0.07662227 0.07707557 0.08279052 0.08077306 0.12907759 0.12280831
 0.09672365 0.08444126]


In [38]:
# Per-band summary statistics of one converted S2 training tile.
# Reductions run over axes (1, 2), leaving one value per index of axis 0.
image = np.load(r"D:\Code\MODIS_S1_S2\dataset\SatelliteImages\train\S2\S2_288.npy")

min_val = image.min(axis=(1, 2))
max_val = image.max(axis=(1, 2))
mean = image.mean(axis=(1, 2))
std = image.std(axis=(1, 2))

print(f"min: {min_val}\nmax: {max_val}\nmean: {mean}\nstd: {std}")

min: [0.20611913 0.19392753 0.18708731 0.18314965 0.17149554 0.16539834
 0.16696515 0.14964521]
max: [0.38649177 0.43294764 0.49757887 0.52204176 0.81115493 0.7373365
 0.61688938 0.55005068]
mean: [0.27221155 0.29990546 0.30807936 0.36063666 0.49824132 0.46768299
 0.42630211 0.33302743]
std: [0.0400287  0.04589418 0.06659799 0.05520274 0.11896137 0.10590931
 0.08541645 0.07693003]
