Check for missing values (NaN, Inf, or all-zero channels) in the data

In [6]:
import tifffile as tiff
import numpy as np
import os
import glob

# Folder that holds the TIFF images of a single fire
folder_path = 'fire_25294714'

# Every *.tif file directly inside the folder, in sorted (lexicographic) order
tif_pattern = os.path.join(folder_path, '*.tif')
file_list = glob.glob(tif_pattern)
file_list.sort()

# Function to check missing data in one image
def check_missing_data(data):
    """Inspect one array for missing or degenerate values.

    Args:
        data: NumPy array of numeric values (the caller passes a single
            image channel).

    Returns:
        A 3-tuple ``(has_nan, has_inf, has_all_zeros)`` of booleans:
        whether any element is NaN, whether any element is +/-Inf, and
        whether every element compares equal to zero.
    """
    nan_found = np.any(np.isnan(data))
    inf_found = np.any(np.isinf(data))
    only_zeros = np.all(data == 0)
    return nan_found, inf_found, only_zeros


# Scan every image channel by channel and report NaN / Inf / all-zero channels.
for image_path in file_list:
    # A single-band image may be read as a 2-D array; np.atleast_3d promotes it
    # to (rows, cols, 1) so the channel loop below works instead of raising
    # IndexError on data.shape[2]. Arrays that are already 3-D are unchanged.
    data = np.atleast_3d(tiff.imread(image_path))
    file_name = os.path.basename(image_path)

    file_has_issue = False

    # NOTE(review): assumes channels live on the last axis (rows, cols, channels) — confirm
    for channel in range(data.shape[2]):
        channel_data = data[:, :, channel]
        has_nan, has_inf, has_all_zeros = check_missing_data(channel_data)

        if not (has_nan or has_inf or has_all_zeros):
            continue

        file_has_issue = True
        # Channels are reported 1-based, hence channel + 1.
        print(f" Issue found in file: {file_name}, Channel {channel + 1}")
        if has_nan:
            print("   - Contains NaN values")
        if has_inf:
            print("   - Contains Inf values")
        if has_all_zeros:
            print("   - Entire channel is all zeros")

    # Only files with no flagged channel get the OK line.
    if not file_has_issue:
        print(f"✅ File OK: {file_name}")



 Issue found in file: 2021-07-08.tif, Channel 1
   - Contains NaN values
 Issue found in file: 2021-07-08.tif, Channel 2
   - Contains NaN values
 Issue found in file: 2021-07-08.tif, Channel 3
   - Contains NaN values
 Issue found in file: 2021-07-08.tif, Channel 6
   - Entire channel is all zeros
 Issue found in file: 2021-07-08.tif, Channel 18
   - Entire channel is all zeros
 Issue found in file: 2021-07-08.tif, Channel 23
   - Contains NaN values
 Issue found in file: 2021-07-09.tif, Channel 6
   - Entire channel is all zeros
 Issue found in file: 2021-07-09.tif, Channel 18
   - Entire channel is all zeros
 Issue found in file: 2021-07-09.tif, Channel 23
   - Contains NaN values
 Issue found in file: 2021-07-10.tif, Channel 6
   - Entire channel is all zeros
 Issue found in file: 2021-07-10.tif, Channel 18
   - Entire channel is all zeros
 Issue found in file: 2021-07-10.tif, Channel 23
   - Contains NaN values
 Issue found in file: 2021-07-11.tif, Channel 6
   - Entire channel is

In [3]:
import tifffile as tiff
import numpy as np
import os
import glob

# Root directory for the year being checked
year_folder = '2021'

# Paths of all sub-directories (the fire folders) inside the year folder,
# in sorted order; plain files are ignored
fire_folders = [entry.path for entry in os.scandir(year_folder) if entry.is_dir()]
fire_folders.sort()

print(f"Total fire folders found: {len(fire_folders)}")

# Function to check missing data
def check_missing_data(data):
    """Tell whether an array contains any NaN or infinite value.

    Args:
        data: NumPy array of numeric values (the caller passes a single
            image channel).

    Returns:
        A truthy boolean when at least one element is NaN or +/-Inf,
        a falsy boolean otherwise.
    """
    nan_found = np.any(np.isnan(data))
    if nan_found:
        return nan_found
    return np.any(np.isinf(data))

# Channels to skip (found previously): 0-based indices of the channels that the
# per-file check of fire_25294714 reported (1-based) as 1, 2, 3, 6, 18 and 23.
skip_channels = {0, 1, 2, 5, 17, 22}


def _file_is_clean(tif_file, skip):
    """Return True when no non-skipped channel of `tif_file` has NaN/Inf values.

    Args:
        tif_file: Path of the TIFF image to read.
        skip: Collection of 0-based channel indices that are not checked.
    """
    # A single-band image may be read as a 2-D array; promote it to
    # (rows, cols, 1) so data.shape[2] does not raise IndexError.
    data = np.atleast_3d(tiff.imread(tif_file))
    # any() stops at the first bad channel, like the original early break.
    return not any(
        check_missing_data(data[:, :, channel])
        for channel in range(data.shape[2])
        if channel not in skip
    )


# good folder counter
good_folder_count = 0

# Loop over fire folders
for fire_folder in fire_folders:
    tif_files = sorted(glob.glob(os.path.join(fire_folder, '*.tif')))

    # A folder without any TIFF has nothing to validate; previously it was
    # counted as "good" because no file could mark it as bad.
    if not tif_files:
        continue

    # all() stops reading files as soon as one bad file is found.
    if all(_file_is_clean(tif_file, skip_channels) for tif_file in tif_files):
        good_folder_count += 1

# After checking all folders
print(f"\nTotal number of good folders: {good_folder_count} ✅")


Total fire folders found: 156

Total number of good folders: 49 ✅


In [4]:

def _count_noisy_pixels(data, skip):
    """Count all pixels and non-finite pixels over the non-skipped channels.

    Args:
        data: Image array. A 2-D array is treated as a single channel;
            otherwise channels are taken from the third axis.
        skip: Collection of 0-based channel indices to ignore.

    Returns:
        ``(total, noisy)`` as plain Python ints: the number of pixels
        inspected and how many of them are NaN or +/-Inf.
    """
    # Promote a 2-D image to (rows, cols, 1) so data.shape[2] cannot raise IndexError.
    data = np.atleast_3d(data)
    total = 0
    noisy = 0
    for channel in range(data.shape[2]):
        if channel in skip:
            continue  # Skip these channels
        channel_data = data[:, :, channel]
        total += channel_data.size
        # ~isfinite is True exactly for NaN and +/-Inf (one pass instead of
        # isnan | isinf). int() keeps the running total a Python int, so it
        # cannot wrap the way a fixed-width NumPy integer accumulator can.
        noisy += int(np.count_nonzero(~np.isfinite(channel_data)))
    return total, noisy


total_pixels = 0
noisy_pixels = 0

for fire_folder in fire_folders:
    tif_files = sorted(glob.glob(os.path.join(fire_folder, '*.tif')))

    for tif_file in tif_files:
        file_total, file_noisy = _count_noisy_pixels(tiff.imread(tif_file), skip_channels)
        total_pixels += file_total
        noisy_pixels += file_noisy

# calculating the noise percentage
if total_pixels > 0:
    noise_percentage = (noisy_pixels / total_pixels) * 100
    print(f"\nTotal pixels checked: {total_pixels}")
    print(f"Total noisy pixels (NaN or Inf): {noisy_pixels}")
    print(f"Noise Percentage: {noise_percentage:.4f}%")
else:
    print("No data found to process.")


Total pixels checked: 5371817913
Total noisy pixels (NaN or Inf): 130098764
Noise Percentage: 2.4219%
