Testing on the 2022 data stored on Google Drive

In [47]:
import os
import h5py
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Path to the Sentinel NDVI HDF5 folder (Google Drive must already be mounted).
input_folder = "/content/drive/MyDrive/MlOps_Project/NDVI/2022/OH"

# Positions of the bands on the last axis of each tile.
# NOTE(review): assumes the last axis is ordered [Red, NIR] -- confirm against
# the dataset specification.
RED_BAND = 0
NIR_BAND = 1

# Column order of the summary table. Passing it explicitly to the DataFrame
# constructor guarantees the CSV has a header even when no tile was processed.
NDVI_COLUMNS = ["filename", "fips", "date", "tile_index", "mean_ndvi"]


def compute_mean_ndvi(tile):
    """Return the mean NDVI of one tile, ignoring pixels where NDVI is undefined.

    NDVI is computed per pixel as ``(nir - red) / (nir + red)`` and clipped to
    ``[-1, 1]``. Pixels whose denominator is zero, or whose inputs are NaN,
    have no defined NDVI and are excluded from the mean. The previous
    implementation added ``1e-5`` to the denominator instead, which turned
    such pixels into NDVI 0 and diluted the tile mean toward zero.

    Args:
        tile: array indexable as ``tile[:, :, band]`` holding at least the
            red (``RED_BAND``) and near-infrared (``NIR_BAND``) bands.

    Returns:
        The mean NDVI as ``np.float32``, or NaN when the tile contains no
        pixel with a defined NDVI.

    Raises:
        IndexError: if the tile has fewer than three axes or fewer than two
            bands on its last axis.
    """
    red = tile[:, :, RED_BAND].astype(np.float32)
    nir = tile[:, :, NIR_BAND].astype(np.float32)

    # Division by zero is expected here; silence the warning and filter the
    # resulting inf/NaN values explicitly instead of biasing the denominator.
    with np.errstate(divide="ignore", invalid="ignore"):
        ndvi = (nir - red) / (nir + red)

    ndvi = ndvi[np.isfinite(ndvi)]
    if ndvi.size == 0:
        # Nothing valid to average; avoid numpy's "mean of empty slice" warning.
        return np.float32(np.nan)

    return np.clip(ndvi, -1, 1).mean()


# ---------------------------------------------------------------------------
# Extraction
# ---------------------------------------------------------------------------
# One record (dict) per tile; converted to a DataFrame once at the end.
ndvi_records = []

# sorted() makes the processing order, and therefore the CSV row order,
# reproducible: os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith(".h5"):
        continue

    file_path = os.path.join(input_folder, filename)
    print(f"Processing {filename}")

    try:
        with h5py.File(file_path, "r") as f:
            # File layout as traversed here: <fips>/<date>/data
            for fips in f.keys():
                for date in f[fips].keys():
                    group = f[fips][date]

                    # Skip groups that do not carry the band array.
                    if "data" not in group:
                        print(f"⚠️ No 'data' in {filename} -> {fips} / {date}")
                        continue

                    try:
                        # Presumably shaped (time, height, width, bands) -- TODO confirm.
                        data = group["data"][:]

                        for i, tile in enumerate(data):
                            mean_ndvi = compute_mean_ndvi(tile)

                            ndvi_records.append({
                                "filename": filename,
                                "fips": fips,
                                "date": date,
                                "tile_index": i,
                                "mean_ndvi": mean_ndvi
                            })

                    except Exception as e:
                        # Best effort: report the failing group and keep going.
                        # Records appended before the failure are kept.
                        print(f"❌ Error processing {filename} -> {fips}/{date}: {e}")
    except Exception as e:
        # Reached when the file cannot be opened or its group tree cannot be walked.
        print(f"❌ Cannot open file {filename}: {e}")

# ---------------------------------------------------------------------------
# Save to CSV
# ---------------------------------------------------------------------------
output_df = pd.DataFrame(ndvi_records, columns=NDVI_COLUMNS)
csv_path = os.path.join("/content/drive/MyDrive/MlOps_Project/", "ndvi_summary.csv")
output_df.to_csv(csv_path, index=False)
print(f"✅ NDVI summary saved to: {csv_path}")


Processing Vegetation_39_OH_2022-01-01_2022-03-31.h5
Processing Vegetation_39_OH_2022-04-01_2022-06-30.h5
Processing Vegetation_39_OH_2022-07-01_2022-09-30.h5
Processing Vegetation_39_OH_2022-10-01_2022-12-31.h5
✅ NDVI summary saved to: /content/drive/MyDrive/MlOps_Project/ndvi_summary.csv
