In [None]:
pip install rasterio geopandas pandas shapely

In [None]:
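# NOTE: superseded approach, kept for reference only. It exported every pixel of the
# whole raster tile; we only need the pixels inside the country boundary
# (see the boundary-clipping cell further down).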

# #1. Use Rasterio’s Windowed Reading

# import rasterio
# import pandas as pd
# from rasterio.windows import Window
# import numpy as np

# # Filepath for your large TIFF file
# raster_file = r'C:\Users\NU\Downloads\SVDNB_npp_20230101-20230131_00N060E_vcmcfg_v10_c202302080600\SVDNB_npp_20230101-20230131_00N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
# with rasterio.open(raster_file) as src:
#     print(src.meta)  # Metadata about the raster file
#     print(src.descriptions)  # Description of the raster bands
# # Output CSV path
# output_csv = r'C:\Users\NU\Downloads\raster_output.csv'

# # Step 1: Open the raster file
# with rasterio.open(raster_file) as src:
#     # Get raster metadata
#     transform = src.transform
#     crs = src.crs
#     band = src.read(1)  # NOTE: reads the entire first band into memory (not just metadata); `band` is unused below

#     # Prepare to store data incrementally
#     grid_data = []

#     # Step 2: Process the raster in windows (chunks)
#     window_size = 512  # Size of each chunk (adjust as needed)
#     rows, cols = src.height, src.width

#     for row_start in range(0, rows, window_size):
#         for col_start in range(0, cols, window_size):
#             # Define the window (chunk) to read
#             window = Window(col_start, row_start, window_size, window_size)
#             data = src.read(1, window=window)  # Read the chunk
            
#             # Transform window coordinates to actual positions
#             for i in range(data.shape[0]):
#                 for j in range(data.shape[1]):
#                     value = data[i, j]
#                     if np.isnan(value):  # Skip nodata values
#                         continue
#                     # Get global coordinates of the cell
#                     row_idx, col_idx = row_start + i, col_start + j
#                     left, top = transform * (col_idx, row_idx)
#                     right, bottom = transform * (col_idx + 1, row_idx + 1)

#                     # Append data
#                     grid_data.append({
#                         'id': row_idx * cols + col_idx + 1,  # Unique ID
#                         'left': left,
#                         'top': top,
#                         'right': right,
#                         'bottom': bottom,
#                         'row_index': row_idx,
#                         'col_index': col_idx,
#                         'raster_value': value
#                     })

#             # Optional: Save intermediate results if needed to avoid memory issues
#             if len(grid_data) >= 100000:
#                 temp_df = pd.DataFrame(grid_data)
#                 temp_df.to_csv(output_csv, mode='a', index=False, header=not bool(temp_df.empty))
#                 grid_data = []  # Clear the grid data to save memory

#     # Step 3: Save the remaining data (if any) to CSV
#     if grid_data:
#         df = pd.DataFrame(grid_data)
#         df.to_csv(output_csv, mode='a', index=False, header=not bool(pd.read_csv(output_csv).empty))

# print(f"CSV file saved: {output_csv}")


Metadata: The file name SVDNB_npp_20230101-20230131_00N060E_vcmcfg_v10_c202302080600.avg_rade9h encodes important metadata about the contents of the file. The same naming scheme applies to the January 2020 00N060W tile that the extraction cell below actually reads. Let's break it down:

File Name Breakdown:
SVDNB:

Product identifier for the VIIRS Day/Night Band (DNB). The "SV" prefix is used for VIIRS sensor data record products, and "DNB" is the Day/Night Band, the low-light channel of the VIIRS sensor that is used to observe nighttime lights. The satellite itself is identified by the next field (npp).
npp:

Refers to the Suomi National Polar-orbiting Partnership (NPP) mission, which is a collaboration between NASA, NOAA, and other agencies for environmental monitoring.
20230101-20230131:

The date range for the data. The data collected here corresponds to the period from January 1, 2023, to January 31, 2023.
00N060E:

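This is the tile identifier. The global composite is split into tiles that are cut at the equator and span 120° of longitude each, and every tile is named after the coordinates of its upper-left corner. 00N060E is therefore the tile whose upper-left corner lies at 0° latitude and 60° E longitude: it extends south from the equator and east from 60° E to 180°.
The name identifies a whole tile (a large geographic grid), not a single point location.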
vcmcfg_v10:

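Product configuration and version: "vcm" stands for VIIRS Cloud Mask, which is used to identify clouds so that cloud-covered observations are excluded from the nighttime lights composite. In the "vcm" configuration, data affected by stray light is excluded as well (the alternative "vcmsl" configuration keeps stray-light-corrected data).
"v10" denotes the product version (1.0), not a version of the cloud mask.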
c202302080600:

This appears to be a timestamp in the format cYYYYMMDDHHMM, where:
c is a prefix for the date and time.
2023-02-08 06:00 refers to February 8, 2023, at 06:00 UTC. This timestamp likely indicates when the data was processed or when the file was generated.
avg_rade9h:

This indicates the average DNB radiance over the compositing period (here, the whole month). The "rade9" part means the radiance values have been multiplied by 1e9, i.e. they are expressed in nanoWatts/cm²/sr; it does not refer to a 9-hour window.
Averaging all usable observations of the month smooths out night-to-night fluctuations in light intensity.
Summary of Information Derived from the File Name:
The file contains nighttime light data from the VIIRS Day/Night Band on the Suomi NPP satellite, composited over January 1, 2023, to January 31, 2023, for the tile whose upper-left corner is at 0°N, 60°E (i.e. the area south of the equator and east of 60°E). Cloud-covered and stray-light-affected observations are excluded (vcmcfg, product version 1.0), and each pixel holds the monthly average radiance in nanoWatts/cm²/sr (avg_rade9h). The file was generated on February 8, 2023.



In [None]:
import rasterio
import geopandas as gpd
from rasterio.mask import mask
from shapely.geometry import box
import numpy as np
import pandas as pd

# Filepaths: input VIIRS composite (GeoTIFF), Brazil ADM1 boundaries (GeoJSON),
# and the CSV that receives one row per pixel inside the boundary.
raster_file = r"D:\NU\semester_5\data_analysis\project\edited_project\SVDNB_npp_20200101-20200131_00N060W_vcmcfg_v10_c202002111500\SVDNB_npp_20200101-20200131_00N060W_vcmcfg_v10_c202002111500.avg_rade9h.tif"
boundary_file = r"D:\NU\semester_5\data_analysis\project\edited_project\geoBoundaries-BRA-ADM1-all\geoBoundaries-BRA-ADM1.geojson"
output_csv = r"D:\NU\semester_5\data_analysis\project\edited_project\viirs_dataset.csv"

# Load Brazil's boundary and express it in EPSG:4326 (longitude/latitude).
# NOTE(review): rasterio.mask expects the shapes in the raster's own CRS; this
# assumes the raster is EPSG:4326 as well — confirm against src.crs.
brazil = gpd.read_file(boundary_file)
brazil = brazil.to_crs(epsg=4326)

# Open the raster, check that it overlaps the boundary, then clip it.
with rasterio.open(raster_file) as src:
    raster_bounds = box(*src.bounds)
    print("Raster Bounds:", src.bounds)
    print("brazil Bounds:", brazil.total_bounds)

    # union_all() supersedes the deprecated unary_union in recent geopandas;
    # fall back to unary_union on older versions.
    brazil_outline = brazil.union_all() if hasattr(brazil, "union_all") else brazil.unary_union
    if not raster_bounds.intersects(brazil_outline):
        raise ValueError("brazil's boundary does not overlap with the raster extent.")

    # Clip the raster to the boundary polygons.
    # nodata=np.nan is essential: by default, pixels outside the polygons are
    # filled with the raster's nodata value (or 0 when none is set), so the
    # NaN filter below would keep every pixel of the bounding box.
    # NOTE(review): filling with NaN requires a floating-point raster — confirm
    # src.dtypes. Parts of the boundary that fall outside this tile are simply
    # not covered by the clip.
    brazil_geom_list = [feature["geometry"] for feature in brazil.__geo_interface__["features"]]
    clipped_raster, clipped_transform = mask(
        src, brazil_geom_list, crop=True, nodata=np.nan
    )

# Extract raster values: keep only pixels inside the boundary (non-NaN) of band 1.
light_intensity = clipped_raster[0]
rows, cols = np.where(~np.isnan(light_intensity))
values = light_intensity[rows, cols]
# Pixel-centre coordinates of the retained cells, in the clipped raster's transform.
x_coords, y_coords = rasterio.transform.xy(clipped_transform, rows, cols)

# One row per retained pixel.
data = pd.DataFrame({
    'longitude': x_coords,
    'latitude': y_coords,
    'light_intensity': values
})
data.to_csv(output_csv, index=False)
print(f"Extracted data saved to {output_csv}")


In [None]:
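# NOTE: superseded approach, kept for reference only. It windowed the raster by a
# rectangular bounding box (Indonesia); the boundary-based clipping cell above is
# used instead.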

# import rasterio
# import pandas as pd
# from rasterio.windows import Window
# from shapely.geometry import box
# import numpy as np

# # Filepath for your large TIFF file
# raster_file = r'C:\Users\NU\Downloads\SVDNB_npp_20230101-20230131_00N060E_vcmcfg_v10_c202302080600\SVDNB_npp_20230101-20230131_00N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
# output_csv = r'C:\Users\NU\Downloads\indonesia_light_intensity_detailed.csv'

# # Define the geographic bounds of Indonesia
# indonesia_bounds = {
#     "left": 95.0,  # Min longitude
#     "right": 141.0,  # Max longitude
#     "bottom": -11.0,  # Min latitude
#     "top": 6.0  # Max latitude
# }

# # Open the raster file
# with rasterio.open(raster_file) as src:
#     print("Raster Metadata:", src.meta)  # Metadata about the raster file
#     transform = src.transform
#     crs = src.crs

#     # Get the raster's bounding box
#     raster_bounds = box(*src.bounds)
#     print("Raster Bounds:", raster_bounds.bounds)

#     # Check if the raster covers Indonesia
#     indonesia_box = box(indonesia_bounds["left"], indonesia_bounds["bottom"],
#                         indonesia_bounds["right"], indonesia_bounds["top"])
#     if not raster_bounds.intersects(indonesia_box):
#         raise ValueError("The raster does not cover the Indonesia region!")

#     # Get pixel indices for Indonesia's bounds
#     row_start, col_start = src.index(indonesia_bounds["left"], indonesia_bounds["top"])
#     row_end, col_end = src.index(indonesia_bounds["right"], indonesia_bounds["bottom"])

#     # Clip indices to raster dimensions
#     row_start, col_start = max(row_start, 0), max(col_start, 0)
#     row_end, col_end = min(row_end, src.height), min(col_end, src.width)

#     # Prepare to store data incrementally
#     grid_data = []

#     # Process the raster in windows (chunks) for the Indonesia region
#     window_size = 512  # Size of each chunk (adjust as needed)
#     for row_window_start in range(row_start, row_end, window_size):
#         for col_window_start in range(col_start, col_end, window_size):
#             # Define the window (chunk) to read
#             row_window_end = min(row_window_start + window_size, row_end)
#             col_window_end = min(col_window_start + window_size, col_end)
#             window = Window(col_window_start, row_window_start,
#                             col_window_end - col_window_start,
#                             row_window_end - row_window_start)

#             # Read the chunk
#             data = src.read(1, window=window)

#             # Transform window coordinates to actual positions
#             for i in range(data.shape[0]):
#                 for j in range(data.shape[1]):
#                     value = data[i, j]
#                     if np.isnan(value):  # Skip nodata values
#                         continue
#                     # Get global coordinates of the cell
#                     global_row_idx = row_window_start + i
#                     global_col_idx = col_window_start + j
#                     left, top = transform * (global_col_idx, global_row_idx)
#                     right, bottom = transform * (global_col_idx + 1, global_row_idx + 1)

#                     # Append data
#                     grid_data.append({
#                         'id': global_row_idx * src.width + global_col_idx + 1,  # Unique ID
#                         'left': left,
#                         'top': top,
#                         'right': right,
#                         'bottom': bottom,
#                         'row_index': global_row_idx,
#                         'col_index': global_col_idx,
#                         'raster_value': value
#                     })

#             # Optional: Save intermediate results to avoid memory issues
#             if len(grid_data) >= 100000:
#                 temp_df = pd.DataFrame(grid_data)
#                 temp_df.to_csv(output_csv, mode='a', index=False, header=not bool(pd.read_csv(output_csv).empty))
#                 grid_data = []  # Clear grid data to save memory

#     # Save remaining data (if any)
#     if grid_data:
#         df = pd.DataFrame(grid_data)
#         df.to_csv(output_csv, mode='a', index=False, header=not bool(pd.read_csv(output_csv).empty))

# print(f"CSV file saved: {output_csv}")
