In [2]:
###################################################################################
#                                                                                 #
#                BUNDLING CSV XRAIN DATA INTO A SINGLE SHAPEFILE (GIS)            #
#                                                                                 #
#                             Christopher Gomez, 2024                             #
#                                                                                 #
###################################################################################


import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import glob
import os
from datetime import datetime

def create_grid_shapefile_with_data(northwest_corner, southeast_corner, n_rows, n_cols, data_folder, output_file):
    """
    Create a georeferenced grid shapefile with time-series data from CSV files.
    
    Parameters:
    -----------
    northwest_corner: tuple
        (latitude, longitude) of northwest corner
    southeast_corner: tuple
        (latitude, longitude) of southeast corner
    n_rows: int
        Number of rows in the grid
    n_cols: int
        Number of columns in the grid
    data_folder: str
        Path to folder containing CSV files
    output_file: str
        Path for output shapefile
    """
    print("Creating grid geometry...")
    
    # Calculate cell dimensions
    lat_step = (northwest_corner[0] - southeast_corner[0]) / n_rows
    lon_step = (southeast_corner[1] - northwest_corner[1]) / n_cols
    
    # Create empty lists to store geometries and attributes
    geometries = []
    cells = []
    
    # Generate grid cells and store their indices
    for row in range(n_rows):
        for col in range(n_cols):
            # Calculate corner coordinates for each cell
            north = northwest_corner[0] - (row * lat_step)
            south = north - lat_step
            west = northwest_corner[1] + (col * lon_step)
            east = west + lon_step
            
            # Create polygon for the cell
            cell = Polygon([
                (west, north),
                (east, north),
                (east, south),
                (west, south),
                (west, north)
            ])
            
            geometries.append(cell)
            cells.append({'geometry': cell, 'row': row, 'col': col})
    
    # Create initial GeoDataFrame with the grid
    grid_gdf = gpd.GeoDataFrame(cells)
    
    # Set CRS to WGS 84
    grid_gdf.set_crs(epsg=4326, inplace=True)
    
    print("Processing CSV files...")
    
    # Get list of all CSV files in the folder
    csv_files = sorted(glob.glob(os.path.join(data_folder, '*.csv')))
    
    # Process each CSV file
    for csv_file in csv_files:
        try:
            # Extract timestamp from filename (assuming format YYYYMMDD-HHMM)
            timestamp = datetime.strptime(os.path.basename(csv_file)[:13], '%Y%m%d-%H%M')
            print(f"Processing file for timestamp: {timestamp}")
            
            # Read CSV file
            df = pd.read_csv(csv_file, header=None)
            
            if df.shape != (n_rows, n_cols):
                print(f"Warning: CSV file {csv_file} has unexpected dimensions: {df.shape}")
                continue
            
            # Create a list to store values in the same order as our grid cells
            values = []
            for row in range(n_rows):
                for col in range(n_cols):
                    values.append(df.iloc[row, col])
            
            # Add the values as a new column in the GeoDataFrame
            column_name = f'T{timestamp.strftime("%Y%m%d_%H%M")}'
            grid_gdf[column_name] = values
            
        except Exception as e:
            print(f"Error processing file {csv_file}: {str(e)}")
    
    print("Saving shapefile...")
    
    # Save to shapefile
    grid_gdf.to_file(output_file)
    
    print(f"Complete! Shapefile saved to {output_file}")
    print(f"Number of time steps processed: {len(csv_files)}")
    
    return grid_gdf

# Example usage
if __name__ == "__main__":
    # Define parameters based on your data
    nw_corner = (37.7584, 136.0218)  # (lat, lon)
    se_corner = (36.2813, 137.8781)  # (lat, lon)
    n_rows = 709
    n_cols = 594
    
    # Create the grid shapefile with data
    grid_gdf = create_grid_shapefile_with_data(
        northwest_corner=nw_corner,
        southeast_corner=se_corner,
        n_rows=n_rows,
        n_cols=n_cols,
        data_folder=os.getcwd(),  # Current working directory
        output_file='temporal_grid_with_data.shp'
    )

Creating grid geometry...
Processing CSV files...
Processing file for timestamp: 2024-09-20 12:10:00
Processing file for timestamp: 2024-09-20 12:20:00
Processing file for timestamp: 2024-09-20 12:30:00
Processing file for timestamp: 2024-09-20 12:40:00
Processing file for timestamp: 2024-09-20 12:50:00
Processing file for timestamp: 2024-09-20 13:00:00
Processing file for timestamp: 2024-09-20 13:10:00
Processing file for timestamp: 2024-09-20 13:20:00
Processing file for timestamp: 2024-09-20 13:30:00
Processing file for timestamp: 2024-09-20 13:40:00
Processing file for timestamp: 2024-09-20 13:50:00
Processing file for timestamp: 2024-09-20 14:00:00
Processing file for timestamp: 2024-09-20 14:10:00
Processing file for timestamp: 2024-09-20 14:20:00
Processing file for timestamp: 2024-09-20 14:30:00
Processing file for timestamp: 2024-09-20 14:40:00
Processing file for timestamp: 2024-09-20 14:50:00
Processing file for timestamp: 2024-09-20 15:00:00
Processing file for timestamp: 2

  grid_gdf.to_file(output_file)


Complete! Shapefile saved to temporal_grid_with_data.shp
Number of time steps processed: 71


  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
