# Space Time Cube Generator

Creates **4 NetCDF cubes** with proper CF-compliant time encoding:
- helene_iterative.nc (per-bin counts)
- helene_cumulative.nc (growing totals)
- francine_iterative.nc (per-bin counts)
- francine_cumulative.nc (growing totals)

In [None]:
# Config
# events: base names of the GeoJSON inputs; each one yields two cube variants
#         (iterative and cumulative).
# cell_km / time_hours: spatial cell size and temporal bin width of the cubes.
events = ['helene', 'francine']
cell_km = 10
time_hours = 4

# Derive the cube total from the event list so the banner stays correct
# when events are added or removed.
print(f'Building {len(events)} events x 2 variants = {len(events) * 2} cubes')
print(f'Resolution: {cell_km}km, {time_hours}h bins')

In [None]:
# Standard library
import json
import warnings
from pathlib import Path

# Third-party (same relative order as before)
import geopandas as gpd
import pandas as pd
import numpy as np
from netCDF4 import Dataset, date2num

# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.filterwarnings('ignore')
print('Ready')

In [None]:
# Project root; the input and output folders are derived from it.
BASE = Path(r'C:\\Users\\colto\\Documents\\GitHub\\Tweet_project')
DATA = BASE.joinpath('data')
OUT = BASE.joinpath('spacetimecube_output')

# Create the output folder when missing (the project root must already exist).
OUT.mkdir(exist_ok=True)
print(f'Output: {OUT}')

In [None]:
def _write_count_cube(nc_path, counts, x_centers, y_centers, time_numeric,
                      time_units, count_long_name, title):
    """Write one (time, y, x) count cube to a NetCDF4 file.

    Args:
        nc_path: Destination file path (overwritten if it exists).
        counts: 3-D integer array indexed as (time, y, x).
        x_centers: Cell-centre x coordinates, one per column of ``counts``.
        y_centers: Cell-centre y coordinates, one per row of ``counts``.
        time_numeric: Bin-centre times already encoded in ``time_units``.
        time_units: Units string stored on the ``time`` variable.
        count_long_name: ``long_name`` attribute for the ``COUNT`` variable.
        title: Global ``title`` attribute of the file.
    """
    nt, ny, nx = counts.shape
    with Dataset(nc_path, 'w', format='NETCDF4') as nc:
        nc.createDimension('x', nx)
        nc.createDimension('y', ny)
        nc.createDimension('time', nt)

        # The two spatial coordinate variables differ only in name and values.
        for axis, centers in (('x', x_centers), ('y', y_centers)):
            coord = nc.createVariable(axis, 'f8', (axis,))
            coord[:] = centers
            coord.units = 'meters'
            coord.standard_name = f'projection_{axis}_coordinate'
            coord.long_name = f'{axis} coordinate (Albers Equal Area)'

        t_var = nc.createVariable('time', 'f8', ('time',))
        t_var[:] = time_numeric
        t_var.units = time_units
        t_var.calendar = 'gregorian'
        t_var.standard_name = 'time'
        t_var.long_name = 'time'

        count_var = nc.createVariable('COUNT', 'i4', ('time', 'y', 'x'), zlib=True, complevel=4)
        count_var[:] = counts
        count_var.long_name = count_long_name
        count_var.units = 'count'

        nc.Conventions = 'CF-1.6'
        nc.title = title
        nc.institution = 'Tweet Project'
        nc.source = 'Hurricane tweet data'
        nc.crs = 'EPSG:5070'


def create_cube(event_name, cell_km, time_hours, output_dir):
    """Build the iterative and cumulative space-time cubes for one event.

    Reads ``<DATA>/geojson/<event_name>.geojson`` (``DATA`` is the
    module-level data folder), reprojects the points to EPSG:5070, bins them
    on a regular x/y/time grid and writes two NetCDF files into
    ``output_dir``: ``<event_name>_iterative.nc`` (count per time bin) and
    ``<event_name>_cumulative.nc`` (running total over time).

    Args:
        event_name: Base name of the GeoJSON file and of the output files.
        cell_km: Grid cell edge length in kilometres.
        time_hours: Width of each time bin in hours.
        output_dir: ``pathlib.Path`` of the folder receiving the ``.nc`` files.

    Returns:
        dict with keys ``event``, ``dimensions`` (``x``/``y``/``time`` sizes),
        ``tweets`` (number of binned tweets) and ``files`` (the two output
        paths as strings).

    Raises:
        ValueError: If the GeoJSON file contains no features.
    """
    print(f'\n{"="*70}')
    print(f'PROCESSING: {event_name.upper()}')
    print("="*70)

    # Load tweets
    tweets = gpd.read_file(DATA / 'geojson' / f'{event_name}.geojson')
    if tweets.empty:
        raise ValueError(f'No tweets found for event {event_name!r}')
    tweets['time'] = pd.to_datetime(tweets['time'])
    if tweets.crs is None:
        # Files without CRS metadata are treated as lon/lat.
        tweets.set_crs('EPSG:4326', inplace=True)
    tweets = tweets.to_crs('EPSG:5070')
    tweets['x'] = tweets.geometry.x
    tweets['y'] = tweets.geometry.y
    print(f'Loaded {len(tweets)} tweets')

    # Create grid: snap the bounding box outward to whole cells.
    cell_m = cell_km * 1000
    xmin, ymin, xmax, ymax = tweets.total_bounds
    xmin = np.floor(xmin / cell_m) * cell_m
    ymin = np.floor(ymin / cell_m) * cell_m
    xmax = np.ceil(xmax / cell_m) * cell_m
    ymax = np.ceil(ymax / cell_m) * cell_m
    # Count cells explicitly (at least one) instead of relying on a float
    # np.arange stop value; a point lying exactly on a cell boundary would
    # otherwise collapse the extent to zero cells.
    nx = max(1, int(round((xmax - xmin) / cell_m)))
    ny = max(1, int(round((ymax - ymin) / cell_m)))
    x_edges = xmin + cell_m * np.arange(nx + 1)
    y_edges = ymin + cell_m * np.arange(ny + 1)
    x_centers = (x_edges[:-1] + x_edges[1:]) / 2
    y_centers = (y_edges[:-1] + y_edges[1:]) / 2
    print(f'Grid: {nx} x {ny} cells')

    # Create time bins
    freq = f'{time_hours}h'
    tmin = tweets['time'].min().floor(freq)
    tmax = tweets['time'].max().ceil(freq)
    if tmax == tmin:
        # All timestamps sit exactly on one bin boundary: keep a single bin.
        tmax = tmin + pd.Timedelta(freq)
    time_edges = pd.date_range(tmin, tmax, freq=freq)
    time_centers = time_edges[:-1] + (time_edges[1:] - time_edges[:-1]) / 2
    nt = len(time_edges) - 1
    print(f'Time: {nt} bins from {tmin} to {tmax}')

    # Bin tweets. pd.cut uses right-closed intervals (a, b]; include_lowest
    # additionally admits values equal to the very first edge.
    tweets['x_bin'] = pd.cut(tweets.x, bins=x_edges, labels=False, include_lowest=True)
    tweets['y_bin'] = pd.cut(tweets.y, bins=y_edges, labels=False, include_lowest=True)
    tweets['t_bin'] = pd.cut(tweets['time'], bins=time_edges, labels=False, include_lowest=True)
    tweets_binned = tweets.dropna(subset=['x_bin', 'y_bin', 't_bin']).copy()
    tweets_binned[['x_bin', 'y_bin', 't_bin']] = tweets_binned[['x_bin', 'y_bin', 't_bin']].astype(int)
    print(f'{len(tweets_binned)} tweets binned successfully')

    # Build iterative cube (per-bin counts). np.add.at accumulates repeated
    # indices correctly, replacing a Python-level loop over every tweet.
    cube_iter = np.zeros((nt, ny, nx), dtype=np.int32)
    np.add.at(
        cube_iter,
        (
            tweets_binned['t_bin'].to_numpy(),
            tweets_binned['y_bin'].to_numpy(),
            tweets_binned['x_bin'].to_numpy(),
        ),
        1,
    )
    print(f'Iterative cube: {cube_iter.sum()} total, {(cube_iter>0).sum()} non-zero cells')

    # Build cumulative cube (growing totals): running sum along the time axis.
    cube_cum = np.cumsum(cube_iter, axis=0, dtype=np.int32)
    print(f'Cumulative cube: {cube_cum[-1].sum()} final total')

    # Convert time to numeric (CF-compliant)
    time_units = 'hours since 1970-01-01 00:00:00'
    time_numeric = date2num(time_centers.to_pydatetime(), units=time_units, calendar='gregorian')

    # Save iterative
    nc_iter = output_dir / f'{event_name}_iterative.nc'
    _write_count_cube(
        nc_iter, cube_iter, x_centers, y_centers, time_numeric, time_units,
        count_long_name='Tweet count per time bin (iterative)',
        title=f'{event_name.title()} Hurricane Tweets - Iterative',
    )
    print(f'Saved iterative: {nc_iter.name} ({nc_iter.stat().st_size/1024:.1f} KB)')

    # Save cumulative
    nc_cum = output_dir / f'{event_name}_cumulative.nc'
    _write_count_cube(
        nc_cum, cube_cum, x_centers, y_centers, time_numeric, time_units,
        count_long_name='Tweet count cumulative from start',
        title=f'{event_name.title()} Hurricane Tweets - Cumulative',
    )
    print(f'Saved cumulative: {nc_cum.name} ({nc_cum.stat().st_size/1024:.1f} KB)')

    return {
        'event': event_name,
        'dimensions': {'x': nx, 'y': ny, 'time': nt},
        'tweets': int(cube_iter.sum()),
        'files': [str(nc_iter), str(nc_cum)]
    }

print('Function defined')

In [None]:
# Generate all cubes
# One create_cube call per event; each call writes two .nc files into OUT
# and returns a summary dict that is collected for the summary cell.
results = []
for event in events:
    results.append(create_cube(event, cell_km, time_hours, OUT))

# '\n' (not '\\n') so the banner is preceded by a real blank line.
print(f'\n{"="*70}')
print('ALL CUBES CREATED')
print("="*70)

In [None]:
# Summary
# Collect run metadata and the per-event results, echo them, and persist
# them next to the cubes as summary.json.
summary = {
    'created': pd.Timestamp.now().isoformat(),
    'resolution': {'cell_km': cell_km, 'time_hours': time_hours},
    'cubes': results,
    # Count the files each cube actually reported rather than assuming two.
    'total_files': sum(len(cube['files']) for cube in results)
}
print(json.dumps(summary, indent=2))

summary_path = OUT / 'summary.json'
with open(summary_path, 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2)
print(f'\nSummary saved to: {summary_path}')

In [None]:
# Usage notes for loading the generated .nc files in ArcGIS Pro.
# Kept as one newline-joined text so it is printed in a single call; the
# empty entries produce the blank separator lines.
ARCGIS_INSTRUCTIONS = '\n'.join([
    '',
    '=' * 70,
    'LOADING IN ARCGIS PRO',
    '=' * 70,
    'Method 1: Catalog Pane → Navigate to spacetimecube_output/',
    '          Right-click .nc file → Add to Current Map',
    '          Select COUNT variable → Time auto-detected',
    '',
    'Method 2: Map tab → Add Data → Browse to .nc file',
    '',
    'Method 3: Drag .nc file from File Explorer onto map',
    '',
    'Then: View → Time Slider to animate',
    '',
    'Iterative = per-bin counts (pulses)',
    'Cumulative = growing totals (spread)',
    '',
    'DONE!',
])
print(ARCGIS_INSTRUCTIONS)