# Space Time Cube Generator

Creates an ArcGIS-compatible Space Time Cube (NetCDF) from hurricane tweets.

In [1]:
# Run parameters: edit these to change what the notebook produces
event = 'helene'         # storm to process; 'francine' is the other option
cell_size_km = 10        # spatial grid cell size, in kilometres
time_step_hours = 4      # width of each time bin, in hours

# Echo the chosen settings so they are recorded alongside the results
print(
    f'Creating Space Time Cube for {event}',
    f'Cell: {cell_size_km}km, Time: {time_step_hours}h',
    sep='\n',
)

Creating Space Time Cube for helene
Cell: 10km, Time: 4h


In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from netCDF4 import Dataset
from pathlib import Path
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and CRS warnings; confirm that is intended.
warnings.simplefilter('ignore')

print('Ready')

Ready


In [3]:
# Read the event's tweet GeoJSON and parse the timestamp column
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere
data_path = Path(r'C:\Users\colto\Documents\GitHub\Tweet_project\data\geojson')
geojson_file = data_path / f'{event}.geojson'
tweets = gpd.read_file(geojson_file)
tweets['time'] = pd.to_datetime(tweets['time'])

# If the file carries no CRS, declare it as EPSG:4326 before reprojecting;
# then move to EPSG:5070 (Albers Equal Area) so coordinates are in metres
if tweets.crs is None:
    tweets = tweets.set_crs('EPSG:4326')
tweets = tweets.to_crs('EPSG:5070')

# Keep the projected coordinates as plain columns for binning
tweets['x'], tweets['y'] = tweets.geometry.x, tweets.geometry.y

print(f'Loaded {len(tweets)} tweets')
print(f'Time range: {tweets.time.min()} to {tweets.time.max()}')

Loaded 3007 tweets
Time range: 2024-09-26 02:29:25+00:00 to 2024-09-27 19:59:41+00:00


In [4]:
# Build the spatial grid edges
cell_m = cell_size_km * 1000  # cell size in metres

# Snap the data extent outward to whole multiples of the cell size:
# minima are rounded down, maxima are rounded up
raw_xmin, raw_ymin, raw_xmax, raw_ymax = tweets.total_bounds
xmin, ymin = (np.floor(v / cell_m) * cell_m for v in (raw_xmin, raw_ymin))
xmax, ymax = (np.ceil(v / cell_m) * cell_m for v in (raw_xmax, raw_ymax))

# Edge arrays: the stop value is pushed one cell past the max so that the
# max edge itself is included (np.arange excludes its stop value)
x_bins, y_bins = (
    np.arange(lo, hi + cell_m, cell_m)
    for lo, hi in ((xmin, xmax), (ymin, ymax))
)

print(f'Grid: {len(x_bins)-1} x {len(y_bins)-1} cells')

Grid: 120 x 175 cells


In [5]:
# Build the time-bin edges
time_freq = f'{time_step_hours}h'

# Round the first timestamp down and the last one up to the bin width,
# then lay out evenly spaced edges between them
time_min = tweets.time.min().floor(time_freq)
time_max = tweets.time.max().ceil(time_freq)
time_bins = pd.date_range(start=time_min, end=time_max, freq=time_freq)

print(f'Time: {len(time_bins)-1} bins of {time_step_hours}h')

Time: 11 bins of 4h


In [6]:
# Assign every tweet an integer bin index along x, y and time.
# pd.cut's default intervals are closed on the right; include_lowest=True
# additionally closes the first interval on the left so the minimum edge is kept.
bin_inputs = {
    'x_bin': (tweets.x, x_bins),
    'y_bin': (tweets.y, y_bins),
    't_bin': (tweets.time, time_bins),
}
for bin_col, (values, edges) in bin_inputs.items():
    tweets[bin_col] = pd.cut(values, bins=edges, labels=False, include_lowest=True)

# Rows that fell outside the edges get NaN from pd.cut; discard them and
# convert the remaining indices from float to int
bin_cols = list(bin_inputs)
tweets_binned = tweets.dropna(subset=bin_cols).copy()
for bin_col in bin_cols:
    tweets_binned[bin_col] = tweets_binned[bin_col].astype(int)

print(f'{len(tweets_binned)} tweets in grid ({len(tweets_binned)/len(tweets)*100:.1f}%)')

3007 tweets in grid (100.0%)


In [7]:
# Create 3D cube of tweet counts, indexed (time, y, x)
nx = len(x_bins) - 1
ny = len(y_bins) - 1
nt = len(time_bins) - 1

cube = np.zeros((nt, ny, nx), dtype=np.int32)

# Count tweets per cell in one vectorised step instead of a Python-level
# iterrows() loop. np.add.at is unbuffered, so tweets sharing the same
# (t, y, x) index each contribute +1 (plain `cube[idx] += 1` would not).
cell_index = (
    tweets_binned['t_bin'].to_numpy(),
    tweets_binned['y_bin'].to_numpy(),
    tweets_binned['x_bin'].to_numpy(),
)
np.add.at(cube, cell_index, 1)

# Invariant: every binned tweet lands in exactly one cell
assert cube.sum() == len(tweets_binned), 'cube total does not match number of binned tweets'

print(f'Cube shape: {cube.shape} (time, y, x)')
print(f'Total tweets in cube: {cube.sum()}')
print(f'Non-empty cells: {(cube > 0).sum()}')

Cube shape: (11, 175, 120) (time, y, x)
Total tweets in cube: 3007
Non-empty cells: 600


In [8]:
# Export as NetCDF Space Time Cube
# NOTE(review): absolute, machine-specific output path; adjust when running elsewhere
out_dir = Path(r'C:\Users\colto\Documents\GitHub\Tweet_project\spacetimecube_output')
out_dir.mkdir(parents=True, exist_ok=True)  # also create missing parent folders
out_file = out_dir / f'{event}_spacetimecube.nc'

# Reuse the exact bin edges the cube was built from instead of mutating
# `tweets` and recomputing them here. NetCDF stores time as a plain number,
# so tz-aware edges are converted to UTC first and only then made naive;
# stripping the tz without converting would shift non-UTC data by its offset.
time_edges = time_bins
if time_edges.tz is not None:
    time_edges = time_edges.tz_convert('UTC').tz_localize(None)

# Guard against the cube and the coordinate arrays drifting apart
assert cube.shape == (nt, ny, nx), 'cube shape does not match (nt, ny, nx)'
assert len(time_edges) - 1 == nt, 'time_bins does not match the cube time dimension'

# Name of the CF grid-mapping variable describing the projection
grid_mapping_name = 'albers_conical_equal_area'

with Dataset(out_file, 'w', format='NETCDF4') as nc:
    # Dimensions
    nc.createDimension('x', nx)
    nc.createDimension('y', ny)
    nc.createDimension('time', nt)

    # Coordinate variables: x/y hold cell centres
    x_var = nc.createVariable('x', 'f8', ('x',))
    x_var[:] = (x_bins[:-1] + x_bins[1:]) / 2  # Cell centers
    x_var.units = 'meters'
    x_var.long_name = 'x coordinate (Albers Equal Area)'
    x_var.standard_name = 'projection_x_coordinate'
    x_var.axis = 'X'

    y_var = nc.createVariable('y', 'f8', ('y',))
    y_var[:] = (y_bins[:-1] + y_bins[1:]) / 2
    y_var.units = 'meters'
    y_var.long_name = 'y coordinate (Albers Equal Area)'
    y_var.standard_name = 'projection_y_coordinate'
    y_var.axis = 'Y'

    # Time coordinate: start of each bin, as hours since the Unix epoch (UTC)
    t_var = nc.createVariable('time', 'f8', ('time',))
    epoch = pd.Timestamp('1970-01-01')  # naive
    t_var[:] = ((time_edges[:-1] - epoch) / pd.Timedelta(hours=1)).to_numpy(dtype='float64')
    t_var.units = 'hours since 1970-01-01 00:00:00'
    t_var.calendar = 'gregorian'
    t_var.long_name = 'time'
    t_var.standard_name = 'time'
    t_var.axis = 'T'

    # CF grid mapping so readers can recover the projection from the file.
    # Parameters are those of EPSG:5070 (NAD83 / Conus Albers), the CRS the
    # tweets were reprojected to; keep them in sync if that CRS changes.
    crs_var = nc.createVariable(grid_mapping_name, 'i4')
    crs_var.grid_mapping_name = 'albers_conical_equal_area'
    crs_var.standard_parallel = [29.5, 45.5]
    crs_var.longitude_of_central_meridian = -96.0
    crs_var.latitude_of_projection_origin = 23.0
    crs_var.false_easting = 0.0
    crs_var.false_northing = 0.0
    crs_var.semi_major_axis = 6378137.0
    crs_var.inverse_flattening = 298.257222101

    # Data variable (compressed; -1 is the fill value, real counts are >= 0)
    count_var = nc.createVariable('COUNT', 'i4', ('time', 'y', 'x'),
                                   zlib=True, complevel=4,
                                   fill_value=-1)
    count_var[:] = cube
    count_var.long_name = 'Tweet count per space-time bin'
    count_var.units = 'count'
    count_var.grid_mapping = grid_mapping_name

    # Global attributes
    nc.title = f'{event.title()} Hurricane Space Time Cube'
    nc.institution = 'Tweet Project'
    nc.source = 'Twitter/X hurricane data'
    nc.Conventions = 'CF-1.6'
    nc.crs = 'EPSG:5070'
    nc.cell_size_meters = cell_m
    nc.time_step_hours = time_step_hours
    nc.creation_date = pd.Timestamp.now().isoformat()

print(f'\nSaved: {out_file}')
print(f'Size: {out_file.stat().st_size / 1024:.1f} KB')


Saved: C:\Users\colto\Documents\GitHub\Tweet_project\spacetimecube_output\helene_spacetimecube.nc
Size: 19.6 KB


In [9]:
# Final report: what was written and how to open it in ArcGIS Pro
banner_rule = '=' * 60
report_lines = [
    '\n' + banner_rule,
    'SPACE TIME CUBE CREATED',
    banner_rule,
    f'File: {out_file.name}',
    f'Event: {event}',
    f'Tweets: {cube.sum()} in {(cube>0).sum()} non-empty cells',
    f'Dimensions: {nx}x × {ny}y × {nt}t',
    f'Resolution: {cell_size_km}km, {time_step_hours}h',
    '\nLoad in ArcGIS Pro:',
    '  1. Add Multidimensional Raster Layer',
    '  2. Browse to .nc file',
    '  3. Select COUNT variable',
    '  4. Use Time Slider to animate',
]
# One print with newline-joined lines yields the same text as one print per line
print('\n'.join(report_lines))


SPACE TIME CUBE CREATED
File: helene_spacetimecube.nc
Event: helene
Tweets: 3007 in 600 non-empty cells
Dimensions: 120x × 175y × 11t
Resolution: 10km, 4h

Load in ArcGIS Pro:
  1. Add Multidimensional Raster Layer
  2. Browse to .nc file
  3. Select COUNT variable
  4. Use Time Slider to animate
