# Tweet Processing with ArcPy - ArcGIS Pro Native

This notebook processes hurricane tweet data using **ArcPy only** - no external dependencies.

**Features:**
- Spatial operations using arcpy.da, arcpy.management, arcpy.analysis
- All data stored in project geodatabase (tw_project.gdb)
- Time-binned aggregation (4-hour intervals)
- Hierarchical geographic cascade (city → county → state)
- Raster creation for heat mapping
- Time-enabled outputs for ArcGIS Pro visualization

In [76]:
# ==============================================================================
# SETUP: Import ArcPy and Configure Environment
# ==============================================================================

import arcpy
import os
from datetime import datetime, timedelta
import json

# Set workspace to project geodatabase
project_root = os.getcwd()
gdb_path = os.path.join(project_root, 'data', 'tw_project.gdb')

# Create geodatabase if it doesn't exist
if not arcpy.Exists(gdb_path):
    gdb_folder = os.path.dirname(gdb_path)
    gdb_name = os.path.basename(gdb_path)
    arcpy.management.CreateFileGDB(gdb_folder, gdb_name)
    print(f"Created geodatabase: {gdb_path}")

arcpy.env.workspace = gdb_path
arcpy.env.overwriteOutput = True

print(f"Workspace set to: {arcpy.env.workspace}")
print(f"ArcGIS Pro Version: {arcpy.GetInstallInfo()['Version']}")

Workspace set to: C:\Users\colto\Documents\tw_project\tw_project\data\tw_project.gdb
ArcGIS Pro Version: 3.5


In [77]:
# ==============================================================================
# IMPORT DATA: Load GeoJSON into Geodatabase
# ==============================================================================

def import_geojson_to_gdb(geojson_path, output_fc_name):
    """
    Import GeoJSON to geodatabase feature class.
    """
    output_fc = os.path.join(gdb_path, output_fc_name)
    
    # Convert JSON to Features
    arcpy.conversion.JSONToFeatures(geojson_path, output_fc)
    
    print(f"Imported {output_fc_name}: {arcpy.management.GetCount(output_fc)[0]} features")
    return output_fc

# Import hurricane tweet data
helene_geojson = os.path.join(project_root, 'data', 'geojson', 'helene.geojson')
francine_geojson = os.path.join(project_root, 'data', 'geojson', 'francine.geojson')

tweets_helene_fc = import_geojson_to_gdb(helene_geojson, 'tweets_helene')
# tweets_francine_fc = import_geojson_to_gdb(francine_geojson, 'tweets_francine')

# Use Helene data as primary
tweets_fc = tweets_helene_fc

Imported tweets_helene: 0 features


In [78]:
# ==============================================================================
# LOAD REFERENCE GEOGRAPHY
# ==============================================================================

# Import US States
states_shp = os.path.join(project_root, 'data', 'shape_files', 'cb_2023_us_state_20m.shp')
states_fc = os.path.join(gdb_path, 'us_states')
arcpy.conversion.FeatureClassToFeatureClass(states_shp, gdb_path, 'us_states')
print(f"Imported US States: {arcpy.management.GetCount(states_fc)[0]} features")

# Import US Counties
counties_shp = os.path.join(project_root, 'data', 'shape_files', 'cb_2023_us_county_20m.shp')
counties_fc = os.path.join(gdb_path, 'us_counties')
arcpy.conversion.FeatureClassToFeatureClass(counties_shp, gdb_path, 'us_counties')
print(f"Imported US Counties: {arcpy.management.GetCount(counties_fc)[0]} features")

# Import US Cities (from CSV - create feature class)
cities_csv = os.path.join(project_root, 'data', 'tables', 'cities1000.csv')
cities_fc = os.path.join(gdb_path, 'us_cities')

# Create point feature class from XY data
arcpy.management.XYTableToPoint(
    cities_csv,
    cities_fc,
    'longitude',
    'latitude',
    coordinate_system=arcpy.SpatialReference(4326)
)

# Filter to US cities only
arcpy.analysis.Select(
    cities_fc,
    os.path.join(gdb_path, 'us_cities_filtered'),
    "country_code = 'US'"
)
cities_fc = os.path.join(gdb_path, 'us_cities_filtered')
print(f"Imported US Cities: {arcpy.management.GetCount(cities_fc)[0]} features")

Imported US States: 52 features
Imported US Counties: 3222 features
Imported US Cities: 17244 features


In [79]:
# ==============================================================================
# TIME BINNING: Create 4-hour time bins
# ==============================================================================

def add_time_bin_field(feature_class, time_field='time', bin_hours=4):
    """
    Add a time_bin field that groups times into N-hour intervals.
    """
    # Add time_bin field
    arcpy.management.AddField(feature_class, 'time_bin', 'DATE')
    arcpy.management.AddField(feature_class, 'time_bin_str', 'TEXT', field_length=50)
    
    # Calculate time bins using Python expression
    expression = f"""
def floor_to_hours(dt, hours):
    if dt is None:
        return None
    hour = (dt.hour // hours) * hours
    return dt.replace(hour=hour, minute=0, second=0, microsecond=0)
    
floor_to_hours(!{time_field}!, {bin_hours})
"""
    
    arcpy.management.CalculateField(
        feature_class,
        'time_bin',
        expression,
        'PYTHON3'
    )
    
    # Also create string version for labeling
    arcpy.management.CalculateField(
        feature_class,
        'time_bin_str',
        "!time_bin!.strftime('%Y-%m-%d %H:%M:%S') if !time_bin! else None",
        'PYTHON3'
    )
    
    print(f"Added time_bin field to {os.path.basename(feature_class)}")

# Add time bins to tweets
add_time_bin_field(tweets_fc, 'time', 4)

# Get unique time bins
time_bins = set()
with arcpy.da.SearchCursor(tweets_fc, ['time_bin']) as cursor:
    for row in cursor:
        if row[0]:
            time_bins.add(row[0])

time_bins = sorted(list(time_bins))
print(f"\nFound {len(time_bins)} time bins")
print(f"Range: {time_bins[0].strftime('%Y-%m-%d %H:%M')} to {time_bins[-1].strftime('%Y-%m-%d %H:%M')}")

Added time_bin field to tweets_helene

Found 0 time bins


<class 'IndexError'>: list index out of range

In [None]:
# ==============================================================================
# SPATIAL JOIN: Cascade counts from tweet points to geography
# ==============================================================================

def spatial_join_and_count(tweet_fc, target_fc, output_name, time_bin=None):
    """
    Perform spatial join to count tweets in each geographic unit.
    If time_bin specified, filter tweets to that time period.
    """
    # Filter tweets by time bin if specified
    if time_bin:
        bin_str = time_bin.strftime('%Y-%m-%d %H:%M:%S')
        where_clause = f"time_bin_str = '{bin_str}'"
        tweet_layer = arcpy.management.MakeFeatureLayer(tweet_fc, 'tweet_temp', where_clause)
    else:
        tweet_layer = tweet_fc
    
    # Spatial join (one-to-many, then summarize)
    output_fc = os.path.join(gdb_path, output_name)
    
    arcpy.analysis.SpatialJoin(
        target_fc,
        tweet_layer,
        output_fc,
        'JOIN_ONE_TO_ONE',
        'KEEP_ALL',
        match_option='CONTAINS'
    )
    
    # Rename Join_Count to tweet_count
    arcpy.management.AddField(output_fc, 'tweet_count', 'LONG')
    arcpy.management.CalculateField(output_fc, 'tweet_count', '!Join_Count!', 'PYTHON3')
    
    return output_fc

print("Creating spatial aggregations...")

# Aggregate for all time (no time filter)
states_all = spatial_join_and_count(tweets_fc, states_fc, 'states_all_time')
counties_all = spatial_join_and_count(tweets_fc, counties_fc, 'counties_all_time')

print(f"\nCreated aggregations:")
print(f"  States: {states_all}")
print(f"  Counties: {counties_all}")

In [None]:
# ==============================================================================
# TEMPORAL AGGREGATION: Create feature classes for each time bin
# ==============================================================================

def create_temporal_aggregations(tweet_fc, geography_fc, geography_name, time_bins):
    """
    Create separate feature classes for each time bin (incremental).
    Also create cumulative versions.
    """
    print(f"\nProcessing {geography_name} temporal aggregations...")
    
    incremental_fcs = []
    cumulative_fcs = []
    cumulative_counts = {}  # Track cumulative by entity ID
    
    for idx, time_bin in enumerate(time_bins):
        bin_str = time_bin.strftime('%Y%m%d_%H%M')
        print(f"  Processing bin {idx+1}/{len(time_bins)}: {time_bin.strftime('%Y-%m-%d %H:%M')}")
        
        # INCREMENTAL
        inc_name = f"{geography_name}_inc_{bin_str}"
        inc_fc = spatial_join_and_count(tweet_fc, geography_fc, inc_name, time_bin)
        
        # Add time bin field
        arcpy.management.AddField(inc_fc, 'time_bin', 'DATE')
        arcpy.management.AddField(inc_fc, 'time_bin_str', 'TEXT', field_length=50)
        arcpy.management.CalculateField(
            inc_fc, 'time_bin', 
            f"datetime.datetime({time_bin.year}, {time_bin.month}, {time_bin.day}, {time_bin.hour}, {time_bin.minute})",
            'PYTHON3'
        )
        arcpy.management.CalculateField(
            inc_fc, 'time_bin_str',
            f"'{time_bin.strftime('%Y-%m-%d %H:%M:%S')}'",
            'PYTHON3'
        )
        
        incremental_fcs.append(inc_fc)
        
        # CUMULATIVE - update running totals
        # Read current counts and add to cumulative
        id_field = get_id_field(geography_name)
        with arcpy.da.SearchCursor(inc_fc, [id_field, 'tweet_count']) as cursor:
            for row in cursor:
                entity_id = row[0]
                count = row[1] if row[1] else 0
                cumulative_counts[entity_id] = cumulative_counts.get(entity_id, 0) + count
        
        # Create cumulative feature class
        cum_name = f"{geography_name}_cum_{bin_str}"
        cum_fc = os.path.join(gdb_path, cum_name)
        arcpy.management.Copy(geography_fc, cum_fc)
        
        # Add cumulative count field
        arcpy.management.AddField(cum_fc, 'cumul_cnt', 'LONG')
        arcpy.management.AddField(cum_fc, 'time_bin', 'DATE')
        arcpy.management.AddField(cum_fc, 'time_bin_str', 'TEXT', field_length=50)
        
        # Update cumulative counts
        with arcpy.da.UpdateCursor(cum_fc, [id_field, 'cumul_cnt']) as cursor:
            for row in cursor:
                entity_id = row[0]
                row[1] = cumulative_counts.get(entity_id, 0)
                cursor.updateRow(row)
        
        # Set time bin
        arcpy.management.CalculateField(
            cum_fc, 'time_bin',
            f"datetime.datetime({time_bin.year}, {time_bin.month}, {time_bin.day}, {time_bin.hour}, {time_bin.minute})",
            'PYTHON3'
        )
        arcpy.management.CalculateField(
            cum_fc, 'time_bin_str',
            f"'{time_bin.strftime('%Y-%m-%d %H:%M:%S')}'",
            'PYTHON3'
        )
        
        cumulative_fcs.append(cum_fc)
    
    return incremental_fcs, cumulative_fcs

def get_id_field(geography_name):
    """Get the unique ID field for each geography type."""
    if 'state' in geography_name.lower():
        return 'STUSPS'
    elif 'count' in geography_name.lower():
        return 'GEOID'
    elif 'cit' in geography_name.lower():
        return 'geonameid'
    return 'OBJECTID'

# Create temporal aggregations
states_inc, states_cum = create_temporal_aggregations(tweets_fc, states_fc, 'states', time_bins)
counties_inc, counties_cum = create_temporal_aggregations(tweets_fc, counties_fc, 'counties', time_bins)

print(f"\nCreated {len(states_inc)} incremental state feature classes")
print(f"Created {len(states_cum)} cumulative state feature classes")
print(f"Created {len(counties_inc)} incremental county feature classes")
print(f"Created {len(counties_cum)} cumulative county feature classes")

In [None]:
# ==============================================================================
# MERGE TEMPORAL DATA: Create master feature classes
# ==============================================================================

def merge_temporal_fcs(fc_list, output_name):
    """
    Merge all temporal feature classes into one master feature class.
    """
    output_fc = os.path.join(gdb_path, output_name)
    arcpy.management.Merge(fc_list, output_fc)
    count = arcpy.management.GetCount(output_fc)[0]
    print(f"Merged {len(fc_list)} feature classes into {output_name}: {count} features")
    return output_fc

print("\nMerging temporal feature classes...")

# Merge incremental
states_inc_all = merge_temporal_fcs(states_inc, 'states_INCREMENTAL_ALL')
counties_inc_all = merge_temporal_fcs(counties_inc, 'counties_INCREMENTAL_ALL')

# Merge cumulative
states_cum_all = merge_temporal_fcs(states_cum, 'states_CUMULATIVE_ALL')
counties_cum_all = merge_temporal_fcs(counties_cum, 'counties_CUMULATIVE_ALL')

print("\n✓ Master feature classes created!")

In [None]:
# ==============================================================================
# RASTER CREATION: Convert tweet density to rasters
# ==============================================================================

def create_kernel_density_raster(point_fc, output_raster, cell_size=0.1, search_radius=1.0):
    """
    Create kernel density raster from tweet points.
    """
    arcpy.sa.KernelDensity(
        point_fc,
        'NONE',  # No population field
        cell_size,
        search_radius,
        'SQUARE_KILOMETERS'
    ).save(output_raster)
    
    print(f"Created kernel density raster: {os.path.basename(output_raster)}")
    return output_raster

def create_polygon_to_raster(polygon_fc, value_field, output_raster, cell_size=0.1):
    """
    Convert polygon feature class to raster using a value field.
    """
    arcpy.conversion.PolygonToRaster(
        polygon_fc,
        value_field,
        output_raster,
        'CELL_CENTER',
        'NONE',
        cell_size
    )
    
    print(f"Created polygon raster: {os.path.basename(output_raster)}")
    return output_raster

print("\nCreating rasters...")

# Create overall density raster
density_raster = os.path.join(gdb_path, 'tweet_density_all')
create_kernel_density_raster(tweets_fc, density_raster, cell_size=0.05, search_radius=0.5)

# Create state count raster
states_raster = os.path.join(gdb_path, 'states_count_raster')
create_polygon_to_raster(states_all, 'tweet_count', states_raster, cell_size=0.1)

# Create county count raster
counties_raster = os.path.join(gdb_path, 'counties_count_raster')
create_polygon_to_raster(counties_all, 'tweet_count', counties_raster, cell_size=0.05)

print("\n✓ Rasters created!")

In [None]:
# ==============================================================================
# TEMPORAL RASTERS: Create rasters for each time bin
# ==============================================================================

def create_temporal_rasters(fc_list, value_field, output_prefix, cell_size=0.05):
    """
    Create rasters for each temporal feature class.
    """
    rasters = []
    
    for idx, fc in enumerate(fc_list):
        fc_name = os.path.basename(fc)
        raster_name = f"{output_prefix}_{fc_name}"
        output_raster = os.path.join(gdb_path, raster_name)
        
        try:
            arcpy.conversion.PolygonToRaster(
                fc,
                value_field,
                output_raster,
                'CELL_CENTER',
                'NONE',
                cell_size
            )
            rasters.append(output_raster)
            print(f"  Created raster {idx+1}/{len(fc_list)}: {raster_name}")
        except Exception as e:
            print(f"  Error creating {raster_name}: {e}")
    
    return rasters

print("\nCreating temporal rasters...")

# Create incremental rasters
print("  Incremental state rasters...")
states_inc_rasters = create_temporal_rasters(states_inc, 'tweet_count', 'raster_states_inc')

print("  Cumulative state rasters...")
states_cum_rasters = create_temporal_rasters(states_cum, 'cumul_cnt', 'raster_states_cum')

print(f"\n✓ Created {len(states_inc_rasters)} incremental rasters")
print(f"✓ Created {len(states_cum_rasters)} cumulative rasters")

In [None]:
# ==============================================================================
# SUMMARY & VALIDATION
# ==============================================================================

print("\n" + "="*60)
print("PROCESSING COMPLETE - SUMMARY")
print("="*60)

print("\nGEODATABASE CONTENTS:")
arcpy.env.workspace = gdb_path

# List feature classes
feature_classes = arcpy.ListFeatureClasses()
print(f"\nFeature Classes: {len(feature_classes)}")
for fc in sorted(feature_classes)[:20]:  # Show first 20
    count = arcpy.management.GetCount(fc)[0]
    print(f"  {fc}: {count} features")
if len(feature_classes) > 20:
    print(f"  ... and {len(feature_classes) - 20} more")

# List rasters
rasters = arcpy.ListRasters()
print(f"\nRasters: {len(rasters)}")
for raster in sorted(rasters)[:10]:  # Show first 10
    print(f"  {raster}")
if len(rasters) > 10:
    print(f"  ... and {len(rasters) - 10} more")

print("\n" + "="*60)
print("KEY OUTPUTS FOR ARCGIS PRO VISUALIZATION:")
print("="*60)
print("\nTemporal Feature Classes (time-enabled):")
print("  - states_INCREMENTAL_ALL")
print("  - states_CUMULATIVE_ALL")
print("  - counties_INCREMENTAL_ALL")
print("  - counties_CUMULATIVE_ALL")
print("\nTo enable time:")
print("  1. Add layer to map")
print("  2. Right-click → Properties → Time tab")
print("  3. Enable time using 'time_bin' field")
print("  4. Set time step to 4 hours")
print("\nRasters:")
print("  - tweet_density_all (kernel density)")
print("  - states_count_raster")
print("  - counties_count_raster")
print("  - raster_states_inc_* (temporal)")
print("  - raster_states_cum_* (temporal)")
print("\n" + "="*60)

In [None]:
# ==============================================================================
# EXPORT TO SHAPEFILE (Optional - for external use)
# ==============================================================================

def export_to_shapefile(input_fc, output_folder):
    """
    Export feature class to shapefile for external use.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    fc_name = os.path.basename(input_fc)
    output_shp = os.path.join(output_folder, f"{fc_name}.shp")
    
    arcpy.conversion.FeatureClassToShapefile([input_fc], output_folder)
    print(f"Exported: {output_shp}")
    return output_shp

# Export key outputs
output_folder = os.path.join(project_root, 'arcgis_outputs', 'shapefiles')

print("\nExporting to shapefiles...")
export_to_shapefile(states_inc_all, output_folder)
export_to_shapefile(states_cum_all, output_folder)
export_to_shapefile(counties_inc_all, output_folder)
export_to_shapefile(counties_cum_all, output_folder)

print(f"\n✓ Shapefiles exported to: {output_folder}")