# Hurricane Social Media Analysis - ArcPy Native Implementation
## Complete ArcGIS Pro Native Notebook

This notebook uses **primarily ArcPy** for all spatial operations, with minimal external dependencies.

**Key Features:**
- Native ArcPy feature class and raster operations
- Temporary geodatabase objects (removed by the cleanup step at the end of the notebook)
- Multi-level geographic matching
- Kernel Density Estimation using arcpy.sa.KernelDensity
- Hierarchical weighted rasterization
- Time-binned GeoTIFF outputs

**Dependencies:**
- arcpy (included with ArcGIS Pro)
- numpy (included with ArcGIS Pro)
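- fuzzywuzzy (for text matching; not included with ArcGIS Pro, so install it into the active Python environment before running Cell 1)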

---
## Cell 1: Configuration & Setup

In [6]:
import arcpy
import os
import re
import numpy as np
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Any, Optional
from collections import defaultdict
from fuzzywuzzy import fuzz, process

# Allow overwriting outputs
# (the pipeline reuses fixed temp dataset names such as "temp_state_single")
arcpy.env.overwriteOutput = True

# Check out Spatial Analyst extension
# (arcpy.sa.KernelDensity is used by the rasterization cell)
if arcpy.CheckExtension("Spatial") == "Available":
    arcpy.CheckOutExtension("Spatial")
    print("Spatial Analyst extension checked out")
else:
    raise Exception("Spatial Analyst extension not available")

# === CONFIGURATION ===

# Base Path
# NOTE(review): machine-specific absolute path; edit before running elsewhere.
LOCAL_PATH = r"C:\users\colto\documents\github\tweet_project"

# Data Directories
DATA_DIR = os.path.join(LOCAL_PATH, "data")
GEOJSON_DIR = os.path.join(DATA_DIR, "geojson")
SHAPE_FILES_DIR = os.path.join(DATA_DIR, "shape_files")
# Root folder for the GeoTIFF outputs; per-hurricane subfolders are created
# by process_hurricane_arcpy.
OUTPUT_DIR = os.path.join(LOCAL_PATH, "rasters_output_arcpy")

# Input Paths
FRANCINE_PATH = os.path.join(GEOJSON_DIR, "francine.geojson")
HELENE_PATH = os.path.join(GEOJSON_DIR, "helene.geojson")
STATES_PATH = os.path.join(SHAPE_FILES_DIR, "cb_2023_us_state_20m.shp")
COUNTIES_PATH = os.path.join(SHAPE_FILES_DIR, "cb_2023_us_county_20m.shp")
CITIES_PATH = os.path.join(SHAPE_FILES_DIR, "US_Cities.shp")

# Workspace - Create scratch geodatabase for temporary features
# Both the current and scratch workspaces point at it, so bare dataset names
# used throughout the notebook resolve inside this geodatabase.
SCRATCH_GDB = os.path.join(LOCAL_PATH, "scratch.gdb")
if not arcpy.Exists(SCRATCH_GDB):
    arcpy.CreateFileGDB_management(LOCAL_PATH, "scratch.gdb")
arcpy.env.workspace = SCRATCH_GDB
arcpy.env.scratchWorkspace = SCRATCH_GDB

# Spatial Reference
TARGET_SR = arcpy.SpatialReference(3857)  # Web Mercator
WGS84_SR = arcpy.SpatialReference(4326)  # WGS84

# Raster Settings
CELL_SIZE_M = 1000  # 1km cells

# Hierarchical Weights
# Per-scale multipliers applied to tweet counts during rasterization.
WEIGHTS: Dict[str, int] = {
    "STATE": 2,
    "COUNTY": 5,
    "CITY": 10,
    "FACILITY": 10,
}

# Fuzzy Matching & Time
# Score cutoffs for fuzzy name matching and the width of a time bin in hours.
FUZZY_THRESHOLD = 75
FUZZY_THRESHOLD_CONTEXTUAL = 70
TIME_BIN_HOURS = 4

# KDE Parameters
CITY_KDE_SEARCH_RADIUS = 3000  # 3km search radius for city KDE
FACILITY_KDE_SEARCH_RADIUS = 2000  # 2km for facilities

print("Configuration loaded")
print(f"Scratch GDB: {SCRATCH_GDB}")
print(f"Output: {OUTPUT_DIR}")

<class 'ModuleNotFoundError'>: No module named 'fuzzywuzzy'

---
## Cell 2: Utility Functions

In [None]:
def cleanup_temp_features(pattern="temp_*"):
    """
    Delete temporary datasets in the scratch geodatabase matching ``pattern``.

    Feature classes, tables and rasters whose names match the wildcard are
    removed. Deletion is best effort: a dataset that cannot be deleted is
    reported and skipped so the remaining ones are still cleaned up.

    Side effect: sets ``arcpy.env.workspace`` to ``SCRATCH_GDB``.
    """
    arcpy.env.workspace = SCRATCH_GDB

    # Guard with "or []" in case a List* call yields None instead of a list.
    leftovers = list(arcpy.ListFeatureClasses(pattern) or [])
    leftovers += list(arcpy.ListTables(pattern) or [])
    leftovers += list(arcpy.ListRasters(pattern) or [])

    for dataset in leftovers:
        try:
            arcpy.Delete_management(dataset)
        except Exception as exc:  # best effort, but no longer silent
            print(f"  Warning: could not delete {dataset}: {exc}")


# Whole-word abbreviations expanded wherever they appear in a name.
_WORD_ABBREVIATIONS = (
    (re.compile(r"\bST\.?\b"), "SAINT"),
    (re.compile(r"\bMT\.?\b"), "MOUNT"),
    (re.compile(r"\bFT\.?\b"), "FORT"),
)

_DIRECTION_NAMES = {"N": "NORTH", "S": "SOUTH", "E": "EAST", "W": "WEST"}

# A single direction letter counts only when it is not glued to a letter,
# a period or an apostrophe on its left, nor to a letter/apostrophe on its
# right. This keeps possessives ("GEORGE'S") and dotted initials ("U.S.")
# from being rewritten as compass words.
_DIRECTION_RE = re.compile(r"(?<![\w.'\u2019])([NSEW])\.?(?![\w'\u2019])")


def preprocess_place_name(name: Any) -> Optional[str]:
    """
    Standardize a place name for matching.

    The value is upper-cased, common abbreviations (ST, MT, FT and the
    single-letter compass directions) are expanded, punctuation is removed
    and runs of whitespace are collapsed.

    Returns:
        The normalized name, or None when the input is empty, a NaN
        placeholder in any letter case (including float NaN), or contains
        nothing but punctuation.
    """
    if not name:
        return None

    text = str(name).upper().strip()
    # Compare after upper-casing so "nan", "NaN" and float('nan') are caught.
    if not text or text == "NAN":
        return None

    for pattern, expansion in _WORD_ABBREVIATIONS:
        text = pattern.sub(expansion, text)
    text = _DIRECTION_RE.sub(lambda m: _DIRECTION_NAMES[m.group(1)], text)

    # Remove punctuation, then collapse whitespace left behind.
    text = re.sub(r"[^\w\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()

    return text or None


def parse_gpe_entities(gpe_string: Any) -> List[str]:
    """
    Split a raw GPE value into normalized, de-duplicated place names.

    The text is split on commas and then on ``;``, ``&`` and ``|``.
    Fragments of one character or less are dropped; the rest are passed
    through ``preprocess_place_name``. First-seen order is preserved.
    """
    if not gpe_string or str(gpe_string).strip() == "":
        return []

    text = str(gpe_string).strip()
    fragments = (
        piece.strip()
        for chunk in text.split(",")
        for piece in re.split(r"[;&|]", chunk)
    )

    # A dict keeps insertion order, so it doubles as an ordered set.
    ordered = {}
    for fragment in fragments:
        if len(fragment) <= 1:
            continue
        cleaned = preprocess_place_name(fragment)
        if cleaned:
            ordered.setdefault(cleaned, None)

    return list(ordered)


def floor_to_hours(dt: datetime, hours: int) -> datetime:
    """
    Round ``dt`` down to the start of its ``hours``-wide bin.

    Bins are counted from midnight of the same day; minutes, seconds and
    microseconds are zeroed.
    """
    bin_start_hour = dt.hour - (dt.hour % hours)
    return dt.replace(hour=bin_start_hour, minute=0, second=0, microsecond=0)


# Status message confirming this cell's definitions were executed.
print("Utility functions defined")

---
## Cell 3: Data Loading with ArcPy

In [None]:
def _parse_tweet_time(value: Any) -> Optional[datetime]:
    """Convert a raw ``time`` attribute to a naive UTC datetime, or None if unparseable."""
    from datetime import timezone  # the file-level import only brings in datetime/timedelta

    if isinstance(value, datetime):
        parsed = value
    else:
        text = str(value).strip()
        try:
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            try:
                parsed = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
            except ValueError:
                return None

    # Normalize offset-aware values to naive UTC so every row is handled the
    # same way; values without an offset are taken to already be in UTC.
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed


def load_hurricane_data_arcpy(geojson_path: str, hurricane_name: str) -> Tuple[str, Dict[str, Any]]:
    """
    Load a tweet GeoJSON file into a point feature class and add time bins.

    Three fields are added and filled for every row with a parseable
    ``time`` value: ``time_bin`` (bin start, naive UTC), ``unix_timestamp``
    (bin start as epoch seconds) and ``bin_label`` (``YYYYMMDD_HHMM``).
    Rows with an empty ``time`` are left untouched; rows whose ``time``
    cannot be parsed are skipped and reported.

    Args:
        geojson_path: Path to the GeoJSON file of tweet points.
        hurricane_name: Used in the temp feature class name and log output.

    Returns:
        (feature_class_name, metadata) where metadata holds ``count``
        (rows binned), ``time_bins`` (sorted epoch-second bin starts) and
        ``timestamp_dict`` (bin start seconds -> naive UTC datetime).
    """
    from datetime import timezone  # the file-level import only brings in datetime/timedelta

    print(f"\nLoading {hurricane_name} from {geojson_path}...")

    # Tweets are points; request point geometry explicitly rather than
    # relying on the tool's default geometry type for GeoJSON input.
    temp_fc = f"temp_{hurricane_name}_raw"
    arcpy.JSONToFeatures_conversion(geojson_path, temp_fc, "POINT")

    # Add fields for time processing
    arcpy.AddField_management(temp_fc, "time_bin", "DATE")
    arcpy.AddField_management(temp_fc, "unix_timestamp", "LONG")
    arcpy.AddField_management(temp_fc, "bin_label", "TEXT", field_length=50)

    time_bins = set()
    timestamp_dict = {}
    count = 0
    skipped = 0

    fields = ["time", "time_bin", "unix_timestamp", "bin_label"]
    with arcpy.da.UpdateCursor(temp_fc, fields) as cursor:
        for row in cursor:
            if not row[0]:
                continue

            dt = _parse_tweet_time(row[0])
            if dt is None:
                skipped += 1
                continue

            time_bin = floor_to_hours(dt, TIME_BIN_HOURS)
            # Epoch *seconds*: a LONG field is a 32-bit integer, which
            # epoch milliseconds would overflow. Pin the zone to UTC so the
            # value does not depend on the machine's local time zone.
            unix_ts = int(time_bin.replace(tzinfo=timezone.utc).timestamp())

            row[1] = time_bin
            row[2] = unix_ts
            row[3] = time_bin.strftime("%Y%m%d_%H%M")
            cursor.updateRow(row)

            time_bins.add(unix_ts)
            timestamp_dict[unix_ts] = time_bin
            count += 1

    print(f"  Loaded {count} {hurricane_name} tweets")
    print(f"  Time bins: {len(time_bins)}")
    if skipped:
        print(f"  Skipped {skipped} rows with unparseable time values")

    metadata = {
        "count": count,
        "time_bins": sorted(time_bins),
        "timestamp_dict": timestamp_dict,
    }

    return temp_fc, metadata


def load_reference_layers() -> Dict[str, str]:
    """
    Project the state, county and city shapefiles into ``TARGET_SR``.

    Cities are additionally reduced to one inside-point per feature.
    Returns a dict mapping ``"states"``, ``"counties"`` and ``"cities"`` to
    the names of the projected feature classes.
    """
    print("\nLoading reference shapefiles...")

    def feature_total(dataset: str) -> int:
        return int(arcpy.GetCount_management(dataset)[0])

    layers = {}

    # Polygon layers only need reprojection.
    polygon_sources = (
        ("states", "States", STATES_PATH, "temp_states_proj"),
        ("counties", "Counties", COUNTIES_PATH, "temp_counties_proj"),
    )
    for key, label, source_path, projected in polygon_sources:
        arcpy.Project_management(source_path, projected, TARGET_SR)
        print(f"  {label}: {feature_total(projected)}")
        layers[key] = projected

    # Cities: project to an intermediate dataset, then convert to points.
    intermediate = "temp_cities_temp"
    city_points = "temp_cities_proj"
    arcpy.Project_management(CITIES_PATH, intermediate, TARGET_SR)
    arcpy.FeatureToPoint_management(intermediate, city_points, "INSIDE")
    print(f"  Cities (as points): {feature_total(city_points)}")
    layers["cities"] = city_points

    # The intermediate projection is no longer needed.
    if arcpy.Exists(intermediate):
        arcpy.Delete_management(intermediate)

    return layers


# Status message confirming this cell's definitions were executed.
print("Data loading functions defined")

---
## Cell 4: Geographic Matching Functions

In [None]:
def create_hierarchical_lookups_arcpy(layers: Dict[str, str]) -> Dict[str, Any]:
    """
    Build name -> geometry lookup dictionaries from the reference layers.

    Args:
        layers: Dict with ``"states"``, ``"counties"`` and ``"cities"``
            feature classes (as returned by ``load_reference_layers``).

    Returns:
        Dict with these entries:
        - ``state_lookup``: normalized state name *and* abbreviation -> geometry
        - ``county_lookup`` / ``city_lookup``: normalized name -> geometry.
          Names are not unique nationwide, so a later feature with the same
          name replaces an earlier one.
        - ``county_by_state`` / ``city_by_state``: state name -> {name: geometry}
        - ``state_abbrev_to_name`` / ``state_name_to_abbrev``
    """
    print("\nCreating hierarchical lookups...")

    lookups = {
        "state_lookup": {},
        "county_lookup": {},
        "city_lookup": {},
        "county_by_state": defaultdict(dict),
        "city_by_state": defaultdict(dict),
        "state_abbrev_to_name": {},
        "state_name_to_abbrev": {},
    }

    # States. The FIPS -> name map is collected in the same pass so counties
    # can find their state without opening a new cursor per county.
    state_name_by_fips = {}
    state_fields = ["NAME", "STUSPS", "STATEFP", "SHAPE@"]
    with arcpy.da.SearchCursor(layers["states"], state_fields) as cursor:
        for raw_name, raw_abbrev, fips, geom in cursor:
            name = preprocess_place_name(raw_name)
            abbrev = str(raw_abbrev).upper() if raw_abbrev else None

            if fips is not None:
                # First row wins, mirroring a first-match scan.
                state_name_by_fips.setdefault(str(fips), name)

            if name:
                lookups["state_lookup"][name] = geom
                if abbrev:
                    lookups["state_lookup"][abbrev] = geom
                    lookups["state_abbrev_to_name"][abbrev] = name
                    lookups["state_name_to_abbrev"][name] = abbrev

    # Counties
    with arcpy.da.SearchCursor(layers["counties"], ["NAME", "STATEFP", "SHAPE@"]) as cursor:
        for raw_name, fips, geom in cursor:
            name = preprocess_place_name(raw_name)
            if not name:
                continue

            lookups["county_lookup"][name] = geom

            state_name = state_name_by_fips.get(str(fips)) if fips else None
            if state_name:
                lookups["county_by_state"][state_name][name] = geom

    # Cities (points)
    with arcpy.da.SearchCursor(layers["cities"], ["NAME", "ST", "SHAPE@"]) as cursor:
        for raw_name, raw_abbrev, geom in cursor:
            name = preprocess_place_name(raw_name)
            if not name:
                continue

            lookups["city_lookup"][name] = geom

            state_abbrev = str(raw_abbrev).upper() if raw_abbrev else None
            state_full = lookups["state_abbrev_to_name"].get(state_abbrev) if state_abbrev else None
            if state_full is not None:
                lookups["city_by_state"][state_full][name] = geom

    print(f"  States: {len(lookups['state_lookup'])}")
    print(f"  Counties: {len(lookups['county_lookup'])}")
    print(f"  Cities: {len(lookups['city_lookup'])}")

    return lookups


def fuzzy_match_entity(
    entity: Optional[str],
    candidates: Dict[str, Any],
    threshold: int = FUZZY_THRESHOLD,
) -> Tuple[Optional[str], int]:
    """
    Find the candidate key that best matches ``entity``.

    An exact key hit returns a score of 100. Otherwise the best
    ``fuzz.ratio`` match is returned when its score reaches ``threshold``.
    Returns ``(None, 0)`` when nothing qualifies or an input is empty.
    """
    if entity and candidates:
        if entity in candidates:
            return entity, 100

        best = process.extractOne(entity, candidates.keys(), scorer=fuzz.ratio)
        if best and best[1] >= threshold:
            return best[0], best[1]

    return None, 0


def _collect_scale_matches(
    entities: List[str],
    scale: str,
    global_lookup: Dict[str, Any],
    lookup_by_state: Dict[str, Dict[str, Any]],
    state_names: List[str],
) -> List[Tuple[str, Optional[str], Any, int]]:
    """Match entities at one scale, preferring hits inside already-matched states."""
    matches: List[Tuple[str, Optional[str], Any, int]] = []

    for entity in entities:
        name, score = fuzzy_match_entity(entity, global_lookup, threshold=FUZZY_THRESHOLD)
        if name:
            matches.append((scale, name, global_lookup[name], score))

        for state_name in state_names:
            state_candidates = lookup_by_state.get(state_name)
            if not state_candidates:
                continue

            ctx_name, ctx_score = fuzzy_match_entity(
                entity, state_candidates, threshold=FUZZY_THRESHOLD_CONTEXTUAL
            )
            # ">=" so that, on a tie, the geometry from the mentioned state
            # replaces the nationwide entry (place names repeat across states).
            if ctx_name and ctx_score >= score:
                matches = [m for m in matches if m[1] != name]
                matches.append((scale, ctx_name, state_candidates[ctx_name], ctx_score))

    return matches


def find_all_geographic_matches(
    entities: List[str],
    lookups: Dict[str, Any],
) -> List[Tuple[str, Optional[str], Any, int]]:
    """
    Find all STATE/COUNTY/CITY matches for a list of normalized entities.

    States are matched first and reported under their full name even when
    the text used the abbreviation, so one state is never counted under two
    names and the per-state county/city lookups (keyed by full name) are
    found. Counties and cities are then matched nationwide and again within
    each matched state using the lower contextual threshold.

    Returns:
        De-duplicated ``(scale, matched_name, geometry, score)`` tuples,
        unique per ``(scale, matched_name)``, in first-seen order.
    """
    if not entities:
        return []

    state_lookup = lookups["state_lookup"]
    abbrev_to_name = lookups.get("state_abbrev_to_name", {})

    all_matches: List[Tuple[str, Optional[str], Any, int]] = []
    state_names: List[str] = []  # canonical full names, first-seen order

    # STATES
    for entity in entities:
        hit, score = fuzzy_match_entity(entity, state_lookup, threshold=FUZZY_THRESHOLD)
        if not hit:
            continue
        canonical = abbrev_to_name.get(hit, hit)
        geometry = state_lookup.get(canonical, state_lookup[hit])
        all_matches.append(("STATE", canonical, geometry, score))
        if canonical not in state_names:
            state_names.append(canonical)

    # COUNTIES and CITIES share the same global + contextual logic.
    all_matches += _collect_scale_matches(
        entities, "COUNTY", lookups["county_lookup"], lookups["county_by_state"], state_names
    )
    all_matches += _collect_scale_matches(
        entities, "CITY", lookups["city_lookup"], lookups["city_by_state"], state_names
    )

    # De-duplicate on (scale, name), keeping the first occurrence.
    unique_matches: List[Tuple[str, Optional[str], Any, int]] = []
    seen = set()
    for match in all_matches:
        combo = (match[0], match[1])
        if combo not in seen:
            unique_matches.append(match)
            seen.add(combo)

    return unique_matches


# Status message confirming this cell's definitions were executed.
print("Geographic matching functions defined")

---
## Cell 5: Tweet Expansion & Aggregation

In [None]:
def expand_tweets_arcpy(
    input_fc: str,
    lookups: Dict[str, Any],
    output_fc: str,
    hurricane_name: str
) -> str:
    """
    Write one output point per (tweet, geographic match) pair.

    Each tweet's GPE text is parsed and matched; a non-empty FAC value adds
    a FACILITY match, and tweets with no match at all get a single
    UNMATCHED row. Every output row keeps the tweet's own point geometry.

    Returns the name of the created feature class (``output_fc``).
    """
    print(f"\nExpanding {hurricane_name} tweets by matches...")

    # Output schema: point feature class plus attribute fields.
    arcpy.CreateFeatureclass_management(
        SCRATCH_GDB,
        output_fc,
        "POINT",
        spatial_reference=TARGET_SR
    )

    schema = (
        ("original_id", "LONG", None),
        ("unix_timestamp", "LONG", None),
        ("bin_label", "TEXT", 50),
        ("scale_level", "TEXT", 20),
        ("matched_name", "TEXT", 200),
        ("match_score", "SHORT", None),
        ("GPE", "TEXT", 500),
        ("FAC", "TEXT", 200),
    )
    for field_name, field_type, length in schema:
        if length is None:
            arcpy.AddField_management(output_fc, field_name, field_type)
        else:
            arcpy.AddField_management(output_fc, field_name, field_type, field_length=length)

    source_fields = ["OID@", "SHAPE@", "unix_timestamp", "bin_label", "GPE", "FAC"]
    insert_fields = ["SHAPE@"] + [field_name for field_name, _, _ in schema]

    tweets_seen = 0
    rows_written = 0
    scale_counts = defaultdict(int)

    with arcpy.da.SearchCursor(input_fc, source_fields) as reader, \
            arcpy.da.InsertCursor(output_fc, insert_fields) as writer:
        for oid, geom, unix_ts, bin_label, gpe, fac in reader:
            if tweets_seen % 100 == 0:
                print(f"  Processing tweet {tweets_seen}...")

            entities = parse_gpe_entities(gpe)
            matches = find_all_geographic_matches(entities, lookups) if entities else []

            # A usable facility value counts as its own match.
            if fac and str(fac).strip() not in ["", "nan", "NAN"]:
                matches.append(("FACILITY", str(fac), geom, 100))

            if not matches:
                matches.append(("UNMATCHED", None, geom, 0))

            gpe_text = gpe if gpe else ""
            fac_text = fac if fac else ""
            for scale, name, _matched_geom, score in matches:
                # The tweet's own point is stored, not the matched geometry.
                writer.insertRow([
                    geom,
                    oid,
                    unix_ts,
                    bin_label,
                    scale,
                    name if name else "",
                    score,
                    gpe_text,
                    fac_text,
                ])
                rows_written += 1
                scale_counts[scale] += 1

            tweets_seen += 1

    print(f"\n  Expanded from {tweets_seen} to {rows_written} rows")
    print(f"\n  Scale distribution:")
    for scale, cnt in sorted(scale_counts.items()):
        print(f"    {scale}: {cnt}")

    return output_fc


def create_interval_counts_dict(expanded_fc: str) -> Dict[Tuple[int, str, str], int]:
    """
    Count expanded rows per (unix_timestamp, scale_level, matched_name).

    A missing matched name is stored as an empty string in the key.
    Returns a plain dict mapping each key to its row count.
    """
    print("\nCreating interval counts...")

    tally = defaultdict(int)
    key_fields = ["unix_timestamp", "scale_level", "matched_name"]

    with arcpy.da.SearchCursor(expanded_fc, key_fields) as cursor:
        for unix_ts, scale, name in cursor:
            tally[(unix_ts, scale, name or "")] += 1

    print(f"  Created {len(tally)} unique (time, scale, name) combinations")

    return dict(tally)


# Status message confirming this cell's definitions were executed.
print("Expansion & aggregation functions defined")

---
## Cell 6: Rasterization with ArcPy

In [None]:
def calculate_grid_extent(francine_fc: str, helene_fc: str) -> Dict[str, Any]:
    """
    Derive the shared raster grid from the union of two layer extents.

    The lower-left corner is the union minimum; the upper-right corner is
    pushed outward so the grid spans a whole number of ``CELL_SIZE_M`` cells.

    Returns a dict with ``width``, ``height``, ``extent`` (space-separated
    "minx miny maxx maxy"), the four corner values and ``cell_size``.
    """
    print("\nCalculating grid extent...")

    extents = [arcpy.Describe(fc).extent for fc in (francine_fc, helene_fc)]

    # Union of both extents
    minx = min(ext.XMin for ext in extents)
    miny = min(ext.YMin for ext in extents)
    union_maxx = max(ext.XMax for ext in extents)
    union_maxy = max(ext.YMax for ext in extents)

    # Whole number of cells in each direction
    width = int(np.ceil((union_maxx - minx) / CELL_SIZE_M))
    height = int(np.ceil((union_maxy - miny) / CELL_SIZE_M))

    # Snap the upper-right corner onto the grid
    maxx = minx + (width * CELL_SIZE_M)
    maxy = miny + (height * CELL_SIZE_M)

    extent_str = f"{minx} {miny} {maxx} {maxy}"

    print(f"  Cell size: {CELL_SIZE_M} meters")
    print(f"  Grid: {width} x {height} cells")
    print(f"  Extent: {extent_str}")

    grid = {"width": width, "height": height, "extent": extent_str}
    grid.update(minx=minx, miny=miny, maxx=maxx, maxy=maxy, cell_size=CELL_SIZE_M)
    return grid


def _delete_if_exists(*datasets: str) -> None:
    """Delete each named dataset that currently exists."""
    for dataset in datasets:
        if arcpy.Exists(dataset):
            arcpy.Delete_management(dataset)


def _grid_array_from_raster(raster: str, grid_params: Dict[str, Any]) -> np.ndarray:
    """Read ``raster`` as an array windowed to the analysis grid (NoData -> 0)."""
    return arcpy.RasterToNumPyArray(
        raster,
        arcpy.Point(grid_params["minx"], grid_params["miny"]),
        grid_params["width"],
        grid_params["height"],
        nodata_to_value=0,
    )


def _accumulate(output_grid: np.ndarray, arr: np.ndarray, label: str) -> None:
    """Add ``arr`` into ``output_grid`` in place, reporting any shape mismatch."""
    if arr.shape == output_grid.shape:
        output_grid += arr
    else:
        print(f"      Warning: {label} raster shape {arr.shape} does not match "
              f"grid {output_grid.shape}; layer skipped")


def _rasterize_polygon_value(
    geometry: Any, value: float, tag: str, grid_params: Dict[str, Any]
) -> np.ndarray:
    """Burn one polygon with a constant value onto the analysis grid."""
    temp_fc = f"temp_{tag}_single"
    temp_raster = f"temp_{tag}_raster"
    try:
        arcpy.CreateFeatureclass_management(SCRATCH_GDB, temp_fc, "POLYGON", spatial_reference=TARGET_SR)
        arcpy.AddField_management(temp_fc, "value", "FLOAT")
        with arcpy.da.InsertCursor(temp_fc, ["SHAPE@", "value"]) as cursor:
            cursor.insertRow([geometry, float(value)])

        # Constrain the output to the grid extent; otherwise the raster only
        # covers the polygon's own bounding box and can never match the grid.
        with arcpy.EnvManager(extent=grid_params["extent"], cellSize=grid_params["cell_size"]):
            arcpy.PolygonToRaster_conversion(
                temp_fc,
                "value",
                temp_raster,
                cellsize=grid_params["cell_size"]
            )
        return _grid_array_from_raster(temp_raster, grid_params)
    finally:
        _delete_if_exists(temp_raster, temp_fc)


def _kernel_density_array(
    point_fc: str,
    weight_field: str,
    search_radius: float,
    temp_raster: str,
    grid_params: Dict[str, Any],
) -> np.ndarray:
    """Run Kernel Density over weighted points and return it on the analysis grid."""
    try:
        with arcpy.EnvManager(extent=grid_params["extent"], cellSize=grid_params["cell_size"]):
            # Positional order is (in_features, population_field, cell_size,
            # search_radius); the output is written with .save().
            density = arcpy.sa.KernelDensity(
                point_fc,
                weight_field,
                grid_params["cell_size"],
                search_radius
            )
            density.save(temp_raster)
        del density  # drop the reference before the dataset is deleted
        return _grid_array_from_raster(temp_raster, grid_params)
    finally:
        _delete_if_exists(temp_raster)


def create_hierarchical_raster_arcpy(
    expanded_fc: str,
    unix_timestamp: int,
    interval_counts: Dict[Tuple[int, str, str], int],
    lookups: Dict[str, Any],
    reference_layers: Dict[str, str],
    grid_params: Dict[str, Any]
) -> np.ndarray:
    """
    Create the hierarchical weighted raster for a single time bin.

    Layers are summed onto one grid:
    - STATE and COUNTY polygons are burned with ``log1p(count) * weight``.
      States that merely contain a matched county or city are included with
      a count of 1.
    - CITY points go through Kernel Density with ``log1p(count) * weight``.
    - FACILITY tweet points go through Kernel Density with ``count * weight``.

    Args:
        expanded_fc: Expanded tweet feature class (source of facility points).
        unix_timestamp: Time-bin key to rasterize.
        interval_counts: ``(unix_timestamp, scale, name) -> count``.
        lookups: Name -> geometry dictionaries.
        reference_layers: Not used by this function.
        grid_params: Grid description (width, height, extent, minx, miny,
            cell_size).

    Returns:
        ``float32`` array of shape ``(height, width)``; all zeros when the
        bin has no data.
    """
    output_grid = np.zeros((grid_params["height"], grid_params["width"]), dtype=np.float32)

    # Get data for this time bin
    bin_data = {k: v for k, v in interval_counts.items() if k[0] == unix_timestamp}
    if not bin_data:
        return output_grid

    state_lookup = lookups["state_lookup"]
    abbrev_to_name = lookups.get("state_abbrev_to_name", {})

    # Organize by scale level. State counts are merged under the full state
    # name so an abbreviation key and its full name are rasterized once.
    state_data: Dict[str, int] = defaultdict(int)
    county_data: Dict[str, int] = {}
    city_data: Dict[str, int] = {}
    facility_data: Dict[str, int] = {}
    for (_, scale, name), tweet_count in bin_data.items():
        if scale == "STATE":
            state_data[abbrev_to_name.get(name, name)] += tweet_count
        elif scale == "COUNTY":
            county_data[name] = tweet_count
        elif scale == "CITY":
            city_data[name] = tweet_count
        elif scale == "FACILITY":
            facility_data[name] = tweet_count

    def parent_state(location: Any) -> Optional[str]:
        """Return the full name of the first state containing ``location``."""
        for state_key, state_geom in state_lookup.items():
            if state_geom.contains(location):
                return abbrev_to_name.get(state_key, state_key)
        return None

    # Determine states to include: matched states plus parents of matched
    # counties and cities.
    states_to_include = set(state_data)
    for county_name in county_data:
        if county_name in lookups["county_lookup"]:
            parent = parent_state(lookups["county_lookup"][county_name].centroid)
            if parent:
                states_to_include.add(parent)
    for city_name in city_data:
        if city_name in lookups["city_lookup"]:
            parent = parent_state(lookups["city_lookup"][city_name])
            if parent:
                states_to_include.add(parent)

    # Rasterize STATES
    for state_name in states_to_include:
        if state_name in state_lookup:
            tweet_count = state_data.get(state_name, 1)
            value = np.log1p(float(tweet_count)) * WEIGHTS["STATE"]
            arr = _rasterize_polygon_value(state_lookup[state_name], value, "state", grid_params)
            _accumulate(output_grid, arr, f"state {state_name}")

    # Rasterize COUNTIES
    for county_name, tweet_count in county_data.items():
        if county_name in lookups["county_lookup"]:
            value = np.log1p(float(tweet_count)) * WEIGHTS["COUNTY"]
            arr = _rasterize_polygon_value(
                lookups["county_lookup"][county_name], value, "county", grid_params
            )
            _accumulate(output_grid, arr, f"county {county_name}")

    # CITIES using KDE
    if city_data:
        print(f"      Creating city KDE...")

        temp_city_fc = "temp_city_points"
        try:
            arcpy.CreateFeatureclass_management(SCRATCH_GDB, temp_city_fc, "POINT", spatial_reference=TARGET_SR)
            arcpy.AddField_management(temp_city_fc, "weight", "FLOAT")

            cities_processed = 0
            with arcpy.da.InsertCursor(temp_city_fc, ["SHAPE@", "weight"]) as cursor:
                for city_name, tweet_count in city_data.items():
                    if city_name in lookups["city_lookup"]:
                        weight = np.log1p(float(tweet_count)) * WEIGHTS["CITY"]
                        cursor.insertRow([lookups["city_lookup"][city_name], float(weight)])
                        cities_processed += 1

            if cities_processed > 0:
                arr = _kernel_density_array(
                    temp_city_fc, "weight", CITY_KDE_SEARCH_RADIUS, "temp_city_kde", grid_params
                )
                _accumulate(output_grid, arr, "city KDE")

                print(f"        Processed {cities_processed}/{len(city_data)} cities")
                print(f"        City KDE max: {np.max(arr):.2f}")
        finally:
            _delete_if_exists(temp_city_fc)

    # FACILITIES using KDE on the tweet points themselves
    if facility_data:
        temp_fac_fc = "temp_facility_points"
        where_clause = f"unix_timestamp = {unix_timestamp} AND scale_level = 'FACILITY'"
        try:
            arcpy.Select_analysis(expanded_fc, temp_fac_fc, where_clause)

            if int(arcpy.GetCount_management(temp_fac_fc)[0]) > 0:
                arcpy.AddField_management(temp_fac_fc, "kde_weight", "FLOAT")

                with arcpy.da.UpdateCursor(temp_fac_fc, ["matched_name", "kde_weight"]) as cursor:
                    for row in cursor:
                        tweet_count = facility_data.get(row[0], 1)
                        # NOTE(review): unlike the other scales this weight is
                        # not log-scaled; kept as found - confirm the intent.
                        row[1] = float(tweet_count) * WEIGHTS["FACILITY"]
                        cursor.updateRow(row)

                arr = _kernel_density_array(
                    temp_fac_fc, "kde_weight", FACILITY_KDE_SEARCH_RADIUS,
                    "temp_facility_kde", grid_params
                )
                _accumulate(output_grid, arr, "facility KDE")
        finally:
            _delete_if_exists(temp_fac_fc)

    return output_grid


# Status message confirming this cell's definitions were executed.
print("Rasterization functions defined")

---
## Cell 7: Process Hurricane Pipeline

In [None]:
def _write_grid_geotiff(grid: np.ndarray, lower_left: Any, cell_size: float, out_path: str) -> None:
    """Save ``grid`` as a raster at ``out_path`` and tag it with ``TARGET_SR``."""
    raster = arcpy.NumPyArrayToRaster(grid, lower_left, cell_size, cell_size)
    raster.save(out_path)
    del raster  # release the dataset before editing its projection
    # Define the projection on the saved file rather than on the unsaved
    # in-memory Raster object.
    arcpy.DefineProjection_management(out_path, TARGET_SR)


def process_hurricane_arcpy(
    hurricane_name: str,
    expanded_fc: str,
    time_bins: List[int],
    timestamp_dict: Dict[int, datetime],
    interval_counts: Dict[Tuple[int, str, str], int],
    lookups: Dict[str, Any],
    reference_layers: Dict[str, str],
    grid_params: Dict[str, Any]
) -> str:
    """
    Rasterize every time bin and write incremental and cumulative GeoTIFFs.

    For each bin in ``time_bins`` (processed in the order given) the
    incremental grid is built, added to a running cumulative grid, and both
    are saved as ``<hurricane>_tweets_<YYYYmmdd_HHMMSS>.tif`` under
    ``OUTPUT_DIR/<hurricane>/increment`` and ``.../cumulative``.

    Args:
        hurricane_name: Used for folder and file names.
        expanded_fc: Expanded tweet feature class.
        time_bins: Bin keys to process.
        timestamp_dict: Bin key -> datetime used for file names.
        interval_counts: ``(unix_timestamp, scale, name) -> count``.
        lookups: Name -> geometry dictionaries.
        reference_layers: Passed through to the rasterizer.
        grid_params: Grid description (width, height, minx, miny, ...).

    Returns:
        Path of the hurricane's output directory.
    """
    print(f"\n{'=' * 60}")
    print(f"PROCESSING: {hurricane_name.upper()}")
    print(f"{'=' * 60}")

    # Create output directories
    hurricane_dir = os.path.join(OUTPUT_DIR, hurricane_name.lower())
    increment_dir = os.path.join(hurricane_dir, "increment")
    cumulative_dir = os.path.join(hurricane_dir, "cumulative")
    os.makedirs(increment_dir, exist_ok=True)
    os.makedirs(cumulative_dir, exist_ok=True)

    cumulative_grid = np.zeros((grid_params["height"], grid_params["width"]), dtype=np.float32)
    cell_size = grid_params.get("cell_size", CELL_SIZE_M)

    # Lower-left corner for NumPyArrayToRaster
    lower_left = arcpy.Point(grid_params["minx"], grid_params["miny"])

    # Total rows per bin, computed once instead of rescanning for every bin.
    tweets_per_bin: Dict[Any, int] = defaultdict(int)
    for key, n in interval_counts.items():
        tweets_per_bin[key[0]] += n

    for idx, time_bin in enumerate(time_bins):
        print(f"\nTime Bin {idx + 1}/{len(time_bins)}")
        print(f"  Tweets in bin: {tweets_per_bin.get(time_bin, 0)}")

        incremental_grid = create_hierarchical_raster_arcpy(
            expanded_fc,
            time_bin,
            interval_counts,
            lookups,
            reference_layers,
            grid_params
        )
        cumulative_grid += incremental_grid

        # Both outputs share one file name and differ only by folder.
        time_str = timestamp_dict[time_bin].strftime("%Y%m%d_%H%M%S")
        filename = f"{hurricane_name}_tweets_{time_str}.tif"

        _write_grid_geotiff(incremental_grid, lower_left, cell_size,
                            os.path.join(increment_dir, filename))
        print(f"    Saved: increment/{filename}")

        _write_grid_geotiff(cumulative_grid, lower_left, cell_size,
                            os.path.join(cumulative_dir, filename))
        print(f"    Saved: cumulative/{filename}")

        print(f"  Incremental max: {np.max(incremental_grid):.2f}")
        print(f"  Cumulative max: {np.max(cumulative_grid):.2f}")

    print(f"\n{hurricane_name.upper()} processing complete!")
    return hurricane_dir


# Status message confirming this cell's definitions were executed.
print("Hurricane processing function defined")

---
## EXECUTE PIPELINE

Run the following cells to execute the full pipeline:

### Step 1: Load Hurricane Data

In [None]:
print("=" * 80)
print("HURRICANE SOCIAL MEDIA ANALYSIS - ArcPy Implementation")
print("=" * 80)

# Each call returns (feature class name, metadata dict with time-bin info).
francine_fc, francine_meta = load_hurricane_data_arcpy(FRANCINE_PATH, "francine")
helene_fc, helene_meta = load_hurricane_data_arcpy(HELENE_PATH, "helene")

### Step 2: Load Reference Layers

In [None]:
# Dict with "states", "counties" and "cities" projected feature classes.
reference_layers = load_reference_layers()

### Step 3: Create Hierarchical Lookups

In [None]:
# Name -> geometry dictionaries used by the matching and rasterization steps.
lookups = create_hierarchical_lookups_arcpy(reference_layers)

### Step 4: Expand Tweets by Matches

In [None]:
# One output row per (tweet, match); the last argument is only used in log text.
francine_expanded = expand_tweets_arcpy(
    francine_fc,
    lookups,
    "francine_expanded",
    "FRANCINE"
)

helene_expanded = expand_tweets_arcpy(
    helene_fc,
    lookups,
    "helene_expanded",
    "HELENE"
)

### Step 5: Create Interval Counts

In [None]:
# (unix_timestamp, scale_level, matched_name) -> row count, per hurricane.
francine_interval_counts = create_interval_counts_dict(francine_expanded)
helene_interval_counts = create_interval_counts_dict(helene_expanded)

### Step 6: Calculate Grid Parameters

In [None]:
# Project hurricane data to target SR for extent calculation
francine_proj = "temp_francine_proj"
helene_proj = "temp_helene_proj"
arcpy.Project_management(francine_fc, francine_proj, TARGET_SR)
arcpy.Project_management(helene_fc, helene_proj, TARGET_SR)

# One grid is shared by both hurricanes (union of the two extents).
grid_params = calculate_grid_extent(francine_proj, helene_proj)

### Step 7: Process Hurricane Francine

In [None]:
# Writes incremental and cumulative rasters; returns the output directory.
francine_output = process_hurricane_arcpy(
    "francine",
    francine_expanded,
    francine_meta["time_bins"],
    francine_meta["timestamp_dict"],
    francine_interval_counts,
    lookups,
    reference_layers,
    grid_params
)

### Step 8: Process Hurricane Helene

In [None]:
# Writes incremental and cumulative rasters; returns the output directory.
helene_output = process_hurricane_arcpy(
    "helene",
    helene_expanded,
    helene_meta["time_bins"],
    helene_meta["timestamp_dict"],
    helene_interval_counts,
    lookups,
    reference_layers,
    grid_params
)

### Step 9: Cleanup & Summary

In [None]:
# Cleanup temporary feature classes
# (only names starting with "temp_" match; the *_expanded outputs are kept)
print("\nCleaning up temporary features...")
cleanup_temp_features("temp_*")

# Summary
print("\n" + "=" * 80)
print("PIPELINE COMPLETE")
print("=" * 80)

print(f"\nOutput Directories:")
print(f"  Francine: {francine_output}")
print(f"  Helene:   {helene_output}")

# Count files
# (counts every .tif present in each folder)
import glob
francine_inc = len(glob.glob(os.path.join(francine_output, 'increment', '*.tif')))
francine_cum = len(glob.glob(os.path.join(francine_output, 'cumulative', '*.tif')))
helene_inc = len(glob.glob(os.path.join(helene_output, 'increment', '*.tif')))
helene_cum = len(glob.glob(os.path.join(helene_output, 'cumulative', '*.tif')))

print(f"\nRasters Created:")
print(f"  Francine: {francine_inc} incremental + {francine_cum} cumulative")
print(f"  Helene:   {helene_inc} incremental + {helene_cum} cumulative")
print(f"  Total:    {francine_inc + francine_cum + helene_inc + helene_cum} GeoTIFF files")

print(f"\nNext Steps:")
print(f"  1. Rasters are ready in: {OUTPUT_DIR}")
print(f"  2. Add to ArcGIS Pro map")
print(f"  3. Configure symbology and time slider")
print(f"  4. Export animations")

# Check in Spatial Analyst
# (returns the license checked out in Cell 1)
arcpy.CheckInExtension("Spatial")