In [35]:
import arcpy, os, re, datetime as dt
from arcpy.sa import *
import time

# ---------------- CONFIG ----------------
# Name of the point feature class inside GDB that this run processes; it is also
# embedded in every output dataset name below.
current_file = "cities_cum_helene"

# pop_boo=True weights results by the "population" field; False counts each point once.
# NOTE: with pop_boo=False, pop_str is empty, so generated names contain a double
# underscore (e.g. "h3_cities_cum_helene__20240926_0000").
pop_boo = False
pop_str = "pop" if pop_boo else ""
pop_var = "population" if pop_boo else None

GDB         = r"C:\Users\colto\Documents\tw_project\tw_project\tw_project.gdb"
# Derived from GDB + current_file so the input can never drift out of sync with the
# dataset name used for the outputs (previously a second hard-coded literal).
POINTS_FC   = os.path.join(GDB, current_file)
BIN_HOURS   = 4      # width of each time bin, in hours

CELL_SIZE_M = 1000   # raster cell size (used for env.cellSize and KernelDensity)
RADIUS_M    = 18000  # KernelDensity search radius
MD_NAME     = f"""KD_Time_{current_file}_{pop_str}_"""
PREFIX      = f"""kd_{current_file}_{pop_str}_"""

# H3 Configuration
H3_RESOLUTION = 7  # Resolution 7: ~5.16 km² average hexagon area
H3_PREFIX = f"h3_{current_file}_{pop_str}_"
H3_MD_NAME = f"H3_Time_{current_file}_{pop_str}_"

# Fail early with a readable message when Spatial Analyst cannot be used, instead of
# hitting a licence error deep inside the KernelDensity loop.
if arcpy.CheckExtension("Spatial") != "Available":
    raise RuntimeError("Spatial Analyst extension is not available; it is required for KernelDensity.")
arcpy.CheckOutExtension("Spatial")

# All geoprocessing outputs and scratch data go to the project geodatabase, and
# existing outputs with the same name are replaced.
arcpy.env.overwriteOutput  = True
arcpy.env.workspace        = GDB
arcpy.env.scratchWorkspace = GDB

# --- 1) Ensure a real Date field exists next to the text timestamps ---
SRC_FC   = POINTS_FC
SRC_TEXT = "time_bin"                 # text field holding the timestamps
DST_DATE = "time_bin_Converted"       # Date field written by the conversion
FORMAT   = "yyyy-MM-dd HH:mm:ss"      # pattern the SRC_TEXT values are parsed with

# The conversion is skipped when an earlier run already added the Date field.
existing_field_names = {fld.name for fld in arcpy.ListFields(SRC_FC)}
if DST_DATE in existing_field_names:
    print(f"✅ {DST_DATE} already exists")
else:
    conversion_args = {
        "in_table": SRC_FC,
        "input_time_field": SRC_TEXT,
        "input_time_format": FORMAT,
        "output_time_field": DST_DATE,
        "output_time_type": "DATE",
    }
    arcpy.management.ConvertTimeField(**conversion_args)
    print(f"✅ Created {DST_DATE} from {SRC_TEXT}")

# Every later cell filters and bins on this field.
TIME_FIELD = DST_DATE

# 0) Build a meters-based working copy of the points so that CELL_SIZE_M, RADIUS_M
#    and the extent buffer used later are true meter distances.
sr_in = arcpy.Describe(POINTS_FC).spatialReference
PTS_METERS = os.path.join(GDB, f"""{current_file}_{pop_str}_5070""")

# A cached copy created before TIME_FIELD was added to the source does not carry
# that field, and every later cell queries it -- drop such a stale copy so it is rebuilt.
if arcpy.Exists(PTS_METERS):
    cached_field_names = {f.name for f in arcpy.ListFields(PTS_METERS)}
    if TIME_FIELD not in cached_field_names:
        arcpy.management.Delete(PTS_METERS)

if not arcpy.Exists(PTS_METERS):
    # Only a projected CRS whose linear unit is the meter can be used as-is.
    # Geographic data AND projected data in other units (e.g. feet) are
    # reprojected; previously non-meter projected data was merely copied.
    source_in_meters = (
        sr_in.type == "Projected"
        and (sr_in.linearUnitName or "").lower() in ("meter", "metre")
    )
    if source_in_meters:
        # Already meter-based -- make a clean copy with a known name
        arcpy.management.CopyFeatures(POINTS_FC, PTS_METERS)
    else:
        arcpy.management.Project(POINTS_FC, PTS_METERS, arcpy.SpatialReference(5070))  # NAD83 / Conus Albers

# Set processing envs to projected space
sr = arcpy.Describe(PTS_METERS).spatialReference
arcpy.env.outputCoordinateSystem = sr
arcpy.env.cellSize = CELL_SIZE_M
# if AOI:
#     arcpy.env.mask = AOI
#     arcpy.env.extent = arcpy.Describe(AOI).extent

✅ time_bin_Converted already exists


In [36]:
# 1) Determine time range
def iter_times(fc, fld):
    """Yield each truthy value of field ``fld`` from the rows of ``fc``.

    Args:
        fc: Path of the feature class / table handed to ``arcpy.da.SearchCursor``.
        fld: Name of the single field to read.

    Yields:
        The field value of every row whose value is truthy; rows holding
        ``None`` (or any other falsy value) are skipped.
    """
    # The cursor is opened as a context manager so it is released as soon as
    # the generator is exhausted or closed.
    with arcpy.da.SearchCursor(fc, [fld]) as rows:
        for (t,) in rows:
            if t:
                yield t

# Collect every usable timestamp from the projected points.
times = [t for t in iter_times(PTS_METERS, TIME_FIELD)]

if len(times) == 0:
    raise RuntimeError(f"No valid times in field '{TIME_FIELD}'.")

tmin = min(times)
tmax = max(times)

# Snap the earliest timestamp down to the start of its BIN_HOURS-wide bin
# (minutes and seconds are dropped).
first_bin_hour = BIN_HOURS * (tmin.hour // BIN_HOURS)
start = dt.datetime(tmin.year, tmin.month, tmin.day, first_bin_hour)

print(f"Time range: {tmin} to {tmax}")
print(f"Starting from: {start}")

<da.SearchCursor object at 0x00000268BE5BA790>
Time range: 2024-09-26 00:00:00 to 2024-09-27 16:00:00
Starting from: 2024-09-26 00:00:00


In [37]:
# === H3 HEXAGONAL AGGREGATION PER TIME BIN (Using ArcGIS Pro's Built-in H3) ===
# The hexagon cells stay polygon feature classes; nothing is rasterized in this cell.

# Bounding box of the projected points, grown on every side by a fixed margin.
data_extent = arcpy.Describe(PTS_METERS).extent

buffer_distance = 100000  # 100 km, expressed in the units of PTS_METERS

buffered_extent = arcpy.Extent(
    XMin=data_extent.XMin - buffer_distance,
    YMin=data_extent.YMin - buffer_distance,
    XMax=data_extent.XMax + buffer_distance,
    YMax=data_extent.YMax + buffer_distance
)

# Report both boxes as "XMin, YMin, XMax, YMax", rounded to whole units.
data_extent_text = ", ".join(
    f"{value:.0f}"
    for value in (data_extent.XMin, data_extent.YMin, data_extent.XMax, data_extent.YMax)
)
buffered_extent_text = ", ".join(
    f"{value:.0f}"
    for value in (buffered_extent.XMin, buffered_extent.YMin, buffered_extent.XMax, buffered_extent.YMax)
)
print(f"Data extent: {data_extent_text}")
print(f"Buffered extent (100km): {buffered_extent_text}")

# The base grid is generated once and reused by every later run.
h3_grid_base = os.path.join(GDB, f"h3_grid_{current_file}_res{H3_RESOLUTION}")
if arcpy.Exists(h3_grid_base):
    print(f"✅ H3 grid already exists: {h3_grid_base}")
else:
    print(f"Creating H3 resolution {H3_RESOLUTION} grid covering tweet extent + 100km buffer...")
    tessellation_args = {
        "Output_Feature_Class": h3_grid_base,
        "Extent": buffered_extent,
        "Shape_Type": "H3_HEXAGON",
        "H3_Resolution": H3_RESOLUTION,
    }
    arcpy.management.GenerateTessellation(**tessellation_args)
    print(f"✅ Created H3 grid: {h3_grid_base}")

# Per-bin aggregation: for every BIN_HOURS-wide window, export the points of that
# window and spatially join them onto the H3 grid, keeping only hexagons that
# contain at least one point.
h3_created = []  # full paths of the per-bin hexagon feature classes (NOT rasters)
cur = start

# Loop-invariant: the delimited field name is the same for every bin.
time_fld = arcpy.AddFieldDelimiters(PTS_METERS, TIME_FIELD)

while cur <= tmax:
    nxt = cur + dt.timedelta(hours=BIN_HOURS)
    # Half-open window [cur, nxt) so a point on a bin boundary is counted once.
    where = (f"{time_fld} >= TIMESTAMP '{cur:%Y-%m-%d %H:%M:%S}' "
             f"AND {time_fld} < TIMESTAMP '{nxt:%Y-%m-%d %H:%M:%S}'")

    # Temporary in-memory feature class holding only this bin's points
    lyr_name = os.path.join("memory", f"h3_bin_{cur:%Y%m%d_%H%M}")
    arcpy.conversion.ExportFeatures(
        in_features=PTS_METERS,
        out_features=lyr_name,
        where_clause=where
    )

    try:
        if int(arcpy.management.GetCount(lyr_name).getOutput(0)) > 0:
            h3_with_counts = os.path.join(GDB, f"{H3_PREFIX}{cur:%Y%m%d_%H%M}")

            # Remove a leftover output from an earlier run first, mirroring the
            # Kernel Density cell.
            # NOTE(review): the recorded run failed with ERROR 000210 on this output;
            # a pre-existing or locked dataset is one possible cause -- unconfirmed.
            if arcpy.Exists(h3_with_counts):
                arcpy.management.Delete(h3_with_counts)

            join_kwargs = dict(
                target_features=h3_grid_base,
                join_features=lyr_name,
                out_feature_class=h3_with_counts,
                join_operation="JOIN_ONE_TO_ONE",
                join_type="KEEP_COMMON",
                match_option="CONTAINS",
            )
            if pop_boo:
                # Sum the population field per hexagon. Without a field mapping the
                # per-hexagon point count is available in the Join_Count field.
                join_kwargs["field_mapping"] = (
                    f'{pop_var} "{pop_var}" true true false 8 Double 0 0,'
                    f'Sum,#,{lyr_name},{pop_var},-1,-1'
                )
            arcpy.analysis.SpatialJoin(**join_kwargs)

            h3_created.append(h3_with_counts)
            print(f"✅ {cur:%Y-%m-%d %H:%M} → {h3_with_counts}")
        else:
            print(f"⚠️ {cur:%Y-%m-%d %H:%M}: No tweets in this bin")
    finally:
        # Release the in-memory copy so bins do not pile up in the memory workspace.
        arcpy.management.Delete(lyr_name)

    cur = nxt

if not h3_created:
    raise RuntimeError("No H3 hexagons created. Check data and time field.")

print(f"\n✅ Created {len(h3_created)} time-binned H3 feature classes (NO rasterization)")
print(f"Feature classes: {h3_created}")

# OPTIONAL: Compile all time bins into a single feature class with a timestamp field
if h3_created:
    h3_compiled = os.path.join(GDB, f"H3_compiled_{current_file}_{int(time.time())}")

    for idx, h3_fc in enumerate(h3_created):
        # The bin start is encoded as the trailing "YYYYMMDD_HHMM" of the dataset
        # name (e.g. "h3_cities_cum_helene__20240926_0000").
        date_part, time_part = os.path.basename(h3_fc).split("_")[-2:]
        bin_start = dt.datetime.strptime(f"{date_part}_{time_part}", "%Y%m%d_%H%M")

        # Stamp every hexagon of this bin BEFORE it is merged. The previous
        # version assigned times by row index of the compiled table, which
        # gave row i the time of bin i and left all remaining rows empty.
        if "StartTime" not in [f.name for f in arcpy.ListFields(h3_fc)]:
            arcpy.management.AddField(h3_fc, "StartTime", "DATE")
        with arcpy.da.UpdateCursor(h3_fc, ["StartTime"]) as cursor:
            for row in cursor:
                row[0] = bin_start
                cursor.updateRow(row)

        if idx == 0:
            # The first bin defines the schema (including StartTime) of the compiled output.
            arcpy.management.CopyFeatures(h3_fc, h3_compiled)
        else:
            # NO_TEST: load rows without requiring the schemas to match exactly.
            arcpy.management.Append(h3_fc, h3_compiled, "NO_TEST")

    print(f"✅ Compiled all time bins into: {h3_compiled}")
    print(f"   Use 'StartTime' field for temporal filtering in ArcPro")

Data extent: -6127658, 414147, 1777573, 2750738
Buffered extent (100km): -6227658, 314147, 1877573, 2850738
✅ H3 grid already exists: C:\Users\colto\Documents\tw_project\tw_project\tw_project.gdb\h3_grid_cities_cum_helene_res7


<class 'arcgisscripting.ExecuteError'>: ERROR 000210: Cannot create output C:\Users\colto\Documents\tw_project\tw_project\tw_project.gdb\h3_cities_cum_helene__20240926_0000
Failed to execute (SpatialJoin).


In [None]:
# === CREATE TIME-AWARE MOSAIC DATASET FOR H3 ===
# h3_created holds polygon feature classes, which cannot be added to a mosaic
# dataset as "Raster Dataset" items. Each bin is therefore rasterized first and
# the resulting raster is what gets added.
# NOTE(review): if the H3 output is meant to stay vector-only, this whole cell
# should be dropped instead -- confirm the intent.
if h3_created:
    timestamp = time.time()
    H3_MD_NAME = f"H3_{current_file}_{pop_str}_{int(timestamp)}"

    # Create mosaic dataset for H3 rasters
    H3_MD = arcpy.management.CreateMosaicDataset(
        in_workspace=GDB,
        in_mosaicdataset_name=H3_MD_NAME,
        coordinate_system=sr,
        pixel_type="32_BIT_FLOAT"
    ).getOutput(0)

    # Cell value: summed population when weighting is on, otherwise the
    # Join_Count written by the spatial join.
    h3_value_field = pop_var if pop_boo else "Join_Count"

    for nm in h3_created:
        # The raster keeps the trailing "YYYYMMDD_HHMM" of the bin name so that
        # parse_name() below can still read the timestamp from the last 13 characters.
        h3_raster = os.path.join(GDB, f"r_{os.path.basename(nm)}")
        if arcpy.Exists(h3_raster):
            arcpy.management.Delete(h3_raster)
        arcpy.conversion.PolygonToRaster(
            in_features=nm,
            value_field=h3_value_field,
            out_rasterdataset=h3_raster,
            cell_assignment="CELL_CENTER",
            cellsize=CELL_SIZE_M
        )

        print(f"Adding {h3_raster} to H3 mosaic dataset...")
        arcpy.management.AddRastersToMosaicDataset(
            in_mosaic_dataset=H3_MD,
            raster_type="Raster Dataset",
            input_path=h3_raster,
            update_cellsize_ranges="UPDATE_CELL_SIZES",
            update_boundary="UPDATE_BOUNDARY",
            update_overviews="NO_OVERVIEWS"
        )

    # Add StartTime field and populate from item Name
    if "StartTime" not in [f.name for f in arcpy.ListFields(H3_MD)]:
        arcpy.management.AddField(H3_MD, "StartTime", "DATE")

    code_block = """import datetime
def parse_name(nm):
    # expects a name ending in 'YYYYMMDD_HHMM'
    return datetime.datetime.strptime(nm[-13:], '%Y%m%d_%H%M')
"""
    arcpy.management.CalculateField(H3_MD, "StartTime", "parse_name(!Name!)", "PYTHON3", code_block)

    # Build pyramids & stats
    arcpy.management.BuildPyramidsandStatistics(H3_MD, skip_existing="OVERWRITE")
    print(f"✅ H3 Resolution 7: {len(h3_created)} time slices → {H3_MD}")
else:
    print("⚠️ No H3 mosaic dataset created (no H3 rasters generated)")

In [None]:

# 2) Per-bin Kernel Density (CELL_SIZE_M cell, RADIUS_M search radius) saved to the GDB
created = []  # names (not full paths) of the density rasters written to GDB
cur = start
print(PTS_METERS)

# Loop-invariant pieces, computed once.
time_fld = arcpy.AddFieldDelimiters(PTS_METERS, TIME_FIELD)
# The tool documents the keyword "NONE" for "count every feature once";
# pop_var is None when population weighting is switched off.
kd_population = pop_var if pop_var else "NONE"

while cur <= tmax:
    print(cur)
    nxt = cur + dt.timedelta(hours=BIN_HOURS)
    # Half-open window [cur, nxt) so a point on a bin boundary is counted once.
    where = (f"{time_fld} >= TIMESTAMP '{cur:%Y-%m-%d %H:%M:%S}' "
             f"AND {time_fld} < TIMESTAMP '{nxt:%Y-%m-%d %H:%M:%S}'")

    # Temporary in-memory feature class holding only this bin's points
    lyr_name = os.path.join("memory", f"bin_{cur:%Y%m%d_%H%M}")
    arcpy.conversion.ExportFeatures(
        in_features=PTS_METERS,
        out_features=lyr_name,
        where_clause=where,
    )
    try:
        if int(arcpy.management.GetCount(lyr_name).getOutput(0)) > 0:
            out_name = f"{PREFIX}{cur:%Y%m%d_%H%M}"
            out_path = os.path.join(GDB, out_name)
            if arcpy.Exists(out_path):
                arcpy.management.Delete(out_path)

            kd = KernelDensity(
                in_features=lyr_name,
                population_field=kd_population,
                cell_size=CELL_SIZE_M,
                search_radius=RADIUS_M,
                out_cell_values="DENSITIES",
                method="PLANAR"
            )

            kd.save(out_path)
            created.append(out_name)

    except Exception as exc:
        # Surface the geoprocessing error messages and keep the original
        # exception attached as the cause for debugging.
        raise RuntimeError("KernelDensity failed:\n" + arcpy.GetMessages(2)) from exc
    finally:
        # Release the in-memory copy so bins do not pile up in the memory workspace.
        arcpy.management.Delete(lyr_name)

    cur = nxt

if not created:
    raise RuntimeError("No rasters created. Check time field values and bin size.")


In [None]:

# 3) Build the Mosaic Dataset that will hold the per-bin density rasters
print(GDB, MD_NAME)

# Whatever dataset MD_NAME currently points at is removed if it exists ...
previous_md_path = os.path.join(GDB, MD_NAME)
if arcpy.Exists(previous_md_path):
    arcpy.management.Delete(previous_md_path)

# ... and MD_NAME is then replaced by a fresh, timestamp-suffixed name, so the
# mosaic created below never collides with an existing one.
timestamp = time.time()
MD_NAME = f"KD_{current_file}_{pop_str}_{int(timestamp)}"

create_result = arcpy.management.CreateMosaicDataset(
    in_workspace=GDB,
    in_mosaicdataset_name=MD_NAME,
    coordinate_system=sr,
    pixel_type="32_BIT_FLOAT"
)
MD = create_result.getOutput(0)

# Register every density raster individually, by its full path inside the GDB.
for nm in created:
    print(nm)
    slice_path = os.path.join(GDB, nm)
    arcpy.management.AddRastersToMosaicDataset(
        in_mosaic_dataset=MD,
        raster_type="Raster Dataset",
        input_path=slice_path,
        update_cellsize_ranges="UPDATE_CELL_SIZES",
        update_boundary="UPDATE_BOUNDARY",
        update_overviews="NO_OVERVIEWS"
    )



In [None]:
# 4) Give each mosaic item a StartTime derived from its Name (…YYYYMMDD_HHMM)
md_field_names = [f.name for f in arcpy.ListFields(MD)]
if "StartTime" not in md_field_names:
    arcpy.management.AddField(MD, "StartTime", "DATE")

# Helper evaluated by CalculateField: the last 13 characters of the item name
# are parsed as the bin's start timestamp.
code_block = """import datetime
def parse_name(nm):
    # expects 'kd_YYYYMMDD_HHMM' 
    return datetime.datetime.strptime(nm[-13:], '%Y%m%d_%H%M')
"""
arcpy.management.CalculateField(
    MD,
    "StartTime",
    "parse_name(!Name!)",
    "PYTHON3",
    code_block,
)
# arcpy.management.EnableTime(MD, "StartTime", "Single", timeStepInterval=str(BIN_HOURS), timeStepUnits="HOURS")

# Rebuild pyramids and statistics for the mosaic, overwriting existing ones.
arcpy.management.BuildPyramidsandStatistics(MD, skip_existing="OVERWRITE")
print(f"OK: {len(created)} KDE slices → {MD}")
# arcpy.management.Delete(kd)