In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import folium
from click.formatting import iter_rows
from folium import plugins
import json
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import folium
from folium import plugins
import json
pd.set_option('display.max_columns', None)



def get_project_root():
    # This assumes the notebook is running from within the project's root folder.
    return os.getcwd()

def get_data_file_path(*path_segments):
    project_root = get_project_root()
    # Navigate to the data folder from the project root.
    return os.path.join(project_root, *path_segments)

# Data loading functions
def get_geojson():
    """Get path to helene.geojson"""
    geojson = get_data_file_path('data', 'geojson', 'helene.geojson')
    print(geojson)
    return gpd.read_file(geojson)

def get_cities():
    df_path = get_data_file_path('data', 'tables', 'cities1000.csv')
    # df = pd.read_csv(df_path)
    df = pd.read_csv(df_path, low_memory=False)

    us_cities_df = df[
        (df['country_code'] == 'US') &
        (df['feature_class'] == 'P') &
        (df['population'].notna()) &
        (df['latitude'].notna()) &
        (df['longitude'].notna())
        ].reset_index(drop=True)

    us_cities_gdf = gpd.GeoDataFrame(
        us_cities_df,
        geometry=gpd.points_from_xy(us_cities_df.longitude, us_cities_df.latitude),
        crs="EPSG:4326"
    )
    return us_cities_gdf

def get_states():
    gdf_path = get_data_file_path('data', 'shape_files', "cb_2023_us_state_20m.shp")
    return gpd.read_file(gdf_path)

def get_counties():
    gdf_path = get_data_file_path('data', 'shape_files', "cb_2023_us_county_20m.shp")
    return gpd.read_file(gdf_path)

tweets_gdf = get_geojson().to_crs("EPSG:4326")
us_cities_gdf = get_cities().to_crs("EPSG:4326")
us_states_gdf = get_states().to_crs("EPSG:4326")
us_counties_gdf = get_counties().to_crs("EPSG:4326")

C:\Users\colto\Documents\GitHub\Tweet_project\data\geojson\helene.geojson


In [2]:
# ==============================================================================
# NEW APPROACH: Count tweets by GPE/FAC mentions ONLY (ignore lat/lon)
# ==============================================================================

from fuzzywuzzy import fuzz, process
import re

def preprocess_place_name(name):
    """Standardize place names for matching"""
    if pd.isna(name) or name == 'NAN' or name == '':
        return None
    name = str(name).upper().strip()
    name = re.sub(r'\bST\.?\b', 'SAINT', name)
    name = re.sub(r'\bMT\.?\b', 'MOUNT', name)
    name = re.sub(r'\bFT\.?\b', 'FORT', name)
    name = re.sub(r'[^\w\s]', '', name)
    name = re.sub(r'\s+', ' ', name)
    return name.strip()

def parse_gpe_entities(gpe_string):
    """Split GPE field into individual place mentions"""
    if not gpe_string or pd.isna(gpe_string) or str(gpe_string).strip() == '':
        return []
    gpe_string = str(gpe_string).strip()
    entities = []
    for part in [p.strip() for p in gpe_string.split(',')]:
        if not part:
            continue
        for sub in re.split(r'[;&|]', part):
            sub = preprocess_place_name(sub)
            if sub and len(sub) > 1:
                entities.append(sub)
    # Remove duplicates while preserving order
    seen, clean = set(), []
    for e in entities:
        if e not in seen:
            clean.append(e)
            seen.add(e)
    return clean

def create_lookup_dictionaries(states_gdf, counties_gdf, cities_gdf):
    """Build name->geometry lookup dictionaries"""
    print("Building lookup dictionaries...")
    
    # States
    state_lookup = {}
    state_abbrev_to_name = {}
    for _, row in states_gdf.iterrows():
        name = preprocess_place_name(row['NAME'])
        if name:
            state_lookup[name] = row
        if 'STUSPS' in row:
            abbr = str(row['STUSPS']).upper()
            state_abbrev_to_name[abbr] = name
            state_lookup[abbr] = row
    
    # Counties
    county_lookup = {}
    for _, row in counties_gdf.iterrows():
        name = preprocess_place_name(row['NAME'])
        if name:
            county_lookup[name] = row
    
    # Cities
    city_lookup = {}
    for _, row in cities_gdf.iterrows():
        name = preprocess_place_name(row['name'])
        if name:
            city_lookup[name] = row
    
    print(f"  States: {len(state_lookup)}")
    print(f"  Counties: {len(county_lookup)}")
    print(f"  Cities: {len(city_lookup)}")
    
    return state_lookup, county_lookup, city_lookup, state_abbrev_to_name

def fuzzy_match_entity(entity, lookup_dict, threshold=85):
    """Fuzzy match entity to lookup dictionary"""
    if entity in lookup_dict:
        return lookup_dict[entity], 100
    
    names = list(lookup_dict.keys())
    if not names:
        return None, 0
    
    match = process.extractOne(entity, names, scorer=fuzz.ratio)
    if match and match[1] >= threshold:
        return lookup_dict[match[0]], match[1]
    
    return None, 0

def count_mentions_in_tweets(tweets_gdf, state_lookup, county_lookup, city_lookup):
    """
    Count tweets by what they MENTION, not where they are located.
    Also track which tweets mentioned each entity for detailed output.
    
    Rules:
    - If tweet mentions "Texas" → Texas state gets +1
    - If tweet mentions "Houston" → Houston city gets +1
    - If tweet mentions "Harris County" → Harris County gets +1
    - All mentions in a single tweet count independently
    """
    print("\nCounting tweet mentions...")
    
    state_mentions = {}
    county_mentions = {}
    city_mentions = {}
    
    # Track tweet details for each entity (especially for cities)
    state_tweet_details = {}
    county_tweet_details = {}
    city_tweet_details = {}
    
    for idx, row in tweets_gdf.iterrows():
        if idx % 100 == 0:
            print(f"  Processing tweet {idx}/{len(tweets_gdf)}")
        
        # Parse GPE field
        entities = parse_gpe_entities(row['GPE'])
        original_gpe = str(row['GPE']) if pd.notna(row['GPE']) else ''
        tweet_time = str(row['time']) if pd.notna(row['time']) else ''
        
        for entity in entities:
            # Try to match to state
            state_match, state_score = fuzzy_match_entity(entity, state_lookup, threshold=90)
            if state_match is not None:
                state_code = state_match['STUSPS']
                state_mentions[state_code] = state_mentions.get(state_code, 0) + 1
                
                # Track tweet details
                if state_code not in state_tweet_details:
                    state_tweet_details[state_code] = []
                state_tweet_details[state_code].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
                continue
            
            # Try to match to county
            county_match, county_score = fuzzy_match_entity(entity, county_lookup, threshold=85)
            if county_match is not None:
                county_id = county_match['GEOID']
                county_mentions[county_id] = county_mentions.get(county_id, 0) + 1
                
                # Track tweet details
                if county_id not in county_tweet_details:
                    county_tweet_details[county_id] = []
                county_tweet_details[county_id].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
                continue
            
            # Try to match to city
            city_match, city_score = fuzzy_match_entity(entity, city_lookup, threshold=85)
            if city_match is not None:
                city_id = city_match['geonameid']
                city_mentions[city_id] = city_mentions.get(city_id, 0) + 1
                
                # Track tweet details
                if city_id not in city_tweet_details:
                    city_tweet_details[city_id] = []
                city_tweet_details[city_id].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
    
    print(f"\n  Found mentions:")
    print(f"    States: {len(state_mentions)}")
    print(f"    Counties: {len(county_mentions)}")
    print(f"    Cities: {len(city_mentions)}")
    
    return (state_mentions, county_mentions, city_mentions,
            state_tweet_details, county_tweet_details, city_tweet_details)

def create_count_gdfs(state_mentions, county_mentions, city_mentions,
                      state_tweet_details, county_tweet_details, city_tweet_details,
                      us_states_gdf, us_counties_gdf, us_cities_gdf):
    """Create GeoDataFrames with mention counts and tweet details"""
    
    # States
    state_counts_df = pd.DataFrame([
        {
            'STUSPS': k, 
            'tweet_count': v,
            'sample_mentions': '; '.join([d['matched_entity'] for d in state_tweet_details[k][:5]]),
            'sample_gpe_text': '; '.join([d['original_gpe'][:100] for d in state_tweet_details[k][:3]])
        } 
        for k, v in state_mentions.items()
    ])
    states_with_counts = us_states_gdf.merge(
        state_counts_df, on='STUSPS', how='left'
    )
    states_with_counts['tweet_count'] = states_with_counts['tweet_count'].fillna(0)
    
    # Counties
    county_counts_df = pd.DataFrame([
        {
            'GEOID': k, 
            'tweet_count': v,
            'sample_mentions': '; '.join([d['matched_entity'] for d in county_tweet_details[k][:5]]),
            'sample_gpe_text': '; '.join([d['original_gpe'][:100] for d in county_tweet_details[k][:3]])
        } 
        for k, v in county_mentions.items()
    ])
    counties_with_counts = us_counties_gdf.merge(
        county_counts_df, on='GEOID', how='left'
    )
    counties_with_counts['tweet_count'] = counties_with_counts['tweet_count'].fillna(0)
    
    # Cities - with full tweet text details
    city_counts_df = pd.DataFrame([
        {
            'geonameid': k, 
            'tweet_count': v,
            'matched_entities': '; '.join([d['matched_entity'] for d in city_tweet_details[k]]),
            'original_gpe_text': ' | '.join([d['original_gpe'] for d in city_tweet_details[k]]),
            'mention_times': '; '.join([d['time'] for d in city_tweet_details[k][:10]])
        } 
        for k, v in city_mentions.items()
    ])
    cities_with_counts = us_cities_gdf.merge(
        city_counts_df, on='geonameid', how='left'
    )
    cities_with_counts['tweet_count'] = cities_with_counts['tweet_count'].fillna(0)
    
    return states_with_counts, counties_with_counts, cities_with_counts

# Execute the new approach
state_lookup, county_lookup, city_lookup, state_abbrev_to_name = create_lookup_dictionaries(
    us_states_gdf, us_counties_gdf, us_cities_gdf
)

(state_mentions, county_mentions, city_mentions,
 state_tweet_details, county_tweet_details, city_tweet_details) = count_mentions_in_tweets(
    tweets_gdf, state_lookup, county_lookup, city_lookup
)

states_with_counts, counties_with_counts, cities_with_counts = create_count_gdfs(
    state_mentions, county_mentions, city_mentions,
    state_tweet_details, county_tweet_details, city_tweet_details,
    us_states_gdf, us_counties_gdf, us_cities_gdf
)

print("\nTop states by mentions:")
print(states_with_counts[states_with_counts['tweet_count'] > 0][['NAME', 'STUSPS', 'tweet_count', 'sample_mentions']].sort_values('tweet_count', ascending=False).head(10))

print("\nTop counties by mentions:")
print(counties_with_counts[counties_with_counts['tweet_count'] > 0][['NAME', 'GEOID', 'tweet_count', 'sample_mentions']].sort_values('tweet_count', ascending=False).head(10))

print("\nTop cities by mentions:")
print(cities_with_counts[cities_with_counts['tweet_count'] > 0][['name', 'geonameid', 'tweet_count', 'matched_entities']].sort_values('tweet_count', ascending=False).head(10))

Building lookup dictionaries...
  States: 104
  Counties: 1915
  Cities: 12256

Counting tweet mentions...
  Processing tweet 0/3007
  Processing tweet 100/3007
  Processing tweet 200/3007
  Processing tweet 300/3007
  Processing tweet 400/3007
  Processing tweet 500/3007
  Processing tweet 600/3007
  Processing tweet 700/3007
  Processing tweet 800/3007
  Processing tweet 900/3007
  Processing tweet 1000/3007
  Processing tweet 1100/3007
  Processing tweet 1200/3007
  Processing tweet 1300/3007
  Processing tweet 1400/3007
  Processing tweet 1500/3007
  Processing tweet 1600/3007
  Processing tweet 1700/3007
  Processing tweet 1800/3007
  Processing tweet 1900/3007
  Processing tweet 2000/3007
  Processing tweet 2100/3007
  Processing tweet 2200/3007
  Processing tweet 2300/3007
  Processing tweet 2400/3007
  Processing tweet 2500/3007
  Processing tweet 2600/3007
  Processing tweet 2700/3007
  Processing tweet 2800/3007
  Processing tweet 2900/3007
  Processing tweet 3000/3007

  Fou

In [4]:
def count_mentions_in_tweets_temporal_with_cascade(tweets_gdf, state_lookup, county_lookup, city_lookup, 
                                                    us_states_gdf, us_counties_gdf, us_cities_gdf):
    """
    Count mentions by time bin WITH hierarchical cascade.
    
    CASCADING RULES:
    1. Tweet mentions are counted at the level mentioned (city/county/state)
    2. Tweet POINTS (lat/lon) are also spatially joined to add cascade counts:
       - Each tweet point finds its containing county → +1 to county
       - Each county cascades to its state → +1 to state
       - Each tweet point finds nearest city (within 50km) → +1 to city
    
    Returns dictionaries: {time_bin: {entity_id: count}}
    """
    print("\nCounting tweet mentions by time bin WITH HIERARCHICAL CASCADE...")
    
    # Add time binning
    tweets_gdf['time'] = pd.to_datetime(tweets_gdf['time'])
    tweets_gdf['bin'] = tweets_gdf['time'].dt.floor('4h')
    
    time_bins = sorted(tweets_gdf['bin'].unique())
    
    # Initialize dictionaries for each time bin
    temporal_state_mentions = {tb: {} for tb in time_bins}
    temporal_county_mentions = {tb: {} for tb in time_bins}
    temporal_city_mentions = {tb: {} for tb in time_bins}
    
    # Track tweet details for each entity at each time bin
    temporal_state_details = {tb: {} for tb in time_bins}
    temporal_county_details = {tb: {} for tb in time_bins}
    temporal_city_details = {tb: {} for tb in time_bins}
    
    # Ensure CRS matches for spatial operations
    tweets_gdf = tweets_gdf.to_crs("EPSG:4326")
    us_states_gdf = us_states_gdf.to_crs("EPSG:4326")
    us_counties_gdf = us_counties_gdf.to_crs("EPSG:4326")
    us_cities_gdf = us_cities_gdf.to_crs("EPSG:4326")
    
    for idx, row in tweets_gdf.iterrows():
        if idx % 100 == 0:
            print(f"  Processing tweet {idx}/{len(tweets_gdf)}")
        
        time_bin = row['bin']
        entities = parse_gpe_entities(row['GPE'])
        original_gpe = str(row['GPE']) if pd.notna(row['GPE']) else ''
        tweet_time = str(row['time']) if pd.notna(row['time']) else ''
        tweet_point = row.geometry
        
        # === PART 1: COUNT MENTIONS (text-based) ===
        for entity in entities:
            # Try state match
            state_match, state_score = fuzzy_match_entity(entity, state_lookup, threshold=90)
            if state_match is not None:
                state_code = state_match['STUSPS']
                temporal_state_mentions[time_bin][state_code] = temporal_state_mentions[time_bin].get(state_code, 0) + 1
                
                # Track details
                if state_code not in temporal_state_details[time_bin]:
                    temporal_state_details[time_bin][state_code] = []
                temporal_state_details[time_bin][state_code].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
                continue
            
            # Try county match
            county_match, county_score = fuzzy_match_entity(entity, county_lookup, threshold=85)
            if county_match is not None:
                county_id = county_match['GEOID']
                temporal_county_mentions[time_bin][county_id] = temporal_county_mentions[time_bin].get(county_id, 0) + 1
                
                # Track details
                if county_id not in temporal_county_details[time_bin]:
                    temporal_county_details[time_bin][county_id] = []
                temporal_county_details[time_bin][county_id].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
                continue
            
            # Try city match
            city_match, city_score = fuzzy_match_entity(entity, city_lookup, threshold=85)
            if city_match is not None:
                city_id = city_match['geonameid']
                temporal_city_mentions[time_bin][city_id] = temporal_city_mentions[time_bin].get(city_id, 0) + 1
                
                # Track details
                if city_id not in temporal_city_details[time_bin]:
                    temporal_city_details[time_bin][city_id] = []
                temporal_city_details[time_bin][city_id].append({
                    'original_gpe': original_gpe,
                    'matched_entity': entity,
                    'time': tweet_time
                })
        
        # === PART 2: CASCADE FROM TWEET POINT (spatial-based) ===
        # Find containing county
        containing_county = us_counties_gdf[us_counties_gdf.contains(tweet_point)]
        
        if len(containing_county) > 0:
            county_geoid = containing_county.iloc[0]['GEOID']
            county_statefp = containing_county.iloc[0]['STATEFP']
            county_name = containing_county.iloc[0]['NAME']
            
            # CASCADE: Increment county count
            temporal_county_mentions[time_bin][county_geoid] = temporal_county_mentions[time_bin].get(county_geoid, 0) + 1
            
            # Track details (cascade)
            if county_geoid not in temporal_county_details[time_bin]:
                temporal_county_details[time_bin][county_geoid] = []
            temporal_county_details[time_bin][county_geoid].append({
                'original_gpe': f'[CASCADE from point in {county_name}]',
                'matched_entity': f'{county_name} County',
                'time': tweet_time
            })
            
            # CASCADE: Find containing state
            containing_state = us_states_gdf[us_states_gdf['STATEFP'] == county_statefp]
            
            if len(containing_state) > 0:
                state_code = containing_state.iloc[0]['STUSPS']
                state_name = containing_state.iloc[0]['NAME']
                
                # CASCADE: Increment state count
                temporal_state_mentions[time_bin][state_code] = temporal_state_mentions[time_bin].get(state_code, 0) + 1
                
                # Track details (cascade)
                if state_code not in temporal_state_details[time_bin]:
                    temporal_state_details[time_bin][state_code] = []
                temporal_state_details[time_bin][state_code].append({
                    'original_gpe': f'[CASCADE from point in {state_name}]',
                    'matched_entity': state_name,
                    'time': tweet_time
                })
        
        # CASCADE: Find nearest city (within 50km)
        tweet_buffer = tweet_point.buffer(0.45)  # ~50km
        nearby_cities = us_cities_gdf[us_cities_gdf.geometry.within(tweet_buffer)]
        
        if len(nearby_cities) > 0:
            distances = nearby_cities.geometry.distance(tweet_point)
            closest_city_idx = distances.idxmin()
            closest_city = nearby_cities.loc[closest_city_idx]
            
            city_id = closest_city['geonameid']
            city_name = closest_city['name']
            
            # CASCADE: Increment city count
            temporal_city_mentions[time_bin][city_id] = temporal_city_mentions[time_bin].get(city_id, 0) + 1
            
            # Track details (cascade)
            if city_id not in temporal_city_details[time_bin]:
                temporal_city_details[time_bin][city_id] = []
            temporal_city_details[time_bin][city_id].append({
                'original_gpe': f'[CASCADE from nearby point]',
                'matched_entity': city_name,
                'time': tweet_time
            })
    
    return (time_bins, temporal_state_mentions, temporal_county_mentions, temporal_city_mentions,
            temporal_state_details, temporal_county_details, temporal_city_details)

def create_temporal_aggregations(time_bins, temporal_state_mentions, temporal_county_mentions, 
                                 temporal_city_mentions, temporal_state_details, temporal_county_details, 
                                 temporal_city_details):
    """Create aggregated counts for each time bin with tweet details"""
    temporal_data = {}
    
    for bin_time in time_bins:
        # Convert mention dictionaries to DataFrames with details
        state_counts = pd.DataFrame([
            {
                'state_code': k, 
                'tweet_count': v,
                'sample_gpe_text': ' | '.join([d['original_gpe'][:100] for d in temporal_state_details[bin_time][k][:3]])
            }
            for k, v in temporal_state_mentions[bin_time].items()
        ])
        
        county_counts = pd.DataFrame([
            {
                'county_fips': k, 
                'tweet_count': v,
                'sample_gpe_text': ' | '.join([d['original_gpe'][:100] for d in temporal_county_details[bin_time][k][:3]])
            }
            for k, v in temporal_county_mentions[bin_time].items()
        ])
        
        city_counts = pd.DataFrame([
            {
                'city_id': k,
                'tweet_count': v,
                'original_gpe_text': ' | '.join([d['original_gpe'] for d in temporal_city_details[bin_time][k]]),
                'matched_entities': '; '.join([d['matched_entity'] for d in temporal_city_details[bin_time][k]])
            }
            for k, v in temporal_city_mentions[bin_time].items()
        ])
        
        temporal_data[bin_time] = {
            'states': state_counts,
            'counties': county_counts,
            'cities': city_counts
        }
    
    return temporal_data

# Execute temporal counting WITH CASCADE
print("\n" + "="*60)
print("TEMPORAL COUNTING WITH HIERARCHICAL CASCADE")
print("="*60)

(time_bins, temporal_state_mentions, temporal_county_mentions, temporal_city_mentions,
 temporal_state_details, temporal_county_details, temporal_city_details) = \
    count_mentions_in_tweets_temporal_with_cascade(
        tweets_gdf, state_lookup, county_lookup, city_lookup,
        us_states_gdf, us_counties_gdf, us_cities_gdf
    )

# Create temporal aggregations
temporal_data = create_temporal_aggregations(
    time_bins, temporal_state_mentions, temporal_county_mentions, temporal_city_mentions,
    temporal_state_details, temporal_county_details, temporal_city_details
)

print(f"\nTemporal bins created: {len(time_bins)}")
print(f"Time range: {time_bins[0]} to {time_bins[-1]}")
print(f"\nCounts now include:")
print(f"  - Text mentions from GPE field")
print(f"  - Spatial cascade from tweet point locations")


TEMPORAL COUNTING WITH HIERARCHICAL CASCADE

Counting tweet mentions by time bin WITH HIERARCHICAL CASCADE...
  Processing tweet 0/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 100/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 200/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 300/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 400/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 500/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 600/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 700/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 800/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 900/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1000/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1100/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1200/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1300/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1400/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1500/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1600/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1700/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1800/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1900/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2000/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2100/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2200/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2300/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2400/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2500/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2600/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2700/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2800/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 2900/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 3000/3007

Temporal bins created: 11
Time range: 2024-09-26 00:00:00+00:00 to 2024-09-27 16:00:00+00:00

Counts now include:
  - Text mentions from GPE field
  - Spatial cascade from tweet point locations



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)


In [5]:
# ==============================================================================
# EXPORT TEMPORAL (4-HOUR BINNED) DATA TO ARCGIS
# Both INCREMENTAL (per bin) and CUMULATIVE (running total) counts
# ==============================================================================

def export_temporal_to_arcgis(temporal_data, time_bins, us_states_gdf, us_counties_gdf, us_cities_gdf,
                               output_dir='arcgis_outputs'):
    """
    Export temporal (4-hour binned) data for states, counties, and cities.
    Creates BOTH incremental and cumulative count files.

    - Incremental: Count for just that 4-hour bin
    - Cumulative: Running total up to and including that bin (persists even if bin has 0 new mentions)
    """
    import os
    from datetime import datetime

    # Create temporal output directories
    temporal_dir = os.path.join(output_dir, 'temporal_4hour_bins')
    incremental_dir = os.path.join(temporal_dir, 'incremental')
    cumulative_dir = os.path.join(temporal_dir, 'cumulative')

    os.makedirs(incremental_dir, exist_ok=True)
    os.makedirs(cumulative_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print("EXPORTING TEMPORAL DATA - INCREMENTAL & CUMULATIVE")
    print("="*60)
    print(f"\nTime bins: {len(time_bins)}")
    print(f"Output directory: {temporal_dir}")

    # Track individual bin files for later merging
    incremental_bin_files = {'states': [], 'counties': [], 'cities': []}
    cumulative_bin_files = {'states': [], 'counties': [], 'cities': []}

    # Cumulative tracking dictionaries
    cumulative_state_counts = {}
    cumulative_county_counts = {}
    cumulative_city_counts = {}

    # Track all entities that have ever been mentioned
    all_mentioned_states = set()
    all_mentioned_counties = set()
    all_mentioned_cities = set()

    # First pass: collect all entities that ever get mentioned
    for bin_time in time_bins:
        counts_data = temporal_data[bin_time]
        if len(counts_data['states']) > 0:
            all_mentioned_states.update(counts_data['states']['state_code'].values)
        if len(counts_data['counties']) > 0:
            all_mentioned_counties.update(counts_data['counties']['county_fips'].values)
        if len(counts_data['cities']) > 0:
            all_mentioned_cities.update(counts_data['cities']['city_id'].values)

    print(f"\nEntities ever mentioned:")
    print(f"  States: {len(all_mentioned_states)}")
    print(f"  Counties: {len(all_mentioned_counties)}")
    print(f"  Cities: {len(all_mentioned_cities)}")

    # Process each time bin
    for idx, bin_time in enumerate(time_bins):
        bin_str = bin_time.strftime('%Y%m%d_%H%M')
        bin_label = bin_time.strftime('%Y-%m-%d %H:%M:%S')

        print(f"\n  Processing time bin {idx+1}/{len(time_bins)}: {bin_label}")

        counts_data = temporal_data[bin_time]

        # === STATES ===
        # Update cumulative counts for states with new mentions
        if len(counts_data['states']) > 0:
            for _, row in counts_data['states'].iterrows():
                state_code = row['state_code']
                cumulative_state_counts[state_code] = cumulative_state_counts.get(state_code, 0) + row['tweet_count']

        # INCREMENTAL: Only states with mentions in THIS bin
        if len(counts_data['states']) > 0:
            states_inc = us_states_gdf.merge(counts_data['states'], left_on='STUSPS', right_on='state_code', how='inner')
            states_inc['time_bin'] = bin_label
            states_inc['bin_start'] = bin_time
            states_inc['count_type'] = 'incremental'

            # Save individual shapefile
            states_shp = states_inc[['NAME', 'STUSPS', 'tweet_count', 'sample_gpe_text', 'time_bin', 'geometry']].copy()
            states_shp.columns = ['state_name', 'state_code', 'tweet_cnt', 'smpl_gpe', 'time_bin', 'geometry']
            shp_path = os.path.join(incremental_dir, f'states_inc_{bin_str}.shp')
            states_shp.to_file(shp_path)
            incremental_bin_files['states'].append(shp_path)

            print(f"    States incremental: {len(states_inc)} features")

        # CUMULATIVE: ALL states that have ever been mentioned (even if 0 this bin)
        cumulative_states_data = pd.DataFrame([
            {'state_code': code, 'cumulative_count': count}
            for code, count in cumulative_state_counts.items()
        ])

        states_cum = us_states_gdf.merge(cumulative_states_data, left_on='STUSPS', right_on='state_code', how='inner')
        states_cum['time_bin'] = bin_label
        states_cum['bin_start'] = bin_time
        states_cum['count_type'] = 'cumulative'

        # Save individual shapefile
        states_cum_shp = states_cum[['NAME', 'STUSPS', 'cumulative_count', 'time_bin', 'geometry']].copy()
        states_cum_shp.columns = ['state_name', 'state_code', 'cumul_cnt', 'time_bin', 'geometry']
        shp_path = os.path.join(cumulative_dir, f'states_cum_{bin_str}.shp')
        states_cum_shp.to_file(shp_path)
        cumulative_bin_files['states'].append(shp_path)

        print(f"    States cumulative: {len(states_cum)} features (total mentions so far)")

        # === COUNTIES ===
        # Update cumulative counts
        if len(counts_data['counties']) > 0:
            for _, row in counts_data['counties'].iterrows():
                county_id = row['county_fips']
                cumulative_county_counts[county_id] = cumulative_county_counts.get(county_id, 0) + row['tweet_count']

        # INCREMENTAL
        if len(counts_data['counties']) > 0:
            counties_inc = us_counties_gdf.merge(counts_data['counties'], left_on='GEOID', right_on='county_fips', how='inner')
            counties_inc['time_bin'] = bin_label
            counties_inc['bin_start'] = bin_time
            counties_inc['count_type'] = 'incremental'

            counties_shp = counties_inc[['NAME', 'GEOID', 'STATEFP', 'tweet_count', 'sample_gpe_text', 'time_bin', 'geometry']].copy()
            counties_shp.columns = ['cnty_name', 'cnty_id', 'state_fp', 'tweet_cnt', 'smpl_gpe', 'time_bin', 'geometry']
            shp_path = os.path.join(incremental_dir, f'counties_inc_{bin_str}.shp')
            counties_shp.to_file(shp_path)
            incremental_bin_files['counties'].append(shp_path)

            print(f"    Counties incremental: {len(counties_inc)} features")

        # CUMULATIVE
        cumulative_counties_data = pd.DataFrame([
            {'county_fips': code, 'cumulative_count': count}
            for code, count in cumulative_county_counts.items()
        ])

        counties_cum = us_counties_gdf.merge(cumulative_counties_data, left_on='GEOID', right_on='county_fips', how='inner')
        counties_cum['time_bin'] = bin_label
        counties_cum['bin_start'] = bin_time
        counties_cum['count_type'] = 'cumulative'

        counties_cum_shp = counties_cum[['NAME', 'GEOID', 'STATEFP', 'cumulative_count', 'time_bin', 'geometry']].copy()
        counties_cum_shp.columns = ['cnty_name', 'cnty_id', 'state_fp', 'cumul_cnt', 'time_bin', 'geometry']
        shp_path = os.path.join(cumulative_dir, f'counties_cum_{bin_str}.shp')
        counties_cum_shp.to_file(shp_path)
        cumulative_bin_files['counties'].append(shp_path)

        print(f"    Counties cumulative: {len(counties_cum)} features")

        # === CITIES ===
        # Update cumulative counts
        if len(counts_data['cities']) > 0:
            for _, row in counts_data['cities'].iterrows():
                city_id = row['city_id']
                cumulative_city_counts[city_id] = cumulative_city_counts.get(city_id, 0) + row['tweet_count']

        # INCREMENTAL
        if len(counts_data['cities']) > 0:
            cities_inc = us_cities_gdf.merge(counts_data['cities'], left_on='geonameid', right_on='city_id', how='inner')
            cities_inc['time_bin'] = bin_label
            cities_inc['bin_start'] = bin_time
            cities_inc['count_type'] = 'incremental'

            cities_shp = cities_inc[['name', 'geonameid', 'population', 'tweet_count', 'matched_entities', 'original_gpe_text', 'time_bin', 'geometry']].copy()
            cities_shp['orig_gpe'] = cities_shp['original_gpe_text'].str[:254]
            cities_shp['mtch_ent'] = cities_shp['matched_entities'].str[:254]
            cities_shp = cities_shp[['name', 'geonameid', 'population', 'tweet_count', 'mtch_ent', 'orig_gpe', 'time_bin', 'geometry']].copy()
            cities_shp.columns = ['city_name', 'city_id', 'population', 'tweet_cnt', 'mtchd_ent', 'orig_gpe', 'time_bin', 'geometry']
            shp_path = os.path.join(incremental_dir, f'cities_inc_{bin_str}.shp')
            cities_shp.to_file(shp_path)
            incremental_bin_files['cities'].append(shp_path)

            print(f"    Cities incremental: {len(cities_inc)} features")

        # CUMULATIVE
        cumulative_cities_data = pd.DataFrame([
            {'city_id': code, 'cumulative_count': count}
            for code, count in cumulative_city_counts.items()
        ])

        cities_cum = us_cities_gdf.merge(cumulative_cities_data, left_on='geonameid', right_on='city_id', how='inner')
        cities_cum['time_bin'] = bin_label
        cities_cum['bin_start'] = bin_time
        cities_cum['count_type'] = 'cumulative'

        cities_cum_shp = cities_cum[['name', 'geonameid', 'population', 'cumulative_count', 'time_bin', 'geometry']].copy()
        cities_cum_shp.columns = ['city_name', 'city_id', 'population', 'cumul_cnt', 'time_bin', 'geometry']
        shp_path = os.path.join(cumulative_dir, f'cities_cum_{bin_str}.shp')
        cities_cum_shp.to_file(shp_path)
        cumulative_bin_files['cities'].append(shp_path)

        print(f"    Cities cumulative: {len(cities_cum)} features")

    # === CREATE MASTER FILES BY MERGING SHAPEFILES ===
    print(f"\n  Creating master files by merging shapefiles...")
    
    # INCREMENTAL MASTERS
    if incremental_bin_files['states']:
        print(f"    Merging {len(incremental_bin_files['states'])} state incremental files...")
        gdfs = []
        for f in incremental_bin_files['states']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(incremental_dir, 'states_INCREMENTAL_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ States incremental master: {len(merged_gdf)} records")

    if incremental_bin_files['counties']:
        print(f"    Merging {len(incremental_bin_files['counties'])} county incremental files...")
        gdfs = []
        for f in incremental_bin_files['counties']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(incremental_dir, 'counties_INCREMENTAL_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ Counties incremental master: {len(merged_gdf)} records")

    if incremental_bin_files['cities']:
        print(f"    Merging {len(incremental_bin_files['cities'])} city incremental files...")
        gdfs = []
        for f in incremental_bin_files['cities']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(incremental_dir, 'cities_INCREMENTAL_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ Cities incremental master: {len(merged_gdf)} records")

    # CUMULATIVE MASTERS
    if cumulative_bin_files['states']:
        print(f"    Merging {len(cumulative_bin_files['states'])} state cumulative files...")
        gdfs = []
        for f in cumulative_bin_files['states']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(cumulative_dir, 'states_CUMULATIVE_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ States cumulative master: {len(merged_gdf)} records")

    if cumulative_bin_files['counties']:
        print(f"    Merging {len(cumulative_bin_files['counties'])} county cumulative files...")
        gdfs = []
        for f in cumulative_bin_files['counties']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(cumulative_dir, 'counties_CUMULATIVE_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ Counties cumulative master: {len(merged_gdf)} records")

    if cumulative_bin_files['cities']:
        print(f"    Merging {len(cumulative_bin_files['cities'])} city cumulative files...")
        gdfs = []
        for f in cumulative_bin_files['cities']:
            gdf = gpd.read_file(f)
            gdfs.append(gdf)
        
        merged = pd.concat(gdfs, ignore_index=True)
        merged_gdf = gpd.GeoDataFrame(merged, geometry='geometry', crs=gdfs[0].crs)
        
        output_path = os.path.join(cumulative_dir, 'cities_CUMULATIVE_ALL.shp')
        merged_gdf.to_file(output_path)
        print(f"    ✓ Cities cumulative master: {len(merged_gdf)} records")

    print(f"\n{'='*60}")
    print("TEMPORAL EXPORT COMPLETE!")
    print("="*60)
    print(f"\nFiles saved to: {os.path.abspath(temporal_dir)}")
    print(f"\nOutput structure:")
    print(f"  incremental/ - Counts for just that 4-hour bin")
    print(f"  cumulative/  - Running total (persists even if bin has 0 new mentions)")
    print(f"\nIndividual bin files + merged master *_ALL.shp files")
    print(f"\nTo use in ArcGIS Pro:")
    print(f"  1. Add *_INCREMENTAL_ALL.shp or *_CUMULATIVE_ALL.shp")
    print(f"  2. Enable time using 'time_bin' field")
    print(f"  3. Set time step to 4 hours")
    print(f"  4. Animate!")

# Execute temporal export
export_temporal_to_arcgis(
    temporal_data, time_bins, 
    us_states_gdf, us_counties_gdf, us_cities_gdf
)

print("\n\nSummary:")
print(f"Total time bins: {len(time_bins)}")
print(f"Time range: {time_bins[0].strftime('%Y-%m-%d %H:%M:%S')} to {time_bins[-1].strftime('%Y-%m-%d %H:%M:%S')}")


EXPORTING TEMPORAL DATA - INCREMENTAL & CUMULATIVE

Time bins: 11
Output directory: arcgis_outputs\temporal_4hour_bins

Entities ever mentioned:
  States: 16
  Counties: 174
  Cities: 296

  Processing time bin 1/11: 2024-09-26 00:00:00
    States incremental: 7 features
    States cumulative: 7 features (total mentions so far)
    Counties incremental: 26 features
    Counties cumulative: 26 features
    Cities incremental: 28 features
    Cities cumulative: 28 features

  Processing time bin 2/11: 2024-09-26 04:00:00
    States incremental: 7 features
    States cumulative: 9 features (total mentions so far)
    Counties incremental: 40 features
    Counties cumulative: 51 features
    Cities incremental: 49 features
    Cities cumulative: 63 features

  Processing time bin 3/11: 2024-09-26 08:00:00
    States incremental: 9 features
    States cumulative: 11 features (total mentions so far)
    Counties incremental: 29 features
    Counties cumulative: 60 features
    Cities increm

In [6]:
print(90/140)

0.6428571428571429


In [7]:
# # ==============================================================================
# # HIERARCHICAL CASCADE: City Points → County Polygons → State Polygons
# # Each tweet point increments counts for city, containing county, AND containing state
# # ==============================================================================
#
# def cascade_counts_hierarchical(tweets_gdf, us_states_gdf, us_counties_gdf, us_cities_gdf):
#     """
#     Cascade tweet counts upward through geographic hierarchy.
#     Each tweet point represents a city event that also increments its county and state.
#
#     Flow: Tweet Point (lat/lon) → City → County → State
#     """
#     print("\n" + "="*60)
#     print("CASCADING COUNTS THROUGH GEOGRAPHIC HIERARCHY")
#     print("="*60)
#
#     # Ensure CRS matches  
#     tweets_gdf = tweets_gdf.to_crs("EPSG:4326")
#     us_states_gdf = us_states_gdf.to_crs("EPSG:4326")
#     us_counties_gdf = us_counties_gdf.to_crs("EPSG:4326")
#     us_cities_gdf = us_cities_gdf.to_crs("EPSG:4326")
#
#     # Initialize count dictionaries
#     city_counts = {}
#     county_counts = {}
#     state_counts = {}
#
#     print(f"\nProcessing {len(tweets_gdf)} tweet points...")
#
#     # Process each tweet point
#     for idx, tweet in tweets_gdf.iterrows():
#         if idx % 100 == 0:
#             print(f"  Processing tweet {idx}/{len(tweets_gdf)}")
#
#         tweet_point = tweet.geometry
#
#         # Find containing county (spatial join)
#         containing_county = us_counties_gdf[us_counties_gdf.contains(tweet_point)]
#
#         if len(containing_county) > 0:
#             county_geoid = containing_county.iloc[0]['GEOID']
#             county_statefp = containing_county.iloc[0]['STATEFP']
#
#             # Increment county count
#             county_counts[county_geoid] = county_counts.get(county_geoid, 0) + 1
#
#             # Find containing state (using STATEFP from county)
#             containing_state = us_states_gdf[us_states_gdf['STATEFP'] == county_statefp]
#
#             if len(containing_state) > 0:
#                 state_code = containing_state.iloc[0]['STUSPS']
#
#                 # Increment state count
#                 state_counts[state_code] = state_counts.get(state_code, 0) + 1
#
#         # Also try to match to nearest city (within reasonable distance)
#         # Find cities within 50km of tweet point
#         tweet_buffer = tweet_point.buffer(0.45)  # ~50km at equator
#         nearby_cities = us_cities_gdf[us_cities_gdf.geometry.within(tweet_buffer)]
#
#         if len(nearby_cities) > 0:
#             # Get closest city
#             distances = nearby_cities.geometry.distance(tweet_point)
#             closest_city_idx = distances.idxmin()
#             closest_city = nearby_cities.loc[closest_city_idx]
#
#             city_id = closest_city['geonameid']
#             city_counts[city_id] = city_counts.get(city_id, 0) + 1
#
#     print(f"\n  Cascade complete!")
#     print(f"    Cities with tweets: {len(city_counts)}")
#     print(f"    Counties with tweets: {len(county_counts)}")
#     print(f"    States with tweets: {len(state_counts)}")
#
#     # Create GeoDataFrames with cascaded counts
#     print(f"\n  Creating output GeoDataFrames...")
#
#     # Cities
#     city_counts_df = pd.DataFrame([
#         {'geonameid': k, 'cascade_count': v}
#         for k, v in city_counts.items()
#     ])
#     cities_cascade = us_cities_gdf.merge(city_counts_df, on='geonameid', how='left')
#     cities_cascade['cascade_count'] = cities_cascade['cascade_count'].fillna(0)
#
#     # Counties
#     county_counts_df = pd.DataFrame([
#         {'GEOID': k, 'cascade_count': v}
#         for k, v in county_counts.items()
#     ])
#     counties_cascade = us_counties_gdf.merge(county_counts_df, on='GEOID', how='left')
#     counties_cascade['cascade_count'] = counties_cascade['cascade_count'].fillna(0)
#
#     # States
#     state_counts_df = pd.DataFrame([
#         {'STUSPS': k, 'cascade_count': v}
#         for k, v in state_counts.items()
#     ])
#     states_cascade = us_states_gdf.merge(state_counts_df, on='STUSPS', how='left')
#     states_cascade['cascade_count'] = states_cascade['cascade_count'].fillna(0)
#
#     return cities_cascade, counties_cascade, states_cascade
#
# # Execute hierarchical cascade
# cities_cascade, counties_cascade, states_cascade = cascade_counts_hierarchical(
#     tweets_gdf, us_states_gdf, us_counties_gdf, us_cities_gdf
# )
#
# print("\n" + "="*60)
# print("CASCADED COUNTS SUMMARY")
# print("="*60)
#
# print("\nTop states by cascaded counts (from tweet points):")
# print(states_cascade[states_cascade['cascade_count'] > 0][['NAME', 'STUSPS', 'cascade_count']].sort_values('cascade_count', ascending=False).head(10))
#
# print("\nTop counties by cascaded counts (from tweet points):")
# print(counties_cascade[counties_cascade['cascade_count'] > 0][['NAME', 'GEOID', 'cascade_count']].sort_values('cascade_count', ascending=False).head(10))
#
# print("\nTop cities by cascaded counts (from tweet points):")
# print(cities_cascade[cities_cascade['cascade_count'] > 0][['name', 'geonameid', 'cascade_count']].sort_values('cascade_count', ascending=False).head(10))


CASCADING COUNTS THROUGH GEOGRAPHIC HIERARCHY

Processing 3007 tweet points...
  Processing tweet 0/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 100/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 200/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 300/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 400/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 500/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 600/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 700/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 800/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 900/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

  Processing tweet 1000/3007



  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.distance(tweet_point)

  distances = nearby_cities.geometry.di

KeyboardInterrupt: 