In [1]:
import json

from google.colab import drive
from shapely.geometry import shape, Point
from shapely.prepared import prep

In [2]:
# Mount Google Drive at /content/drive. The input files read later in this
# notebook are addressed through this mount point, so this cell must run
# before any cell that opens them.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Input locations. Both files live in the same Drive folder, so the folder is
# spelled once and the two paths are derived from it.
_notebook_dir = '/content/drive/MyDrive/Colab Notebooks'

# ZCTA polygons (GeoJSON FeatureCollection stored with a .json extension)
filtered_zcta_2020 = _notebook_dir + '/filtered_zcta_2020.json'
# Point locations carrying the articleIDs to aggregate
locations = _notebook_dir + '/locations_20240821_104317.geojson'

In [4]:
# Load both GeoJSON inputs into plain Python dicts.
# The encoding is pinned to UTF-8 (the encoding JSON interchange files are
# expected to use) so decoding does not depend on the runtime's locale default.
with open(filtered_zcta_2020, 'r', encoding='utf-8') as zcta_file:
    zcta_data = json.load(zcta_file)  # FeatureCollection of ZCTA polygons

with open(locations, 'r', encoding='utf-8') as locations_file:
    locations_data = json.load(locations_file)  # FeatureCollection of points

In [5]:
# Accumulator filled by the point-in-polygon cell below:
# ZCTA code -> set of unique articleIDs found inside that ZCTA.
zcta_articles = dict()

In [6]:
# Aggregate the articleIDs of every location point that falls inside each ZCTA.
#
# Step 1: parse every location exactly once. Neither the Point geometry nor the
# cleaned ID set depends on the ZCTA being tested, so building them inside the
# ZCTA loop would redo the same work once per polygon.
location_points = []
for location_feature in locations_data['features']:
    geometry = location_feature.get('geometry')
    if not geometry or not geometry.get('coordinates'):
        # GeoJSON permits features with a null geometry; such a feature
        # cannot lie inside any polygon, so it is skipped.
        continue

    properties = location_feature.get('properties') or {}
    raw_ids = properties.get('articleIDs') or ''
    # ''.split(',') yields [''], which would otherwise be stored as a bogus
    # empty article ID. Tokens are also stripped so "1, 2" and "1,2" agree.
    article_ids = {token.strip() for token in raw_ids.split(',') if token.strip()}
    if article_ids:
        location_points.append((Point(geometry['coordinates']), article_ids))

# Step 2: test the pre-built points against each ZCTA polygon.
# Start from an empty mapping so re-running this cell never keeps stale matches.
zcta_articles.clear()
for zcta_feature in zcta_data['features']:
    zcta_code = zcta_feature['properties']['ZCTA5CE20']  # Get the ZCTA code

    # setdefault rather than assignment: if one ZCTA code appears on several
    # features, their matches accumulate instead of the last feature winning.
    matched_ids = zcta_articles.setdefault(zcta_code, set())

    # A prepared geometry answers repeated contains() queries against the same
    # polygon faster than the plain geometry, with the same results.
    zcta_polygon = prep(shape(zcta_feature['geometry']))

    for location_point, article_ids in location_points:
        # contains() is strict: points exactly on the boundary do not match.
        if zcta_polygon.contains(location_point):
            matched_ids.update(article_ids)

In [7]:
# Empty GeoJSON FeatureCollection that will receive the enriched ZCTA features.
output_geojson = dict(type="FeatureCollection", features=[])

In [8]:
# Populate the output GeoJSON with ZCTA polygons and their associated articleIDs.
# NOTE: the feature dicts inside zcta_data are updated in place (two properties
# are added), and the same dict objects are placed in the output collection.
enriched_features = []
for zcta_feature in zcta_data['features']:
    properties = zcta_feature['properties']
    zcta_code = properties['ZCTA5CE20']

    # Sets have no stable iteration order, so list(set) can differ between
    # runs. Sorting (lexicographically, the IDs are strings) makes the saved
    # file reproducible and diff-friendly.
    properties['articleIDs'] = sorted(zcta_articles[zcta_code])

    # Geometric centroid of the ZCTA polygon, stored as [x, y].
    # NOTE(review): a centroid can fall outside a concave or multi-part
    # polygon; confirm that is acceptable for however 'center' is consumed.
    centroid = shape(zcta_feature['geometry']).centroid
    properties['center'] = [centroid.x, centroid.y]

    enriched_features.append(zcta_feature)

# Replace the feature list instead of appending to it, so re-running this cell
# does not add every ZCTA to the output a second time.
output_geojson['features'] = enriched_features

In [9]:
# Save the output GeoJSON to Google Drive.
# The path is absolute, matching the input paths, so the file lands in the same
# Drive folder regardless of the notebook's current working directory (the
# previous './drive/...' form only resolved when the cwd was /content).
output_file_path = '/content/drive/MyDrive/Colab Notebooks/aggregated_locations.geojson'
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    json.dump(output_geojson, output_file)