## Setup

In [None]:
import geopandas as gpd
from shapely.geometry import box
import pandas as pd
import numpy as np
import altair as alt
import json
import os

In [None]:
# Download the prison boundary polygons published by the Department of Homeland Security
# (the request asks for GeoJSON in EPSG:4326)
prisonsRaw = gpd.read_file(
    'https://opendata.arcgis.com/api/v3/datasets/2d6109d4127d458eaf0958e4c5296b67_0/downloads/data?format=geojson&spatialRefId=4326&where=1%3D1'
)

# Read the curated prison list; 'FACILITYID' is forced to text so IDs compare as strings
prisonsClean = pd.read_csv(
    '../prison_datasets/state_fed_prisons.csv',
    dtype={'FACILITYID': str},
)

# Cast the raw IDs to text too, so both sides of the match share a dtype
prisonsRaw['FACILITYID'] = prisonsRaw['FACILITYID'].astype(str)

# Keep only the raw records whose 'FACILITYID' appears in the curated list
filtered_prisons = prisonsRaw.loc[prisonsRaw['FACILITYID'].isin(prisonsClean['FACILITYID'])]

# Continue with an independent (deep) copy so later edits leave the filtered frame untouched
prisonsFinal = filtered_prisons.copy()

## Buffer Creation

In [None]:
# Preview the first rows of the filtered prison records
prisonsFinal.head()

In [None]:
# Define a function to create a buffered bounding box around the geometry
def create_buffered_square(geometry, buffer_distance=100):
    """Return the geometry's bounding box, padded on every side, in EPSG:4326.

    The geometry is projected to the UTM zone that contains its centroid so the
    padding can be applied in projected units, the padded box is built there,
    and the box is projected back to EPSG:4326.

    Note: despite the name, the result is the padded bounding *rectangle* of
    the geometry; its sides are equal only when the geometry's extent is.

    Parameters
    ----------
    geometry : shapely geometry or None
        Geometry expressed in EPSG:4326 (x = longitude, y = latitude).
    buffer_distance : float, default 100
        Padding added to each side of the bounding box, in the units of the
        UTM projection (metres).

    Returns
    -------
    shapely geometry or None
        The padded bounding box as a polygon in EPSG:4326. Missing (None) or
        empty geometries are returned unchanged.
    """
    # A missing or empty geometry has no centroid/bounds to build a box from
    if geometry is None or geometry.is_empty:
        return geometry

    # Get the centroid of the geometry to determine the UTM zone
    centroid = geometry.centroid

    # UTM zones are 6 degrees wide and numbered 1-60 starting at 180°W;
    # clamp so a longitude of exactly 180 does not produce the invalid zone 61
    utm_zone = min(max(int((centroid.x + 180) // 6) + 1, 1), 60)

    # EPSG:326xx is WGS 84 / UTM north, EPSG:327xx is WGS 84 / UTM south
    epsg_base = 32600 if centroid.y >= 0 else 32700
    utm_crs = f'EPSG:{epsg_base + utm_zone}'

    # Project geometry to UTM by converting it to a GeoSeries temporarily
    geometry_utm = gpd.GeoSeries([geometry], crs='EPSG:4326').to_crs(utm_crs)

    # Get the bounds in the projected CRS
    minx, miny, maxx, maxy = geometry_utm.total_bounds

    # Pad the bounding box by the buffer distance on every side
    padded_box = box(
        minx - buffer_distance,
        miny - buffer_distance,
        maxx + buffer_distance,
        maxy + buffer_distance,
    )

    # Project the padded box back to the original CRS
    padded_box_4326 = gpd.GeoSeries([padded_box], crs=utm_crs).to_crs('EPSG:4326')

    return padded_box_4326.iloc[0]

# Compute the buffered bounding polygon for every prison outline and store it
# in a new 'buffered_square' column next to the original 'geometry' column
# (the default buffer_distance of create_buffered_square is used)
prisonsFinal['buffered_square'] = prisonsFinal['geometry'].apply(create_buffered_square)


In [None]:
# List the columns currently present on the frame
prisonsFinal.columns

In [None]:
# Drop the attribute columns that are not carried into the exported buffers.
# errors='ignore' keeps this cell safe to re-run: prisonsFinal is reassigned
# from itself, so on a second run these columns are already gone and a strict
# drop would raise KeyError.
prisonsFinal = prisonsFinal.drop(
    columns=[
        'FID', 'ZIP4', 'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY',
        'COUNTYFIPS', 'COUNTRY', 'NAICS_CODE', 'NAICS_DESC', 'SOURCE', 'SOURCEDATE',
        'VAL_METHOD', 'VAL_DATE', 'WEBSITE', 'SECURELVL', 'CAPACITY', 'SHAPE_Leng',
        'GlobalID', 'CreationDate', 'Creator', 'EditDate', 'Editor', 'SHAPE_Length', 'SHAPE_Area',
    ],
    errors='ignore',
)


In [None]:
# Swap the geometry columns: the original outline is renamed to 'prison_outline'
# and the buffered polygon takes the 'geometry' name and becomes the active geometry
prisonsFinal = (
    prisonsFinal
    .rename(columns={'geometry': 'prison_outline', 'buffered_square': 'geometry'})
    .set_geometry('geometry')
)

In [None]:
# Remove the original outline column before exporting
prisonsFinal = prisonsFinal.drop(columns='prison_outline')

# Write the buffered polygons for every prison in the frame
prisonsFinal.to_file('allPrisonBuffers.geojson')

In [None]:
# Subset to the prisons whose 'STATE' is one of the five listed states
investigation_prisonsFinal = prisonsFinal.loc[
    prisonsFinal['STATE'].isin(['MA', 'FL', 'CA', 'AZ', 'WA'])
]

In [None]:
# Write the state-filtered subset of buffers to its own file
investigation_prisonsFinal.to_file('investigationPrisonBuffers.geojson')