In [15]:
import geopandas as gpd
import os
# Folder that holds the input layers. Set the COGCC_DATA_DIR environment variable to run
# this notebook on another machine; without it the original local path is used unchanged.
DATA_DIR = os.environ.get(
    'COGCC_DATA_DIR',
    '/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data',
)
# Other cells open and save files by bare file name, so the working directory must be the data folder.
os.chdir(DATA_DIR)

# Census-tract population-density layer and the flowline layer, both read relative to DATA_DIR.
pop_density = gpd.read_file('Population_Density_(Census_Tracts)')
flowlines = gpd.read_file('cleaned_gdf.geojson')

In [16]:
# Check the format of the population data - this code uses the census tract data available at:
# https://data-cdphe.opendata.arcgis.com/datasets/CDPHE::population-density-census-tracts/explore?location=38.499827%2C-102.988618%2C6.77

print('Summary of Census Tract Data:')
# DataFrame.info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line after the summary.
pop_density.info()  # General information about the dataset
print('\nFirst few rows of the data:')
print(pop_density.head())  # Preview the first few rows

Summary of Census Tract Data:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1249 entries, 0 to 1248
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    1249 non-null   int32   
 1   FIPS        1249 non-null   object  
 2   County      1249 non-null   object  
 3   Tract_Name  1249 non-null   object  
 4   Area_Land_  1249 non-null   float64 
 5   Population  1249 non-null   int32   
 6   Populati_1  1249 non-null   float64 
 7   geometry    1249 non-null   geometry
dtypes: float64(2), geometry(1), int32(2), object(3)
memory usage: 68.4+ KB
None

First few rows of the data:
   OBJECTID         FIPS    County  \
0         1  08043979000   FREMONT   
1         2  08045951600  GARFIELD   
2         3  08069002803   LARIMER   
3         4  08125963200      YUMA   
4         5  08069002401   LARIMER   

                                     Tract_Name  Area_Land_  Population  \
0   Census Tract 9790,

In [17]:
# Summary statistics (count, mean, std, quartiles, extremes) of the 'Populati_1' column,
# which is the column averaged later as the population density.
density_summary = pop_density['Populati_1'].describe()
print(density_summary)


count     1249.000000
mean      3664.680384
std       3641.594843
min          0.000000
25%        466.600000
50%       3139.000000
75%       5487.300000
max      33066.700000
Name: Populati_1, dtype: float64


In [18]:
# Put the census tracts in the same coordinate reference system as the flowlines,
# so the two layers can be compared spatially.
target_crs = flowlines.crs
pop_density = pop_density.to_crs(target_crs)

# Print both CRSs so the match can be confirmed by eye.
print(f'Flowline CRS: {flowlines.crs}')
print(f'Population CRS: {pop_density.crs}')

Flowline CRS: EPSG:26913
Population CRS: EPSG:26913


In [19]:
# Build a polygon buffer around every flowline; population density is looked up inside it.
# When a buffer touches several census tracts, the later aggregation step takes the plain
# (unweighted) mean of their densities.
# TODO: weight each tract by the share of the buffer it covers. This may not matter if the
# lines are already split into short enough segments.

buffer_distance = 10  # buffer distance in meters (expressed in the units of the flowlines' CRS)
flowlines['buffer'] = flowlines.geometry.buffer(buffer_distance)

# Copy of the flowline attributes whose active geometry is the buffer polygon instead of the line.
# reset_index() exposes the flowline row label as an 'index' column, used later as the grouping key.
attributes_only = flowlines.drop(columns='geometry')
buffered_flowlines = gpd.GeoDataFrame(attributes_only, geometry=flowlines["buffer"]).reset_index()

# how='left' keeps every buffer row even when no tract intersects it; a buffer that
# intersects several tracts produces one output row per tract.
joined = gpd.sjoin(buffered_flowlines, pop_density, how='left', predicate='intersects')

In [20]:
# Count how many of the buffers intersect at least one census tract.
# All tracts are first dissolved into a single geometry so each buffer is checked once.
tract_geometries = pop_density.geometry
if hasattr(tract_geometries, 'union_all'):
    # geopandas >= 1.0 deprecates the `unary_union` attribute in favour of `union_all()`.
    all_tracts = tract_geometries.union_all()
else:
    # Older geopandas releases only provide `unary_union`.
    all_tracts = tract_geometries.unary_union

# Boolean Series, one entry per buffer; summing it counts the True values.
intersection_test = buffered_flowlines.geometry.intersects(all_tracts)
print(intersection_test.sum())

  intersection_test = buffered_flowlines.geometry.intersects(pop_density.geometry.unary_union)


16121


In [21]:
# Calculate the average tract density for each buffered line and store it in a new
# 'average_pop_density' column on `flowlines`.

# Mean of 'Populati_1' over all tracts matched to a line, keyed by the 'index' column
# (the flowline row label exposed by reset_index() when the buffers were built).
density_by_line = joined.groupby("index")["Populati_1"].mean()

# Same result in its original tabular form: columns 'index' and 'average_pop_density'.
aggregated_density = density_by_line.reset_index(name="average_pop_density")

# Attach the means by looking each flowline's row label up in `density_by_line`.
# Compared with merge(left_index=True, right_on='index') this keeps the flowlines' own
# index, adds no helper 'index' column, and is safe to re-run: the column is simply
# overwritten instead of producing '_x'/'_y' duplicates. Labels with no match become NaN.
flowlines = flowlines.assign(
    average_pop_density=density_by_line.reindex(flowlines.index).to_numpy()
)


In [22]:
# Remove the helper columns (the 'buffer' polygons and the 'index' join key) before saving.
# errors='ignore' makes this a no-op for any of them that is not present.
# Rebinding the name instead of using inplace=True keeps the cell free of in-place mutation.
flowlines = flowlines.drop(columns=['buffer', 'index'], errors='ignore')

In [23]:
# Display the flowlines table to confirm the 'average_pop_density' column was attached.
flowlines

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,geometry,average_pop_density
0,flowline_5162,10110,470450.0,470449.0,Active,,Production Facilities,Crude Oil,Steel,2,542,34,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,"LINESTRING (545287.5 4410654.506, 545132.965 4...",25.8
1,flowline_5631,69175,477981.0,447490.0,New Construction,Registration,Production Facilities,Other,Carbon Steel,3,404,2020,7,2018-01-04,2018-02-08 00:00:00,Unknown,1,"LINESTRING (529087.406 4468617.814, 529165.786...",50.3
2,flowline_14531,47120,457300.0,318070.0,Active,Abandonment,Production Facilities,Produced Water,Steel,2,18,2030,13,2011-08-10,2018-03-09 00:00:00,Unknown,1,"LINESTRING (526647.996 4445503.586, 526647.873...",37.5
3,flowline_14519,100322,457931.0,422528.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,1135,435,21,2004-05-10,2018-03-15 00:00:00,Unknown,1,"LINESTRING (534932.646 4463662.371, 535082.761...",49.0
4,flowline_11319,100322,466186.0,455178.0,Abandoned,Registration,Production Facilities,Multiphase,Carbon Steel,2,768,1006,31,1993-11-07,2018-03-22 00:00:00,Unknown,1,"LINESTRING (521484.139 4483010.433, 521636.926...",112.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16116,flowline_14794,96155,456386.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-13,,,0,"LINESTRING (597095.529 4518130.982, 597416.31 ...",2.3
16117,flowline_14795,96155,456381.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-16,,,0,"LINESTRING (597095.529 4518130.982, 597416.31 ...",2.3
16118,flowline_14796,96155,456382.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-13,,,0,"LINESTRING (597095.529 4518130.982, 597416.31 ...",2.3
16119,flowline_14902,35080,455592.0,443145.0,Active,Registration,Production Facilities,Crude Oil,HDPE,3,1175,40,7,2017-11-15,,,0,"LINESTRING (637207.682 4380630.737, 637458.657...",0.9


In [24]:
# Save the flowlines, including the new average_pop_density column, as GeoJSON
# in the current working directory.
output_name = "flowlines_with_pop.geojson"
flowlines.to_file(output_name, driver='GeoJSON')

In [25]:
# Read the saved file back in so its contents can be inspected.
saved_name = 'flowlines_with_pop.geojson'
flowlines_with_pop_density = gpd.read_file(saved_name)

In [26]:
# Show the last rows of the re-loaded file to confirm 'average_pop_density' was written out.
flowlines_with_pop_density.tail()

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,average_pop_density,geometry
16116,flowline_14794,96155,456386.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-13,,,0,2.3,"LINESTRING (597095.529 4518130.982, 597416.31 ..."
16117,flowline_14795,96155,456381.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-16,,,0,2.3,"LINESTRING (597095.529 4518130.982, 597416.31 ..."
16118,flowline_14796,96155,456382.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,6,2018-06-13,,,0,2.3,"LINESTRING (597095.529 4518130.982, 597416.31 ..."
16119,flowline_14902,35080,455592.0,443145.0,Active,Registration,Production Facilities,Crude Oil,HDPE,3,1175,40,7,2017-11-15,,,0,0.9,"LINESTRING (637207.682 4380630.737, 637458.657..."
16120,flowline_14914,39560,455244.0,318928.0,Abandoned,Registration,Production Facilities,Condensate,Steel,3,199,1000,42,1983-02-15,,,0,170.2,"LINESTRING (496551.343 4443672.866, 496495.808..."
