In [1]:
import geopandas as gpd
import os
os.chdir('/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data')

pop_density=gpd.read_file('Population_Density_(Census_Tracts)')
flowlines=gpd.read_file('similarity_matrix_result.geojson')

In [2]:
# Check the format of the population data - this code uses the census tract data available at:
# https://data-cdphe.opendata.arcgis.com/datasets/CDPHE::population-density-census-tracts/explore?location=38.499827%2C-102.988618%2C6.77

print('Summary of Census Tract Data:')
print(pop_density.info())  # General information about the dataset
print('\nFirst few rows of the data:')
print(pop_density.head())  # Preview the first few rows

Summary of Census Tract Data:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1249 entries, 0 to 1248
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    1249 non-null   int32   
 1   FIPS        1249 non-null   object  
 2   County      1249 non-null   object  
 3   Tract_Name  1249 non-null   object  
 4   Area_Land_  1249 non-null   float64 
 5   Population  1249 non-null   int32   
 6   Populati_1  1249 non-null   float64 
 7   geometry    1249 non-null   geometry
dtypes: float64(2), geometry(1), int32(2), object(3)
memory usage: 68.4+ KB
None

First few rows of the data:
   OBJECTID         FIPS    County  \
0         1  08043979000   FREMONT   
1         2  08045951600  GARFIELD   
2         3  08069002803   LARIMER   
3         4  08125963200      YUMA   
4         5  08069002401   LARIMER   

                                     Tract_Name  Area_Land_  Population  \
0   Census Tract 9790,

In [3]:
print(pop_density['Populati_1'].describe())


count     1249.000000
mean      3664.680384
std       3641.594843
min          0.000000
25%        466.600000
50%       3139.000000
75%       5487.300000
max      33066.700000
Name: Populati_1, dtype: float64


In [4]:
# Ensure matching coordinates

pop_density = pop_density.to_crs(flowlines.crs)

print(f'Flowline CRS: {flowlines.crs}')
print(f'Population CRS: {pop_density.crs}')

Flowline CRS: EPSG:4269
Population CRS: EPSG:4269


In [5]:
# Create a buffer around each line in which to calculate population density
# If a buffer lies within multiple tracts, we will take the average density between the tracts.
# A better approach would be an average weighted by the proportion of the buffer in each tract, but I am still working on this.
# It may be possible that our lines are cut into small enough chunks that this isn't an issue.

buffer_distance = 10  # buffer distance in meters
flowlines['buffer'] = flowlines.geometry.buffer(buffer_distance)

buffered_flowlines = gpd.GeoDataFrame(flowlines.drop(columns='geometry'), geometry=flowlines["buffer"])  # create a new dataframe that uses the buffered geometry
buffered_flowlines = buffered_flowlines.reset_index()  # reset index columns

# Perform a left spatial join with the updated predicate parameter
joined = gpd.sjoin(buffered_flowlines, pop_density, how='left', predicate='intersects')


  flowlines['buffer'] = flowlines.geometry.buffer(buffer_distance)


In [6]:
# test how many of the buffers intersect at least one tract

intersection_test = buffered_flowlines.geometry.intersects(pop_density.geometry.unary_union)
print(intersection_test.sum())  

  intersection_test = buffered_flowlines.geometry.intersects(pop_density.geometry.unary_union)


15911


In [7]:
# calculate average density for each buffered line and put it in a new column

aggregated_density = (joined.groupby("index")["Populati_1"].mean().reset_index(name="average_pop_density")) 

flowlines=flowlines.merge(aggregated_density, left_index=True, right_on='index', how='left')


In [8]:
flowlines

Unnamed: 0,operator_number,flowline_id,location_id,status,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,shape_length,line_age_yr,root_cause,risk,risk_probability,geometry,buffer,index,average_pop_density
0,10110,470450.0,470449.0,Active,Production Facilities,Crude Oil,Steel,2,542,34,165,52,Unknown,1,1.000000,"LINESTRING (-104.47065 39.8447, -104.47245 39....","POLYGON ((-107.23386 30.23405, -108.16263 30.5...",0,3664.680384
1,69175,477981.0,447490.0,New Construction,Production Facilities,Other,Carbon Steel,3,404,2430,123,7,Unknown,1,1.000000,"LINESTRING (-104.65739 40.36763, -104.65647 40...","POLYGON ((-102.73157 50.18043, -101.779 49.944...",1,3664.680384
2,47120,457300.0,318070.0,Active,Production Facilities,Produced Water,Steel,2,18,2030,5,13,Unknown,1,1.000000,"LINESTRING (-104.68709 40.15947, -104.68709 40...","POLYGON ((-94.69273 39.82351, -94.77378 38.845...",2,3664.680384
3,100322,457931.0,422528.0,Active,Production Facilities,Multiphase,Carbon Steel,2,1135,435,345,20,Unknown,1,1.000000,"LINESTRING (-104.58882 40.32276, -104.58707 40...","POLYGON ((-96.09493 45.60044, -95.61825 44.742...",3,3664.680384
4,100322,466186.0,455178.0,Abandoned,Production Facilities,Multiphase,Carbon Steel,2,768,1006,234,31,Unknown,1,1.000000,"LINESTRING (-104.74646 40.49752, -104.74466 40...","POLYGON ((-98.09252 47.96244, -97.3927 47.2744...",4,3664.680384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15906,96155,456386.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,"LINESTRING (-103.84883 40.80845, -103.84502 40...","POLYGON ((-106.01459 50.57111, -105.04732 50.7...",15906,3664.680384
15907,96155,456381.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,"LINESTRING (-103.84883 40.80845, -103.84502 40...","POLYGON ((-106.01459 50.57111, -105.04732 50.7...",15907,3664.680384
15908,96155,456382.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,"LINESTRING (-103.84883 40.80845, -103.84502 40...","POLYGON ((-106.01459 50.57111, -105.04732 50.7...",15908,3664.680384
15909,35080,455592.0,443145.0,Active,Production Facilities,Crude Oil,HDPE,3,1175,40,358,7,No Spill,0,0.796337,"LINESTRING (-103.40274 39.56441, -103.39982 39...","POLYGON ((-103.41584 49.5644, -102.43559 49.51...",15909,3664.680384


In [9]:
# Save the updated file
flowlines.drop(columns=['buffer', 'index']).to_file(
    'flowlines_with_pop_density.geojson',
    driver='GeoJSON'
)


In [10]:
flowlines_with_pop_density=gpd.read_file('flowlines_with_pop_density.geojson')

In [12]:
flowlines_with_pop_density.tail()

Unnamed: 0,operator_number,flowline_id,location_id,status,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,shape_length,line_age_yr,root_cause,risk,risk_probability,average_pop_density,geometry
15906,96155,456386.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,3664.680384,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
15907,96155,456381.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,3664.680384,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
15908,96155,456382.0,433999.0,Active,Production Facilities,Multiphase,Carbon Steel,3,1404,150,427,6,No Spill,0,0.873322,3664.680384,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
15909,35080,455592.0,443145.0,Active,Production Facilities,Crude Oil,HDPE,3,1175,40,358,7,No Spill,0,0.796337,3664.680384,"LINESTRING (-103.40274 39.56441, -103.39982 39..."
15910,39560,455244.0,318928.0,Abandoned,Production Facilities,Condensate,Steel,3,199,1000,60,41,No Spill,0,0.750714,3664.680384,"LINESTRING (-105.04049 40.14339, -105.04114 40..."
