In [1]:
import geopandas as gpd
import os
os.chdir('/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data')

pop_density=gpd.read_file('Population_Density_(Census_Tracts)')
flowlines=gpd.read_file('cleaned_gdf.geojson')

In [2]:
# Check the format of the population data - this code uses the census tract data available at:
# https://data-cdphe.opendata.arcgis.com/datasets/CDPHE::population-density-census-tracts/explore?location=38.499827%2C-102.988618%2C6.77

print('Summary of Census Tract Data:')
print(pop_density.info())  # General information about the dataset
print('\nFirst few rows of the data:')
print(pop_density.head())  # Preview the first few rows

Summary of Census Tract Data:
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1249 entries, 0 to 1248
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    1249 non-null   int32   
 1   FIPS        1249 non-null   object  
 2   County      1249 non-null   object  
 3   Tract_Name  1249 non-null   object  
 4   Area_Land_  1249 non-null   float64 
 5   Population  1249 non-null   int32   
 6   Populati_1  1249 non-null   float64 
 7   geometry    1249 non-null   geometry
dtypes: float64(2), geometry(1), int32(2), object(3)
memory usage: 68.4+ KB
None

First few rows of the data:
   OBJECTID         FIPS    County  \
0         1  08043979000   FREMONT   
1         2  08045951600  GARFIELD   
2         3  08069002803   LARIMER   
3         4  08125963200      YUMA   
4         5  08069002401   LARIMER   

                                     Tract_Name  Area_Land_  Population  \
0   Census Tract 9790,

In [3]:
print(pop_density['Populati_1'].describe())


count     1249.000000
mean      3664.680384
std       3641.594843
min          0.000000
25%        466.600000
50%       3139.000000
75%       5487.300000
max      33066.700000
Name: Populati_1, dtype: float64


In [4]:
# Ensure matching coordinates

pop_density = pop_density.to_crs(flowlines.crs)

print(f'Flowline CRS: {flowlines.crs}')
print(f'Population CRS: {pop_density.crs}')

Flowline CRS: EPSG:4326
Population CRS: EPSG:4326


In [5]:
# Create a buffer around each line in which to calculate population density
# If a buffer lies within multiple tracts, we will take the average density between the tracts.
# A better approach would be an average weighted by the proportion of the buffer in each tract, but I am still working on this.
# It may be possible that our lines are cut into small enough chunks that this isn't an issue.

buffer_distance = 10  # buffer distance in meters
flowlines['buffer'] = flowlines.geometry.buffer(buffer_distance)

buffered_flowlines = gpd.GeoDataFrame(flowlines.drop(columns='geometry'), geometry=flowlines["buffer"])  # create a new dataframe that uses the buffered geometry
buffered_flowlines = buffered_flowlines.reset_index()  # reset index columns

# Perform a left spatial join with the updated predicate parameter
joined = gpd.sjoin(buffered_flowlines, pop_density, how='left', predicate='intersects')


  flowlines['buffer'] = flowlines.geometry.buffer(buffer_distance)


In [6]:
# test how many of the buffers intersect at least one tract

intersection_test = buffered_flowlines.geometry.intersects(pop_density.geometry.unary_union)
print(intersection_test.sum())  

  intersection_test = buffered_flowlines.geometry.intersects(pop_density.geometry.unary_union)


31418


In [7]:
# calculate average density for each buffered line and put it in a new column

aggregated_density = (joined.groupby("index")["Populati_1"].mean().reset_index(name="average_pop_density")) 

flowlines=flowlines.merge(aggregated_density, left_index=True, right_on='index', how='left')


In [8]:
flowlines.drop(columns=['buffer', 'index'], errors='ignore', inplace=True)

In [9]:
flowlines

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,geometry,average_pop_density
0,16962,10110,470450.0,470449.0,Active,Pre-Abandonment Notice,Production Facilities,Crude Oil,Steel,2,542,88,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,"LINESTRING (-104.47065 39.8447, -104.47245 39....",3664.680384
1,145049,47120,462980.0,460727.0,Removed,Abandonment,Production Facilities,Produced Water,,1,162,120,19,2006-03-09,2018-03-09 00:00:00,Unknown,1,"LINESTRING (-104.63096 40.1376, -104.63093 40....",3664.680384
2,34293,100322,457928.0,457851.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,2000,21,38,1987-03-20,2018-03-15 00:00:00,Unknown,1,"LINESTRING (-104.60942 40.32153, -104.60267 40...",3664.680384
3,95145,100322,455852.0,455178.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,852,95,28,1997-03-09,2018-03-22 00:00:00,Unknown,1,"LINESTRING (-104.7565 40.48315, -104.75445 40....",3664.680384
4,104079,10459,459568.0,452644.0,Abandoned,,Production Facilities,Multiphase,Steel,2,488,136,46,1979-02-11,2018-03-26 00:00:00,Corrosion,1,"LINESTRING (-104.77896 39.8691, -104.77877 39....",3664.680384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31413,159133,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,2,60,7,2018-03-21,,,0,"LINESTRING (-107.8214 37.15909, -107.82141 37....",3664.680384
31414,159134,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,4,60,7,2018-03-21,,,0,"LINESTRING (-107.7276 37.13689, -107.72761 37....",3664.680384
31415,159140,10456,476084.0,476062.0,Out of Service,,Well Site,Produced Water,Steel,8,1096,700,19,2005-06-01,,,0,"LINESTRING (-108.05 39.39961, -108.05252 39.39...",3664.680384
31416,159141,10456,476810.0,335102.0,Abandoned,Registration,Well Site,Produced Water,High-Density Polyethylene (Hdpe),12,4,700,20,2004-11-01,,,0,"LINESTRING (-107.67276 39.47018, -107.67276 39...",3664.680384


In [10]:
flowlines.to_file("flowlines_with_pop.geojson", driver='GeoJSON')

In [11]:
flowlines_with_pop_density=gpd.read_file('flowlines_with_pop.geojson')

In [12]:
flowlines_with_pop_density.tail()

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,average_pop_density,geometry
31413,159133,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,2,60,7,2018-03-21,,,0,3664.680384,"LINESTRING (-107.8214 37.15909, -107.82141 37...."
31414,159134,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,4,60,7,2018-03-21,,,0,3664.680384,"LINESTRING (-107.7276 37.13689, -107.72761 37...."
31415,159140,10456,476084.0,476062.0,Out of Service,,Well Site,Produced Water,Steel,8,1096,700,19,2005-06-01,,,0,3664.680384,"LINESTRING (-108.05 39.39961, -108.05252 39.39..."
31416,159141,10456,476810.0,335102.0,Abandoned,Registration,Well Site,Produced Water,High-Density Polyethylene (Hdpe),12,4,700,20,2004-11-01,,,0,3664.680384,"LINESTRING (-107.67276 39.47018, -107.67276 39..."
31417,159142,10559,479239.0,320870.0,Active,Registration,Well Site,Produced Water,Carbon Steel,2,68,80,19,2005-05-26,,,0,3664.680384,"LINESTRING (-107.4389 37.03008, -107.43886 37...."
