In [1]:
import geopandas as gpd
import rasterio
from rasterio import features
import pandas as pd
import numpy as np
import os
from shapely.geometry import LineString
from pyproj import CRS


# Directory holding the notebook's input and output files. The default is the
# original hard-coded location; set the COGCC_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'COGCC_DATA_DIR',
    '/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data',
)
# Later cells read and write files by bare name, so DATA_DIR must be the
# working directory.
os.chdir(DATA_DIR)

# Flowline geometries to be enriched with elevation statistics.
flowlines = gpd.read_file('flowlines_with_pop.geojson')
# Elevation raster (DEM). The handle is left open because later cells sample from it.
dem = rasterio.open('output_USGS30m.tif')

In [2]:
# Bring the flowlines into the DEM's coordinate reference system when the two
# differ, so line coordinates can be used directly to sample the raster.
dem_crs = CRS(dem.crs)

flowlines = flowlines.to_crs(dem_crs) if flowlines.crs != dem_crs else flowlines

def sample_elevations_along_line(line_geom, dem, num_samples=100):
    """
    Sample DEM elevations at evenly spaced points along a line geometry.

    Parameters:
    - line_geom: Shapely line geometry of the flowline; it must provide
      `length` and `interpolate(distance)`. `None` or an empty geometry
      yields an empty list instead of raising.
    - dem: Opened rasterio DEM object; only the first band is read.
    - num_samples: Number of points to sample, spaced evenly from the start
      of the line (distance 0) to its end (distance `line_geom.length`).

    Returns:
    - elevations: List of Python floats, one per point that produced a valid
      value. Points equal to `dem.nodata`, NaN values, and points whose
      sampling raised are skipped, so the list may be shorter than
      `num_samples` (or empty).
    """
    # Features without usable geometry have nothing to sample.
    if line_geom is None or getattr(line_geom, 'is_empty', False):
        return []

    nodata = dem.nodata
    elevations = []
    for distance in np.linspace(0, line_geom.length, num_samples):
        point = line_geom.interpolate(distance)
        try:
            # dem.sample yields one array of band values per coordinate;
            # take the first band of the single coordinate requested.
            value = next(iter(dem.sample([(point.x, point.y)])))[0]
        except Exception:
            # Best effort: a point that cannot be sampled is skipped rather
            # than aborting the whole line.
            continue

        if nodata is not None and value == nodata:
            continue

        # Convert to a Python float so that callers aggregating with sum()
        # do not accumulate in the raster dtype (float32 rounding, or
        # overflow for small integer dtypes).
        value = float(value)
        if np.isnan(value):
            continue
        elevations.append(value)

    return elevations

# Collect one record of elevation statistics per flowline.
results = []

# Only the index label and the geometry are needed, so iterate the geometry
# column directly instead of materialising every row with iterrows().
for idx, line_geom in flowlines.geometry.items():
    # Sample elevations along the line
    elevations = sample_elevations_along_line(line_geom, dem)

    if elevations:
        # Aggregate in float64: summing the raw raster scalars would
        # accumulate in the raster dtype and lose precision.
        elev = np.asarray(elevations, dtype=np.float64)
        max_elev = float(elev.max())
        min_elev = float(elev.min())
        avg_elev = float(elev.mean())
    else:
        # No valid samples for this flowline.
        max_elev = min_elev = avg_elev = None

    results.append({
        'flow_idx': idx,            # The original index from flowlines
        'Max_Elevation': max_elev,
        'Min_Elevation': min_elev,
        'Avg_Elevation': avg_elev
    })

# Build the table keyed by the original flowlines index. The explicit column
# list keeps set_index working even when `results` is empty.
elevation_df = pd.DataFrame(
    results,
    columns=['flow_idx', 'Max_Elevation', 'Min_Elevation', 'Avg_Elevation'],
).set_index('flow_idx')

# Export the results to an Excel file. The index is written because
# 'flow_idx' is the only key linking each row back to its flowline.
# Replace 'flowline_elevations.xlsx' with your desired output file name
elevation_df.to_excel('flowline_elevations.xlsx', index=True)

print("Elevation statistics calculated and saved to 'flowline_elevations.xlsx'.")

Elevation statistics calculated and saved to 'flowline_elevations.xlsx'.


In [3]:
# Left-join the elevation statistics onto the flowlines by index label;
# elevation_df is indexed by 'flow_idx', which holds the original flowlines index.
flowlines_merged = flowlines.join(elevation_df, how='left')

In [4]:
# Only the average elevation is kept for the final dataset. Reassigning
# (rather than mutating with inplace=True) keeps the cell free of in-place
# side effects, and errors='ignore' makes it safe to re-run.
flowlines_merged = flowlines_merged.drop(columns=['Max_Elevation', 'Min_Elevation'], errors='ignore')

In [5]:
# Rename the two averaged columns to the avg_* names used in the final dataset.
flowlines_merged = flowlines_merged.rename(
    columns={
        'Avg_Elevation': 'avg_elevation',
        'average_pop_density': 'avg_population',
    }
)

In [6]:
# Preview the first rows of the merged table.
flowlines_merged.head()

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,avg_population,geometry,avg_elevation
0,flowline_5162,10110,470450.0,470449.0,Active,,Production Facilities,Crude Oil,Steel,2,542,34,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,25.8,"LINESTRING (-104.47065 39.8447, -104.47245 39....",1613.260254
1,flowline_5631,69175,477981.0,447490.0,New Construction,Registration,Production Facilities,Other,Carbon Steel,3,404,2020,7,2018-01-04,2018-02-08 00:00:00,Unknown,1,50.3,"LINESTRING (-104.65739 40.36763, -104.65647 40...",1419.840332
2,flowline_14531,47120,457300.0,318070.0,Active,Abandonment,Production Facilities,Produced Water,Steel,2,18,2030,13,2011-08-10,2018-03-09 00:00:00,Unknown,1,37.5,"LINESTRING (-104.68709 40.15947, -104.68709 40...",1488.272827
3,flowline_14519,100322,457931.0,422528.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,1135,435,21,2004-05-10,2018-03-15 00:00:00,Unknown,1,49.0,"LINESTRING (-104.58882 40.32276, -104.58707 40...",1468.571411
4,flowline_11319,100322,466186.0,455178.0,Abandoned,Registration,Production Facilities,Multiphase,Carbon Steel,2,768,1006,31,1993-11-07,2018-03-22 00:00:00,Unknown,1,112.4,"LINESTRING (-104.74646 40.49752, -104.74466 40...",1470.980591


In [7]:
# Final column layout: selecting by this list both reorders the columns and
# keeps only the ones named here.
new_order = [
    'unique_id',
    'operator_number',
    'flowline_id',
    'location_id',
    'status',
    'flowline_action',
    'location_type',
    'fluid',
    'material',
    'diameter_in',
    'length_ft',
    'max_operating_pressure',
    'avg_population',
    'avg_elevation',
    'line_age_yr',
    'construct_date',
    'spill_date',
    'root_cause',
    'risk',
    'geometry',
]
flowlines_merged = flowlines_merged[new_order]
flowlines_merged

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,avg_population,avg_elevation,line_age_yr,construct_date,spill_date,root_cause,risk,geometry
0,flowline_5162,10110,470450.0,470449.0,Active,,Production Facilities,Crude Oil,Steel,2,542,34,25.8,1613.260254,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,"LINESTRING (-104.47065 39.8447, -104.47245 39...."
1,flowline_5631,69175,477981.0,447490.0,New Construction,Registration,Production Facilities,Other,Carbon Steel,3,404,2020,50.3,1419.840332,7,2018-01-04,2018-02-08 00:00:00,Unknown,1,"LINESTRING (-104.65739 40.36763, -104.65647 40..."
2,flowline_14531,47120,457300.0,318070.0,Active,Abandonment,Production Facilities,Produced Water,Steel,2,18,2030,37.5,1488.272827,13,2011-08-10,2018-03-09 00:00:00,Unknown,1,"LINESTRING (-104.68709 40.15947, -104.68709 40..."
3,flowline_14519,100322,457931.0,422528.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,1135,435,49.0,1468.571411,21,2004-05-10,2018-03-15 00:00:00,Unknown,1,"LINESTRING (-104.58882 40.32276, -104.58707 40..."
4,flowline_11319,100322,466186.0,455178.0,Abandoned,Registration,Production Facilities,Multiphase,Carbon Steel,2,768,1006,112.4,1470.980591,31,1993-11-07,2018-03-22 00:00:00,Unknown,1,"LINESTRING (-104.74646 40.49752, -104.74466 40..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16116,flowline_14794,96155,456386.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,2.3,1453.729248,6,2018-06-13,,,0,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
16117,flowline_14795,96155,456381.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,2.3,1453.729248,6,2018-06-16,,,0,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
16118,flowline_14796,96155,456382.0,433999.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,3,1404,150,2.3,1453.729248,6,2018-06-13,,,0,"LINESTRING (-103.84883 40.80845, -103.84502 40..."
16119,flowline_14902,35080,455592.0,443145.0,Active,Registration,Production Facilities,Crude Oil,HDPE,3,1175,40,0.9,1576.464722,7,2017-11-15,,,0,"LINESTRING (-103.40274 39.56441, -103.39982 39..."


In [8]:
# Write the merged table to disk as GeoJSON; the path is relative to the
# current working directory.
flowlines_merged.to_file("final_dataset.geojson", driver='GeoJSON')

In [13]:
# Distinct, non-null flowline identifiers present in the merged dataset.
unique_flowline_ids = flowlines_merged['unique_id'].dropna().unique()
unique_flowline_id_list = list(unique_flowline_ids)
print(f"Total unique flowlines in 'flowlines_merged': {len(unique_flowline_ids)}")

# Load the full-length flowlines and keep only the rows whose unique_id
# appears in the merged dataset.
full_flowlines = gpd.read_file('full_length_flowlines.geojson')
id_in_merged = full_flowlines['unique_id'].isin(unique_flowline_id_list)
updated_full_flowlines = full_flowlines[id_in_merged]

# Save the filtered subset, then report how many rows were retained.
updated_full_flowlines.to_file('updated_full_flowlines.geojson', driver='GeoJSON')
print(f"Updated flowlines retained: {len(updated_full_flowlines)}")

Total unique flowlines in 'flowlines_merged': 6311
Updated flowlines retained: 6311


In [14]:
# Display the filtered full-length flowlines. The previous name,
# `filtered_full_flowlines`, is not defined anywhere in this notebook and
# would raise NameError on a fresh kernel.
updated_full_flowlines

Unnamed: 0,ACTIONDESCRIPTION,BEDDINGMATERIAL,COMPANY_NAME,CONSTRUCTDATE,Diam_in,ENDLAT,ENDLONG,ENTIRELINEREMOVED,FLOWLINEACTION,FLOWLINEID,...,PIPEMATERIAL,RECEIVE_DATE,SHAPE_Length,STARTLAT,STARTLOCATIONID,STARTLONG,Status,TYPEOFFLUIDTRANS,unique_id,geometry
0,The flowline serving the Emerson 3-29J (05-123...,Native Materials,CRESTONE PEAK RESOURCES OPERATING LLC ...,1983-11-09,2.00,40.109444,-104.909686,,,470446.0,...,Carbon Steel,2023-10-24 08:33:49.223,414.751530,40.105743,,-104.909860,Active,Multiphase,flowline_1,"MULTILINESTRING ((507682.447 4439497.657, 5076..."
1,The flowline (12311399_FL) servicing the Emers...,Native Materials,CRESTONE PEAK RESOURCES OPERATING LLC ...,1983-12-07,2.00,40.109441,-104.909670,True,Out of Service,470445.0,...,Carbon Steel,2023-10-24 08:33:49.223,312.594254,40.112203,319521.0,-104.909862,Active,Multiphase,flowline_2,"MULTILINESTRING ((507681.553 4440214.644, 5076..."
5,This is the only off-location flowline owned b...,Native Materials,PETERSON ENERGY OPERATING INC ...,1979-04-08,2.00,40.063090,-105.034120,,Pre-Abandonment Notice,467411.0,...,Fiberglass,2023-10-24 16:00:01.117,84.099157,40.063610,318680.0,-105.034830,Active,Crude Oil Emulsion,flowline_6,"MULTILINESTRING ((497028.859 4434818.938, 4970..."
7,The flowline (12325148FL) servicing the Elliot...,Native Materials,EXTRACTION OIL & GAS INC ...,2007-06-06,2.25,40.267332,-105.037459,True,Out of Service,462601.0,...,Carbon Steel,2023-10-24 10:08:19.323,155.389195,40.266917,306712.0,-105.043033,Out of Service,Multiphase,flowline_8,"MULTILINESTRING ((496682.46 4457399.772, 49668..."
8,The flowline (12331240FL) servicing the Elliot...,Native Materials,EXTRACTION OIL & GAS INC ...,2010-05-06,2.25,40.267332,-105.037459,True,Out of Service,462602.0,...,Carbon Steel,2023-10-24 10:08:19.323,486.830336,40.267057,415954.0,-105.039012,Out of Service,Multiphase,flowline_9,"MULTILINESTRING ((496340.456 4457384.126, 4963..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14795,,Native Materials,WHITING OIL & GAS CORPORATION ...,2018-06-13,3.50,40.809314,-103.845055,,Registration,456382.0,...,Carbon Steel,2018-07-10 10:35:34.937,427.915127,40.808659,434267.0,-103.848723,Active,Multiphase,flowline_14796,"MULTILINESTRING ((597095.529 4518130.982, 5970..."
14901,,Native Materials,GRAND MESA OPERATING CO ...,2017-11-15,3.50,39.565020,-103.403050,,Registration,455592.0,...,HDPE,2018-04-25 14:48:17.890,358.332547,39.564410,438955.0,-103.399820,Active,Crude Oil,flowline_14902,"MULTILINESTRING ((637207.682 4380630.737, 6371..."
14903,,,NOBLE ENERGY INC ...,2007-11-13,2.00,40.276203,-104.862904,,Registration,455420.0,...,,2018-05-16 14:52:52.080,588.152892,40.281428,332552.0,-104.861050,Active,,flowline_14904,"MULTILINESTRING ((511798.245 4458997.213, 5116..."
14913,,Native Materials,TOP OPERATING COMPANY ...,1983-02-15,3.00,40.143491,-105.041139,,Registration,455244.0,...,Other,2018-05-16 13:30:33.313,60.816210,40.147290,319168.0,-105.040915,Abandoned,Condensate,flowline_14914,"MULTILINESTRING ((496551.343 4443672.866, 4964..."
