In [2]:
import geopandas as gpd
import rasterio
from rasterio import features
import pandas as pd
import numpy as np
import os
from shapely.geometry import LineString
from pyproj import CRS


# Work from the project data folder so the relative file names below resolve.
os.chdir(
    '/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data'
)

# Flowline features (geometry plus attributes) read from GeoJSON.
flowlines = gpd.read_file('flowlines_with_pop.geojson')
# DEM raster handle; it stays open because later cells sample from it.
dem = rasterio.open('output_USGS30m.tif')

In [3]:
# Sampling the raster with flowline coordinates only makes sense when both
# layers share a CRS, so reproject the flowlines to the DEM's CRS if needed.
dem_crs = CRS(dem.crs)

flowlines = flowlines.to_crs(dem_crs) if flowlines.crs != dem_crs else flowlines

def sample_elevations_along_line(line_geom, dem, num_samples=100):
    """
    Samples elevations along a line geometry from a DEM raster.

    Parameters:
    - line_geom: Shapely LineString geometry of the flowline. ``None`` or an
      empty geometry yields an empty list instead of raising.
    - dem: Opened rasterio DEM object. Only the first value returned for each
      sampled point is used.
    - num_samples: Number of equally spaced points to sample along the line.

    Returns:
    - elevations: List of elevation values sampled from the DEM. Points that
      hit the raster's nodata value, evaluate to NaN, or cannot be sampled are
      left out, so the list may hold fewer than ``num_samples`` entries.
    """
    # Rows without a usable geometry have nothing to sample.
    if line_geom is None or line_geom.is_empty:
        return []

    # Generate equally spaced points along the line and collect their coordinates
    coords = []
    for distance in np.linspace(0, line_geom.length, num_samples):
        point = line_geom.interpolate(distance)
        coords.append((point.x, point.y))

    try:
        # One sample() call for the whole line instead of one call per point.
        sampled = [band_values[0] for band_values in dem.sample(coords)]
    except Exception:
        # Best-effort fallback: retry point by point so that a single failing
        # coordinate only loses that one sample rather than the whole line.
        sampled = []
        for coord in coords:
            try:
                sampled.append(list(dem.sample([coord]))[0][0])
            except Exception:
                sampled.append(None)

    # Keep only real measurements: drop failed samples, nodata cells and NaNs
    nodata = dem.nodata
    elevations = []
    for value in sampled:
        if value is None:
            continue
        if nodata is not None and value == nodata:
            continue
        if np.isnan(value):
            continue
        elevations.append(value)
    return elevations

# Collect one record of elevation statistics per flowline
results = []

# Iterate over the geometry column only: iterrows() would build a full Series
# for every row just to read a single field.
for idx, line_geom in flowlines.geometry.items():
    # Sample elevations along the line
    elevations = sample_elevations_along_line(line_geom, dem)

    # Calculate statistics if we have valid elevations
    if elevations:
        max_elev = max(elevations)
        min_elev = min(elevations)
        avg_elev = sum(elevations) / len(elevations)
    else:
        # No valid samples for this line: leave its statistics empty
        max_elev = min_elev = avg_elev = None

    # Store the original flowlines index alongside the statistics
    results.append({
        'flow_idx': idx,            # The original index from flowlines
        'Max_Elevation': max_elev,
        'Min_Elevation': min_elev,
        'Avg_Elevation': avg_elev
    })

# Build the DataFrame keyed by the flowlines index. Naming the columns
# explicitly keeps set_index() working even when there are no flowlines.
elevation_df = pd.DataFrame(
    results,
    columns=['flow_idx', 'Max_Elevation', 'Min_Elevation', 'Avg_Elevation'],
).set_index('flow_idx')

# Export the results to an Excel file
# Replace 'flowline_elevations.xlsx' with your desired output file name
# The index is written out on purpose: it holds 'flow_idx', the only link
# between each row of statistics and its flowline.
elevation_df.to_excel('flowline_elevations.xlsx')

print("Elevation statistics calculated and saved to 'flowline_elevations.xlsx'.")

Elevation statistics calculated and saved to 'flowline_elevations.xlsx'.


In [4]:
# Attach the elevation statistics to the flowlines by index; the left join
# keeps every flowline row.
flowlines_merged = flowlines.join(other=elevation_df, how='left')

In [5]:
# Only the average elevation is carried forward. errors='ignore' makes the
# cell safe to re-run once the columns are already gone.
flowlines_merged = flowlines_merged.drop(
    columns=['Max_Elevation', 'Min_Elevation'],
    errors='ignore',
)

In [6]:
# Rename both derived columns in one pass.
flowlines_merged = flowlines_merged.rename(
    columns={
        'Avg_Elevation': 'avg_elevation',
        'average_pop_density': 'avg_population',
    }
)

In [7]:
# Preview the first five rows of the merged table.
flowlines_merged.head(n=5)

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,line_age_yr,construct_date,spill_date,root_cause,risk,avg_population,geometry,avg_elevation
0,16962,10110,470450.0,470449.0,Active,Pre-Abandonment Notice,Production Facilities,Crude Oil,Steel,2,542,88,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,3664.680384,"LINESTRING (-104.47065 39.8447, -104.47245 39....",1613.260254
1,145049,47120,462980.0,460727.0,Removed,Abandonment,Production Facilities,Produced Water,,1,162,120,19,2006-03-09,2018-03-09 00:00:00,Unknown,1,3664.680384,"LINESTRING (-104.63096 40.1376, -104.63093 40....",1486.110596
2,34293,100322,457928.0,457851.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,2000,21,38,1987-03-20,2018-03-15 00:00:00,Unknown,1,3664.680384,"LINESTRING (-104.60942 40.32153, -104.60267 40...",1460.385742
3,95145,100322,455852.0,455178.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,852,95,28,1997-03-09,2018-03-22 00:00:00,Unknown,1,3664.680384,"LINESTRING (-104.7565 40.48315, -104.75445 40....",1454.785156
4,104079,10459,459568.0,452644.0,Abandoned,,Production Facilities,Multiphase,Steel,2,488,136,46,1979-02-11,2018-03-26 00:00:00,Corrosion,1,3664.680384,"LINESTRING (-104.77896 39.8691, -104.77877 39....",1598.973145


In [8]:
# Put the columns in their final order; any column not listed here is dropped.
new_order = [
    'unique_id',
    'operator_number',
    'flowline_id',
    'location_id',
    'status',
    'flowline_action',
    'location_type',
    'fluid',
    'material',
    'diameter_in',
    'length_ft',
    'max_operating_pressure',
    'avg_population',
    'avg_elevation',
    'line_age_yr',
    'construct_date',
    'spill_date',
    'root_cause',
    'risk',
    'geometry',
]
flowlines_merged = flowlines_merged[new_order]
flowlines_merged

Unnamed: 0,unique_id,operator_number,flowline_id,location_id,status,flowline_action,location_type,fluid,material,diameter_in,length_ft,max_operating_pressure,avg_population,avg_elevation,line_age_yr,construct_date,spill_date,root_cause,risk,geometry
0,16962,10110,470450.0,470449.0,Active,Pre-Abandonment Notice,Production Facilities,Crude Oil,Steel,2,542,88,3664.680384,1613.260254,52,1972-08-07,2018-01-23 00:00:00,Unknown,1,"LINESTRING (-104.47065 39.8447, -104.47245 39...."
1,145049,47120,462980.0,460727.0,Removed,Abandonment,Production Facilities,Produced Water,,1,162,120,3664.680384,1486.110596,19,2006-03-09,2018-03-09 00:00:00,Unknown,1,"LINESTRING (-104.63096 40.1376, -104.63093 40...."
2,34293,100322,457928.0,457851.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,2000,21,3664.680384,1460.385742,38,1987-03-20,2018-03-15 00:00:00,Unknown,1,"LINESTRING (-104.60942 40.32153, -104.60267 40..."
3,95145,100322,455852.0,455178.0,Active,Registration,Production Facilities,Multiphase,Carbon Steel,2,852,95,3664.680384,1454.785156,28,1997-03-09,2018-03-22 00:00:00,Unknown,1,"LINESTRING (-104.7565 40.48315, -104.75445 40...."
4,104079,10459,459568.0,452644.0,Abandoned,,Production Facilities,Multiphase,Steel,2,488,136,3664.680384,1598.973145,46,1979-02-11,2018-03-26 00:00:00,Corrosion,1,"LINESTRING (-104.77896 39.8691, -104.77877 39...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31413,159133,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,2,60,3664.680384,2050.807861,7,2018-03-21,,,0,"LINESTRING (-107.8214 37.15909, -107.82141 37...."
31414,159134,10000,463013.0,462775.0,Active,Registration,Manifold,Produced Water,Carbon Steel,3,4,60,3664.680384,2017.066528,7,2018-03-21,,,0,"LINESTRING (-107.7276 37.13689, -107.72761 37...."
31415,159140,10456,476084.0,476062.0,Out of Service,,Well Site,Produced Water,Steel,8,1096,700,3664.680384,1868.163940,19,2005-06-01,,,0,"LINESTRING (-108.05 39.39961, -108.05252 39.39..."
31416,159141,10456,476810.0,335102.0,Abandoned,Registration,Well Site,Produced Water,High-Density Polyethylene (Hdpe),12,4,700,3664.680384,1832.454590,20,2004-11-01,,,0,"LINESTRING (-107.67276 39.47018, -107.67276 39..."


In [9]:
# Write the merged, reordered flowlines to disk as GeoJSON.
flowlines_merged.to_file(
    "final_dataset.geojson",
    driver='GeoJSON',
)

In [10]:
# Distinct, non-null flowline IDs present in the merged dataset
unique_flowline_ids = flowlines_merged['unique_id'].dropna().unique()
unique_flowline_id_list = list(unique_flowline_ids)
print(f"Total unique flowlines in 'flowlines_merged': {len(unique_flowline_ids)}")

# Load the full-length flowlines and keep only those whose ID is in that list
full_flowlines = gpd.read_file('full_length_flowlines.geojson')
updated_full_flowlines = full_flowlines[
    full_flowlines['unique_id'].isin(unique_flowline_id_list)
]

# Save the filtered subset, then report how many rows it contains
updated_full_flowlines.to_file('updated_full_flowlines.geojson', driver='GeoJSON')
print(f"Updated flowlines retained: {len(updated_full_flowlines)}")

Total unique flowlines in 'flowlines_merged': 29494
Updated flowlines retained: 29494
