# Import CH4 aerial observations from EDF PermianMap into compiled database

## Import libraries and setup

In [1]:
import os
from datetime import datetime
from copy import deepcopy

import pandas as pd
import geopandas as gpd
import arcgis
from arcgis.gis import GIS

In [2]:
# Define list of feature service id's for EDF PermianMap plume sources
# edf_fs_ids = ['0244d23bf6184d95af9b7a59043d76db',  # Summer 2021 Sources GAO - Carbon Mapper
#               '931fc767c48b4f279a138b1e7164ca1c',  # Fall 2021 Sources GAO - Carbon Mapper
#               # Fall 2019 Sources detected by AVIRIS-NG and GAO provided by NASA-JPL / U. Arizona
#               'e647d5a509414280aa7a1fa9e04d5631'
#               ]

edf_fs_dict = {'id': ['0244d23bf6184d95af9b7a59043d76db', 
                      '931fc767c48b4f279a138b1e7164ca1c', 
                      'e647d5a509414280aa7a1fa9e04d5631'],
               'campaign': ['Summer 2021 Sources GAO - CarbonMapper', 
                            'Fall 2021 Sources GAO - Carbon Mapper', 
                            'Fall 2019 Sources detected by AVIRIS-NG and GAO provided by NASA-JPL / U. Arizona']}

edf_fs_ids = pd.DataFrame.from_dict(edf_fs_dict)

# Define feature service id for MCBU CH4 external studies compiled
external_studies_compiled_fs_id = '73fcf310aa784460a9b102b1693a9314'

# Define feature service id for MCBU facilities dataset
facilities_layer_id = '857b2093bff44132b39e65176c309284'

edf_fs_ids

Unnamed: 0,id,campaign
0,0244d23bf6184d95af9b7a59043d76db,Summer 2021 Sources GAO - CarbonMapper
1,931fc767c48b4f279a138b1e7164ca1c,Fall 2021 Sources GAO - Carbon Mapper
2,e647d5a509414280aa7a1fa9e04d5631,Fall 2019 Sources detected by AVIRIS-NG and GA...


## Functions

In [3]:
def FeatureToGdf(feature_layer):
    """ Converts feature layer to geodataframe
    
    Parameters
    -----------
    feature_layer : ArcGIS API feature layer

    Returns
    -----------
    feature_gdf : geodataframe
    
    """
    import geopandas as gpd
    import json
    
    # .query() returns a FeatureSet
    fset = feature_layer.query()

    # get a GeoJSON string representation of the FeatureSet
    gjson_string = fset.to_geojson

    # read GeoJSON string into a dict
    gjson_dict = json.loads(gjson_string)

    # Read the dict into a gdf
    feature_gdf = gpd.GeoDataFrame.from_features(gjson_dict['features'])
    
    # Return GDF
    return feature_gdf

## Load aerial observations from EDF PermianMap

In [4]:
# Connect to AGOL
agol = GIS()

# Create blank list to store results
edf_aerial_obs_list = []

# Loop over EDF feature services
for index, edf_fs_id in edf_fs_ids.iterrows():
    
    # Get the data from the first layer in the service
    edf_aerial_obs_data = agol.content.get(edf_fs_id['id']).layers[0]

    # Convert the feature layer to a GDF
    edf_aerial_obs_gdf = FeatureToGdf(edf_aerial_obs_data)

    # Add the campaign field to the GDF
    edf_aerial_obs_gdf['obs_campaign'] = edf_fs_id['campaign']
    
    # Append the gdf to the list
    edf_aerial_obs_list.append(edf_aerial_obs_gdf)

# Combine all the GDF's
edf_aerial_obs_all_gdf = pd.concat(edf_aerial_obs_list, axis=0)

# Set the first_detection as datetime field type
edf_aerial_obs_all_gdf['first_detection'] = pd.to_datetime(edf_aerial_obs_all_gdf['first_detection'])

# Set the CRS
edf_aerial_obs_all_gdf = edf_aerial_obs_all_gdf.set_crs(epsg=3857)
edf_aerial_obs_all_gdf

Unnamed: 0,geometry,OBJECTID,source_id,source_lat,source_lon,source_type,source_type_label,number_overflights,source_persistence,confidence_in_persistence,...,sigma_qsource,number_detections,first_detection,last_detection,Operator_Alias,State,County,survey_,obs_campaign,ipcc
0,POINT (-11330412.807 3815944.091),3513,S21P_0001,32.400091,-101.782830,pipeline,Gathering & Boosting,4,0.250,0.683594,...,6.714042,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,WTG GAS PROCESSING,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
1,POINT (-11331714.132 3814245.204),3514,S21P_0002,32.387204,-101.794520,well,Production,4,0.250,0.683594,...,6.795775,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,ENDEAVOR ENERGY RESOURCES LP,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
2,POINT (-11330438.411 3800104.636),3515,S21P_0003,32.279873,-101.783060,compressor,Gathering & Boosting,4,0.250,0.683594,...,5.344691,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,WTG GAS PROCESSING,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
3,POINT (-11327799.025 3753107.361),3516,S21P_0004,31.922237,-101.759350,well,Production,4,0.250,0.683594,...,19.507476,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,CONOCOPHILLIPS CO,Texas,Glasscock,S21,Summer 2021 Sources GAO - CarbonMapper,
4,POINT (-11324953.699 3750909.924),3517,S21P_0005,31.905481,-101.733790,pipeline,Gathering & Boosting,3,0.333,0.578125,...,20.691179,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,TARGA,Texas,Glasscock,S21,Summer 2021 Sources GAO - CarbonMapper,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1751,POINT (-11360794.123 3787392.830),1752,P01402,32.183278,-102.055750,compressor,Gathering & Boosting,1,1.000,0.250000,...,223.100000,1,2019-10-21 17:19:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Martin,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1752,POINT (-11304709.137 3800313.860),1753,P01414,32.281462,-101.551930,compressor,Gathering & Boosting,2,0.500,0.438000,...,39.400000,1,2019-10-21 18:39:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Howard,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1753,POINT (-11459413.173 3753581.612),1754,P01493,31.925853,-102.941660,pipeline,Gathering & Boosting,1,1.000,0.250000,...,29.900000,1,2019-10-23 17:45:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Winkler,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1754,POINT (-11459963.091 3753633.551),1755,P01494,31.926249,-102.946600,pipeline,Gathering & Boosting,1,1.000,0.250000,...,55.200000,1,2019-10-23 17:45:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Winkler,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas


## Check new aerial observations against compiled database on CVX MapHub

In [5]:
# Connect to Chevron MapHub
gis = GIS("https://maphub-amer.tst.azure.chevron.com/portal", client_id='bb6vn14P957pN6bV')
print("Logged in as: " + gis.properties.user.username)

Please sign in to your GIS and paste the code that is obtained below.
If a web browser does not automatically open, please navigate to the URL below yourself instead.
Opening web browser to navigate to: https://maphub-amer.tst.azure.chevron.com/portal/sharing/rest//oauth2/authorize?response_type=code&client_id=bb6vn14P957pN6bV&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&state=tEXc33a1hvrBJ5YmxjUEjOPxvmE99q
Enter code obtained on signing in using SAML: ········




Logged in as: ekqo


In [7]:
# Load aerial observations layer from external studies compiled feature service on MapHub
external_studies_compiled_item = gis.content.get(external_studies_compiled_fs_id)
external_studies_compiled_layers = external_studies_compiled_item.layers
aerial_obs_fset = external_studies_compiled_layers[0].query()
aerial_obs_fset.sdf

Unnamed: 0,objectid,fot_asset_name,child_obj_code,child_obj_desc,mcbu_gis_data_uploaded_date,child_lat,child_long,obs_id,obs_datetime,obs_lat,obs_long,obs_q,obs_sensor,obs_campaign,SHAPE
0,100,,,,NaT,,,,NaT,,,,,,"{""x"": -11361960.6853, ""y"": 3542934.8307000026,..."


In [8]:
# Select those rows in aerial_obs_csv that are not in aerial_obs_fset using obs_id field
new_rows_gdf = edf_aerial_obs_all_gdf[~edf_aerial_obs_all_gdf['source_id'].isin(aerial_obs_fset.sdf['obs_id'])]
new_rows_gdf

Unnamed: 0,geometry,OBJECTID,source_id,source_lat,source_lon,source_type,source_type_label,number_overflights,source_persistence,confidence_in_persistence,...,sigma_qsource,number_detections,first_detection,last_detection,Operator_Alias,State,County,survey_,obs_campaign,ipcc
0,POINT (-11330412.807 3815944.091),3513,S21P_0001,32.400091,-101.782830,pipeline,Gathering & Boosting,4,0.250,0.683594,...,6.714042,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,WTG GAS PROCESSING,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
1,POINT (-11331714.132 3814245.204),3514,S21P_0002,32.387204,-101.794520,well,Production,4,0.250,0.683594,...,6.795775,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,ENDEAVOR ENERGY RESOURCES LP,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
2,POINT (-11330438.411 3800104.636),3515,S21P_0003,32.279873,-101.783060,compressor,Gathering & Boosting,4,0.250,0.683594,...,5.344691,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,WTG GAS PROCESSING,Texas,Martin,S21,Summer 2021 Sources GAO - CarbonMapper,
3,POINT (-11327799.025 3753107.361),3516,S21P_0004,31.922237,-101.759350,well,Production,4,0.250,0.683594,...,19.507476,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,CONOCOPHILLIPS CO,Texas,Glasscock,S21,Summer 2021 Sources GAO - CarbonMapper,
4,POINT (-11324953.699 3750909.924),3517,S21P_0005,31.905481,-101.733790,pipeline,Gathering & Boosting,3,0.333,0.578125,...,20.691179,1,2021-07-26 17:22:00+00:00,07/26/2021 17:22 UTC,TARGA,Texas,Glasscock,S21,Summer 2021 Sources GAO - CarbonMapper,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1751,POINT (-11360794.123 3787392.830),1752,P01402,32.183278,-102.055750,compressor,Gathering & Boosting,1,1.000,0.250000,...,223.100000,1,2019-10-21 17:19:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Martin,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1752,POINT (-11304709.137 3800313.860),1753,P01414,32.281462,-101.551930,compressor,Gathering & Boosting,2,0.500,0.438000,...,39.400000,1,2019-10-21 18:39:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Howard,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1753,POINT (-11459413.173 3753581.612),1754,P01493,31.925853,-102.941660,pipeline,Gathering & Boosting,1,1.000,0.250000,...,29.900000,1,2019-10-23 17:45:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Winkler,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas
1754,POINT (-11459963.091 3753633.551),1755,P01494,31.926249,-102.946600,pipeline,Gathering & Boosting,1,1.000,0.250000,...,55.200000,1,2019-10-23 17:45:00+00:00,<Null>,WTG GAS PROCESSING,Texas,Winkler,,Fall 2019 Sources detected by AVIRIS-NG and GA...,1B2 Oil & Natural Gas


## Load new aerial observations into compiled database

### Perform spatial join of new aerial observations against MCBU facilities data

In [9]:
# Load the MCBU facilities data into a GDF
mcbu_facilities = gis.content.get(facilities_layer_id)

# Create blank list to store results
mcbu_facilities_list = []

# Iterate over the layers in the collection
for layer in mcbu_facilities.layers:

    # Check layer type to only process points
    if layer.properties.geometryType == 'esriGeometryPoint':
        
        # Convert the feature layer to a GDF
        facilities_gdf = FeatureToGdf(layer)

        # Append the gdf to the list
        mcbu_facilities_list.append(facilities_gdf)
        
    else:
        continue
    
# Combine all the df's into a single df for all the years
mcbu_facilities_gdf = pd.concat(mcbu_facilities_list, axis=0)

# Format the date fields as datetime
mcbu_facilities_gdf['MCBU_GIS_DATA_RECEIVED_DATE'] = pd.to_datetime(
    mcbu_facilities_gdf['MCBU_GIS_DATA_RECEIVED_DATE'], unit='ms')

mcbu_facilities_gdf['MCBU_GIS_DATA_UPLOADED_DATE'] = pd.to_datetime(
    mcbu_facilities_gdf['MCBU_GIS_DATA_UPLOADED_DATE'], unit='ms')

# Set the CRS
mcbu_facilities_gdf = mcbu_facilities_gdf.set_crs(epsg=4267)
#mcbu_facilities_gdf

In [10]:
# Reproject to UTM Zone 14N
new_rows_gdf = new_rows_gdf.to_crs(epsg=32614)
mcbu_facilities_gdf = mcbu_facilities_gdf.to_crs(epsg=32614)

# Use spatial join to find nearest facility to each aerial observation
new_rows_joined_gdf = gpd.sjoin_nearest(
    new_rows_gdf, mcbu_facilities_gdf, how='inner', max_distance=137)

# Drop multiple results for the same location
# sjoin_nearest will return multiple results if points are co-located
# MCBU facility data has co-located data; e.g. well and salt water disposal on same point 
new_rows_joined_gdf = new_rows_joined_gdf.drop_duplicates(subset = ['source_id'])
new_rows_joined_gdf

Unnamed: 0,geometry,OBJECTID_left,source_id,source_lat,source_lon,source_type,source_type_label,number_overflights,source_persistence,confidence_in_persistence,...,BTU_RATE,HP,TANK_ENG_SERVICE,MCBU_GIS_CHILD_CLASS_SYBMOL,MCBU_GIS_DATA_UPLOADED_BY,MCBU_GIS_DATA_SOURCE_NAME,MCBU_GIS_DATA_SOURCE_TYPE,MCBU_GIS_DATA_REMARKS,MCBU_GIS_DATA_RECEIVED_DATE,MCBU_GIS_DATA_UPLOADED_DATE
206,POINT (17421.057 3548911.341),3719,S21P_0214,31.9736,-104.10462,well,Production,3,0.333,0.578125,...,,,,WELL,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
215,POINT (33938.713 3558245.468),3728,S21P_0223,32.0644,-103.93497,pipeline,Gathering & Boosting,3,0.333,0.578125,...,,,,DISPOSAL,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
311,POINT (33936.752 3558244.034),4890,F21P_0312,32.064386,-103.93499,pipeline,Gathering & Boosting,5,0.2,0.762695,...,,,,DISPOSAL,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
364,POINT (19516.673 3535361.093),3877,S21P_0372,31.852721,-104.07582,well,Production,2,0.5,0.4375,...,,,,WELL,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
431,POINT (52695.459 3575495.230),3944,S21P_0440,32.22706,-103.745,compressor,Gathering & Boosting,2,0.5,0.4375,...,,,,COMPRESSOR STATION,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
21,POINT (52710.445 3575476.014),4600,F21P_0022,32.226893,-103.744833,compressor,Gathering & Boosting,4,1.0,0.683594,...,,,,COMPRESSOR STATION,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
522,POINT (213681.641 3527556.924),4035,S21P_0531,31.847799,-102.02564,pipeline,Gathering & Boosting,3,0.333,0.578125,...,,,,BATTERY,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
0,POINT (13379.875 3562949.413),4579,F21P_0001,32.09801,-104.1543,well,Production,4,0.25,0.683594,...,,,,WELL,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
140,POINT (64932.827 3552297.248),4719,F21P_0141,32.02326,-103.60504,well,Production,3,0.333,0.578125,...,,,,BATTERY,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11
216,POINT (221098.531 3515836.321),4795,F21P_0217,31.744034,-101.944,compressor,Gathering & Boosting,4,0.25,0.683594,...,,,,COMPRESSOR STATION,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-11,2022-04-11


In [11]:
# Assign facility lat/lon to geometry for new rows
new_rows_joined_facilityCoord_gdf = gpd.GeoDataFrame(
    new_rows_joined_gdf, geometry=gpd.points_from_xy(new_rows_joined_gdf.CHILD_LONG, new_rows_joined_gdf.CHILD_LAT, crs="EPSG:4267"))
# new_rows_joined_facilityCoord_gdf.style

In [12]:
# Reproject points to CRS of aerial observation feature layer
new_rows_joined_facilityCoord_gdf = new_rows_joined_facilityCoord_gdf.to_crs(
    {'init': 'epsg:' + str(aerial_obs_fset.spatial_reference['latestWkid'])})
#new_rows_joined_facilityCoord_gdf.style

  in_crs_string = _prepare_from_proj_string(in_crs_string)


### Create features from new rows

In [13]:
# Get an example feature
features_for_update = aerial_obs_fset.features[0]

# Get a template feature object
template_feature = deepcopy(features_for_update)

# List of features to be added
features_to_be_added = []

# loop through each row and add to the list of features to be added
for row in new_rows_joined_facilityCoord_gdf.iterrows():
    new_feature = deepcopy(template_feature)

    # Print status
    print("Creating entry for obs_id: " + row[1]['source_id'])

    # Format the geometry information dictionary
    output_geometry = {'x': row[1].geometry.x, 'y': row[1].geometry.y,
                       "spatialReference": {"wkid": aerial_obs_fset.spatial_reference['wkid'], "latestWkid": aerial_obs_fset.spatial_reference['latestWkid']}}

    # Assign the updated values
    new_feature.geometry = output_geometry
    new_feature.attributes['fot_asset_name'] = row[1]['FOT_ASSET_NAME']
    new_feature.attributes['child_obj_code'] = row[1]['CHILD_OBJ_CODE']
    new_feature.attributes['child_obj_desc'] = row[1]['CHILD_OBJ_DESC']
    new_feature.attributes['mcbu_gis_data_uploaded_date'] = int(
        datetime.timestamp(row[1]['MCBU_GIS_DATA_UPLOADED_DATE'])*1000)
    new_feature.attributes['child_lat'] = float(row[1]['CHILD_LAT'])
    new_feature.attributes['child_long'] = float(row[1]['CHILD_LONG'])
    new_feature.attributes['obs_id'] = row[1]['source_id']
    new_feature.attributes['obs_datetime'] = int(
        datetime.timestamp(row[1]['first_detection'])*1000)
    new_feature.attributes['obs_lat'] = float(row[1]['source_lat'])
    new_feature.attributes['obs_long'] = float(row[1]['source_lon'])
    new_feature.attributes['obs_q'] = float(row[1]['qsource'])
    new_feature.attributes['obs_sensor'] = 'GAO, CarbonMapper, AVIRISng'
    new_feature.attributes['obs_campaign'] = row[1]['obs_campaign']
    new_feature.attributes['SHAPE'] = output_geometry

    # add this to the list of features to be updated
    features_to_be_added.append(new_feature)

#features_to_be_added

Creating entry for obs_id: S21P_0214
Creating entry for obs_id: S21P_0223
Creating entry for obs_id: F21P_0312
Creating entry for obs_id: S21P_0372
Creating entry for obs_id: S21P_0440
Creating entry for obs_id: F21P_0022
Creating entry for obs_id: S21P_0531
Creating entry for obs_id: F21P_0001
Creating entry for obs_id: F21P_0141
Creating entry for obs_id: F21P_0217
Creating entry for obs_id: F21P_0370
Creating entry for obs_id: P00206
Creating entry for obs_id: P00213
Creating entry for obs_id: P00302
Creating entry for obs_id: P00308
Creating entry for obs_id: P00437
Creating entry for obs_id: P00736
Creating entry for obs_id: P01368
Creating entry for obs_id: P01477
Creating entry for obs_id: P01534
Creating entry for obs_id: P01765
Creating entry for obs_id: P00110
Creating entry for obs_id: P00580
Creating entry for obs_id: P01550
Creating entry for obs_id: P00734
Creating entry for obs_id: P01363


### Update the external studies compiled layer on MapHub

In [14]:
# Add the new features to the feature layer
external_studies_compiled_flayer = external_studies_compiled_layers[0]
external_studies_compiled_flayer.edit_features(adds = features_to_be_added)

{'addResults': [{'objectId': 101, 'success': True},
  {'objectId': 102, 'success': True},
  {'objectId': 103, 'success': True},
  {'objectId': 104, 'success': True},
  {'objectId': 105, 'success': True},
  {'objectId': 106, 'success': True},
  {'objectId': 107, 'success': True},
  {'objectId': 108, 'success': True},
  {'objectId': 109, 'success': True},
  {'objectId': 110, 'success': True},
  {'objectId': 111, 'success': True},
  {'objectId': 112, 'success': True},
  {'objectId': 113, 'success': True},
  {'objectId': 114, 'success': True},
  {'objectId': 115, 'success': True},
  {'objectId': 116, 'success': True},
  {'objectId': 117, 'success': True},
  {'objectId': 118, 'success': True},
  {'objectId': 119, 'success': True},
  {'objectId': 120, 'success': True},
  {'objectId': 121, 'success': True},
  {'objectId': 122, 'success': True},
  {'objectId': 123, 'success': True},
  {'objectId': 124, 'success': True},
  {'objectId': 125, 'success': True},
  {'objectId': 126, 'success': True}