# Import CH4 aerial observations from a CSV file into the compiled database

## Resources
http://localhost:8888/notebooks/misc/arcgis-python-api/guide/04-feature-data-and-analysis/editing-features.ipynb

https://developers.arcgis.com/python/guide/accessing-and-creating-content/

http://localhost:8888/notebooks/misc/arcgis-python-api/samples/05_content_publishers/updating_features_in_a_feature_layer.ipynb

## Import libraries and setup

In [1]:
import os
from datetime import datetime
from copy import deepcopy

import pandas as pd
import geopandas as gpd
import arcgis
from arcgis.gis import GIS

In [49]:
# Portal item ids and input file used by the cells below.

# Item id of the compiled MCBU CH4 external studies feature service
external_studies_compiled_fs_id = "73fcf310aa784460a9b102b1693a9314"

# Item id of the MCBU facilities dataset
facilities_layer_id = "857b2093bff44132b39e65176c309284"

# Path to the CSV file holding the new aerial observations
csv = "MCBU_aerial_obs_3_24_2022.csv"

## Function to load feature layer into GDF

In [3]:
def FeatureToGdf(feature_layer):
    """Query an ArcGIS feature layer and return its features as a GeoDataFrame.

    The layer is queried without any filter, the resulting FeatureSet is
    serialised to GeoJSON, and the GeoJSON features are loaded into
    geopandas.

    Parameters
    -----------
    feature_layer : ArcGIS API feature layer
        Object exposing ``query()``; the returned FeatureSet must provide
        ``to_geojson``.

    Returns
    -----------
    feature_gdf : geodataframe
        One row per feature in the query result. No CRS is assigned here.

    """
    import geopandas as gpd
    import json

    # Query the layer and take the GeoJSON text of the resulting FeatureSet
    geojson_text = feature_layer.query().to_geojson

    # Parse the text and keep only the list of GeoJSON features
    geojson_features = json.loads(geojson_text)['features']

    # Build the GeoDataFrame straight from the feature list
    return gpd.GeoDataFrame.from_features(geojson_features)

## Connect to Chevron MapHub

In [25]:
# Open a session on the MapHub portal; supplying client_id starts the
# interactive OAuth sign-in (a code is pasted back into the notebook)
portal_url = "https://maphub-amer.tst.azure.chevron.com/portal"
oauth_client_id = 'bb6vn14P957pN6bV'
gis = GIS(portal_url, client_id=oauth_client_id)

# Report which account the session is running under
current_username = gis.properties.user.username
print("Logged in as: " + current_username)

Please sign in to your GIS and paste the code that is obtained below.
If a web browser does not automatically open, please navigate to the URL below yourself instead.
Opening web browser to navigate to: https://maphub-amer.tst.azure.chevron.com/portal/sharing/rest//oauth2/authorize?response_type=code&client_id=bb6vn14P957pN6bV&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&state=jt9uo4tOBK7NmyRwPBlsBf7A8k6h1q
Enter code obtained on signing in using SAML: ········




Logged in as: ekqo


## Load new aerial observations from CSV

In [38]:
# Load the raw observation table from the CSV file
aerial_obs_csv = pd.read_csv(csv)

# Build one point per observation from its longitude/latitude columns
obs_points = gpd.points_from_xy(
    aerial_obs_csv.obs_long,
    aerial_obs_csv.obs_lat,
)
aerial_obs_gdf = gpd.GeoDataFrame(aerial_obs_csv, geometry=obs_points)

# Declare the coordinates as EPSG:4267 (assigns the CRS; nothing is reprojected)
aerial_obs_gdf = aerial_obs_gdf.set_crs(epsg=4267)

# Parse the observation time column into pandas datetimes
aerial_obs_gdf['obs_datetime'] = pd.to_datetime(aerial_obs_gdf['obs_datetime'])

# Display the loaded observations
aerial_obs_gdf

Unnamed: 0,obs_id,obs_datetime,obs_lat,obs_long,obs_q,obs_sensor,geometry
0,9Y84LZ6,2022-02-18 16:49:00,32.2987,-104.0665,230,GHGSat-C2,POINT (-104.06650 32.29870)
1,9XS28ET_1,2022-02-10 16:43:00,31.2719,-103.3187,2766,GHGSat-C2,POINT (-103.31870 31.27190)
2,9XS28ET_2,2022-02-10 16:43:00,31.272,-103.3115,1421,GHGSat-C2,POINT (-103.31150 31.27200)


## Check new aerial observations against compiled database

In [50]:
# Fetch the compiled external studies item from the portal
external_studies_compiled_item = gis.content.get(external_studies_compiled_fs_id)

# The aerial observations are held in the first layer of that item
external_studies_compiled_layers = external_studies_compiled_item.layers
aerial_obs_layer = external_studies_compiled_layers[0]

# Query the layer without a filter and display the result as a dataframe
aerial_obs_fset = aerial_obs_layer.query()
aerial_obs_fset.sdf

Unnamed: 0,objectid,fot_asset_name,child_obj_code,child_obj_desc,mcbu_gis_data_uploaded_date,child_lat,child_long,obs_id,obs_datetime,obs_lat,obs_long,obs_q,obs_sensor,SHAPE
0,2,Carlsbad East,EC7ETBY,BATTERY; EL TORO CTB,2022-03-24 16:01:16,32.29915,-104.06561,9Y84LZ6,2022-03-24 16:13:42,32.2987,-104.0665,230.0,GHGSat-C2,"{""x"": -11584585.9475, ""y"": 3802659.0424000025,..."
1,7,Ft. Stockton,QV2WLFBN1BTY,BATTERY; WOLFBONE 1 CTB (AKA REEVES TXL 1 BTY),2022-04-06 07:00:00,31.271452,-103.318574,9XS28ET_1,2022-02-11 00:43:00,31.2719,-103.3187,2766.0,GHGSat-C2,"{""x"": -11501420.9435, ""y"": 3668070.8312000036,..."
2,8,Ft. Stockton,QV2RVSSJDCS,COMPRESSOR STATION; REEVES SJD SEC 15 CS,2022-04-06 07:00:00,31.271308,-103.312525,9XS28ET_2,2022-02-11 00:43:00,31.272,-103.3115,1421.0,GHGSat-C2,"{""x"": -11500747.556, ""y"": 3668052.080600001, ""..."


In [51]:
# obs_id values that are already stored in the compiled feature layer
existing_obs_ids = aerial_obs_fset.sdf['obs_id']

# Keep only the CSV observations whose obs_id is not yet in the layer
already_loaded = aerial_obs_gdf['obs_id'].isin(existing_obs_ids)
new_rows_gdf = aerial_obs_gdf[~already_loaded]
new_rows_gdf

Unnamed: 0,obs_id,obs_datetime,obs_lat,obs_long,obs_q,obs_sensor,geometry


## Load new aerial observations into compiled database

### Perform spatial join of new aerial observations against MCBU facilities data

In [41]:
# Fetch the MCBU facilities item from the portal
mcbu_facilities = gis.content.get(facilities_layer_id)

# Convert every point layer of the item to a GeoDataFrame; layers with any
# other geometry type are skipped
mcbu_facilities_list = [
    FeatureToGdf(layer)
    for layer in mcbu_facilities.layers
    if layer.properties.geometryType == 'esriGeometryPoint'
]

# Stack the per-layer frames into a single table of facilities
mcbu_facilities_gdf = pd.concat(mcbu_facilities_list, axis=0)

# Both date fields are read as epoch milliseconds and converted to datetimes
for date_field in ('MCBU_GIS_DATA_RECEIVED_DATE', 'MCBU_GIS_DATA_UPLOADED_DATE'):
    mcbu_facilities_gdf[date_field] = pd.to_datetime(
        mcbu_facilities_gdf[date_field], unit='ms')

# Declare the facility coordinates as EPSG:4267, the same CRS assigned to
# the aerial observations
mcbu_facilities_gdf = mcbu_facilities_gdf.set_crs(epsg=4267)
#mcbu_facilities_gdf

In [42]:
# Attach to each new observation the attributes of its nearest facility.
# how='inner' keeps only observations that have a facility within
# max_distance (expressed in the units of the frames' CRS); observations
# without such a match are dropped from the result.
new_rows_joined_gdf = gpd.sjoin_nearest(
    new_rows_gdf,
    mcbu_facilities_gdf,
    how='inner',
    max_distance=0.01,
)
new_rows_joined_gdf




Unnamed: 0,obs_id,obs_datetime,obs_lat,obs_long,obs_q,obs_sensor,geometry,index_right,OBJECTID,FOT_ASSET_NAME,...,BTU_RATE,HP,TANK_ENG_SERVICE,MCBU_GIS_CHILD_CLASS_SYBMOL,MCBU_GIS_DATA_UPLOADED_BY,MCBU_GIS_DATA_SOURCE_NAME,MCBU_GIS_DATA_SOURCE_TYPE,MCBU_GIS_DATA_REMARKS,MCBU_GIS_DATA_RECEIVED_DATE,MCBU_GIS_DATA_UPLOADED_DATE
1,9XS28ET_1,2022-02-10 16:43:00,31.2719,-103.3187,2766,GHGSat-C2,POINT (-103.31870 31.27190),80,38422399,Ft. Stockton,...,,,,BATTERY,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-06,2022-04-06
2,9XS28ET_2,2022-02-10 16:43:00,31.272,-103.3115,1421,GHGSat-C2,POINT (-103.31150 31.27200),72,38421262,Ft. Stockton,...,,,,COMPRESSOR STATION,svc-mcbu-gis-fmeT,FDS DATABASE,MS_SQL_VIEW,,2022-04-06,2022-04-06


In [52]:
# Replace each row's geometry with the location of its matched facility
# (CHILD_LONG / CHILD_LAT), keeping the CRS the joined frame already has.
# The CRS object is passed through unchanged instead of being rebuilt from
# "EPSG:" + str(crs.to_epsg()), which fails when to_epsg() returns None.
facility_points = gpd.points_from_xy(
    new_rows_joined_gdf.CHILD_LONG,
    new_rows_joined_gdf.CHILD_LAT,
    crs=new_rows_joined_gdf.crs,
)
new_rows_joined_facilityCoord_gdf = gpd.GeoDataFrame(
    new_rows_joined_gdf, geometry=facility_points)
#new_rows_joined_facilityCoord_gdf.style

In [53]:
# Reproject the points to the spatial reference of the aerial observation
# feature layer. The EPSG code is passed directly: the legacy
# {'init': 'epsg:...'} mapping is deprecated and emits a warning on every run.
target_epsg = int(aerial_obs_fset.spatial_reference['latestWkid'])
new_rows_joined_facilityCoord_gdf = new_rows_joined_facilityCoord_gdf.to_crs(
    epsg=target_epsg)
#new_rows_joined_facilityCoord_gdf.style

  in_crs_string = _prepare_from_proj_string(in_crs_string)


### Create features from new rows

In [54]:
def _to_epoch_ms(value):
    """Return *value* as integer milliseconds since the Unix epoch.

    Timezone-naive values are interpreted as UTC, so the result does not
    depend on the timezone of the machine running the notebook (which is
    what ``datetime.timestamp`` uses for naive values).
    """
    stamp = pd.Timestamp(value)
    if stamp.tzinfo is None:
        stamp = stamp.tz_localize('UTC')
    return int(round(stamp.timestamp() * 1000))


# Use an existing feature of the layer as a template so that every new
# feature starts from the same set of attribute keys
existing_features = aerial_obs_fset.features
if not existing_features:
    raise ValueError(
        "The aerial observations layer has no features to use as a template")
template_feature = deepcopy(existing_features[0])

# Spatial reference of the target layer, attached to every new geometry
layer_wkid = aerial_obs_fset.spatial_reference['wkid']
layer_latest_wkid = aerial_obs_fset.spatial_reference['latestWkid']

# List of features to be added
features_to_be_added = []

# Build one feature per joined row
for _, row in new_rows_joined_facilityCoord_gdf.iterrows():
    new_feature = deepcopy(template_feature)

    # Print status
    print("Creating entry for obs_id: " + row['obs_id'])

    # Geometry dictionary: facility location in the layer's spatial reference
    output_geometry = {
        'x': row.geometry.x,
        'y': row.geometry.y,
        "spatialReference": {"wkid": layer_wkid, "latestWkid": layer_latest_wkid},
    }

    # Assign the updated values
    new_feature.geometry = output_geometry
    new_feature.attributes['fot_asset_name'] = row['FOT_ASSET_NAME']
    new_feature.attributes['child_obj_code'] = row['CHILD_OBJ_CODE']
    new_feature.attributes['child_obj_desc'] = row['CHILD_OBJ_DESC']
    # This column was parsed from epoch milliseconds, so it is UTC-based;
    # converting it back as UTC round-trips the original value
    new_feature.attributes['mcbu_gis_data_uploaded_date'] = _to_epoch_ms(
        row['MCBU_GIS_DATA_UPLOADED_DATE'])
    new_feature.attributes['child_lat'] = float(row['CHILD_LAT'])
    new_feature.attributes['child_long'] = float(row['CHILD_LONG'])
    new_feature.attributes['obs_id'] = row['obs_id']
    # NOTE(review): the CSV observation times carry no timezone and are
    # treated as UTC here - confirm against the data provider
    new_feature.attributes['obs_datetime'] = _to_epoch_ms(row['obs_datetime'])
    new_feature.attributes['obs_lat'] = float(row['obs_lat'])
    new_feature.attributes['obs_long'] = float(row['obs_long'])
    new_feature.attributes['obs_q'] = float(row['obs_q'])
    new_feature.attributes['obs_sensor'] = row['obs_sensor']
    new_feature.attributes['SHAPE'] = output_geometry

    # Queue the feature for upload
    features_to_be_added.append(new_feature)

#features_to_be_added

Creating entry for obs_id: 9XS28ET_1
Creating entry for obs_id: 9XS28ET_2


### Update the aerial observations layer on MapHub

In [46]:
# Add the new features to the aerial observations layer, i.e. the first layer
# of the compiled external studies feature service. The layer list loaded
# earlier is named external_studies_compiled_layers.
aerial_obs_flayer = external_studies_compiled_layers[0]
aerial_obs_flayer.edit_features(adds=features_to_be_added)

{'addResults': [{'objectId': 7, 'success': True},
  {'objectId': 8, 'success': True}],
 'updateResults': [],
 'deleteResults': [],
 'attachments': {'addResults': [], 'updateResults': [], 'deleteResults': []}}