# CLSS Health Check Service

The purpose of this notebook is to validate the status of content that is registered in the data library. The notebook can be configured to run on a regular schedule (e.g., hourly) and will also be available to be run on-demand.

During operation, the process interrogates registered content to gather its status (i.e. operational/active, not operational) and updates the Item table in the CLSS Feature Service. 

**Prior to running the deployment script, update the featureServiceItemID variable to reflect the Item ID of the CLSS feature service deployed to your organization.**



In [1]:
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 
## Import Dependencies
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 
import urllib.request
from urllib.parse import urlparse
import urllib
import json
from arcgis import GIS
import datetime
import re

#### Credentials

In [2]:
# Connect to GIS
#username = ''
#password = ''
#gis = GIS("https://arcgis.com", username, password)
gis = GIS('home')



## Functions

In [3]:
def getItemSourceInfo(itemurl):
    """
    Function to return the status of online content,
    checking both HTTP status code and response content for errors

    Parameters
    ----------
    itemurl : string
        Web-accessible URL 

    Returns
    -------
        A list containing [HTTP status code, description, is_operational]
    """
    
    try:
        url = itemurl + '?f=pjson'

        try:
            with urllib.request.urlopen(url) as response:
                responseCode = response.getcode()
                
    
                if responseCode not in [200, 401, 403]:
                    return [responseCode, f'HTTP error: {responseCode}', False]
                
    
                if responseCode in [401, 403]:
                    return [responseCode, f'Authorization required ({responseCode}), but considered operational', True]
                
                # For 200 responses, we also need to check the content. An example could be a layer view id that doesn't exist but the response would still be 200 and the body would include an error
                try:
                    result = response.read().decode("utf-8")
                    

                    try:
                        data = json.loads(result)
                        

                        if 'error' in data:
                            error_code = data['error'].get('code', 'unknown')
                            error_details = '. '.join(data['error'].get('details', [])) if 'details' in data['error'] else str(data['error'])
                            return [error_code, f'JSON error: {error_details}', False]
                        
                        return [200, 'Operational', True]
                        
                    except json.JSONDecodeError:
                        return [200, 'Operational (non-JSON response)', True]
                        
                except Exception as e:
                    return [200, f'Content processing error: {str(e)}', False]

        except urllib.error.HTTPError as e:
            return [e.code, e.msg, e.code in [200, 401, 403]]

        except urllib.error.URLError as e:
            # URL error (DNS failure, connection refused, etc.)
            if hasattr(e, 'reason'):
                return ['unknown', str(e.reason), False]
            elif hasattr(e, 'code'):
                return [e.code, '', e.code in [200, 401, 403]]
            return ['unknown', 'URL Error', False]

    except Exception as e:
        # Catch-all for any other errors
        return ['unknown', str(e), False]

def is_valid_url(url):
    """Check if a string is a valid URL"""
    try:
        result = urlparse(url)
#         check if it includes http/https and network domain
        return all([result.scheme, result.netloc])
    except:
        return False

def process_items(clssItemTable, clssItems):
    """Process each item and update its status in the table"""
    batch_size = 100
    batch_updates = []
    updated_count = 0
    currentTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    for item in clssItems:
        # Retrieve attributes
        itemGlobalId = item.attributes['GlobalID']
        itemUrl = item.attributes['URL']
        itemName = item.attributes['Name']
        
        if itemUrl and is_valid_url(itemUrl):
            print(f'Processing: {itemName}')
            
            # Get status information
            statusCode, statusDesc, is_operational = getItemSourceInfo(itemUrl)
            print(f"Status: {statusCode} - {statusDesc} - Operational: {is_operational}")
            
            # Prepare update with appropriate health status
            if is_operational:
                detail = 'Item Healthcheck Successful'
                statusVal = 1
            else:
                detail = f'Service error: {statusCode}. {statusDesc}'
                statusVal = 0

            updateItem = {
                "attributes": {
                    'GlobalID': itemGlobalId,
                    'HealthcheckUpdate': str(currentTime),
                    'HealthcheckDetails': detail,
                    'HealthcheckStatus': statusVal
                }
            }

            batch_updates.append(updateItem)

            # Send batch if it reaches threshold
            if len(batch_updates) >= batch_size:
                try:
                    clssItemTable.edit_features(updates=batch_updates, use_global_ids=True)
                    updated_count += len(batch_updates)
                    batch_updates = []
                except Exception as e:
                    print(f"ERROR during batch update: {e}")
        else:
            print(f"Invalid URL: {itemUrl}")

    # Final flush of remaining items
    if batch_updates:
        try:
            clssItemTable.edit_features(updates=batch_updates, use_global_ids=True)
            updated_count += len(batch_updates)
        except Exception as e:
            print(f"ERROR during final batch update: {e}")

    print('> Total count of items updated:', updated_count)
    print('>> Health check complete.')
    return updated_count

## Process ITEMs in the CLSS Feature Service

1. Connect to the feature service containing items registered in the CLSS Data Library. 
2. Iterate through items and update the health check status


In [4]:
featureServiceItemID = "e345bbe0a23c42a19ebd52d01ac32831"

In [5]:
# Enter the Feature Service ID in the code below
clssService = gis.content.get(featureServiceItemID)
clssItemTable = clssService.tables[3]  ## ITEM table
clssItems = clssItemTable.query(where="Status = 1", out_fields="*")

In [6]:
process_items(clssItemTable, clssItems)

Processing: FEMA Open Shelters
Status: 200 - Operational - Operational: True
Processing: World Traffic Map
Status: 499 - JSON error:  - Operational: False
Processing: IPAWS Events
Status: 200 - Operational - Operational: True
Processing: EPA Facility Registry Service - Superfund Nation Priorities List (SEMS_NPL)
Status: 200 - Operational - Operational: True
Processing: Hospitals Medical Centers
Status: 200 - Operational - Operational: True
Processing: Outer Continental Shelf Oil and Natural Gas Platforms - Gulf of America Region
Status: 200 - Operational - Operational: True
Processing: Nuclear Power Plants
Status: 200 - Operational - Operational: True
Processing: State Activation Levels
Status: 200 - Operational - Operational: True
Processing: National Bridge Inventory
Status: 200 - Operational - Operational: True
Processing: National Inventory of Dams
Status: 200 - Operational - Operational: True
Processing: Emergency Shelter Population
Status: 200 - Operational - Operational: True
Pr

17