## Convert Classic Esri StoryMap Series (Tabbed Layout)
Fetch JSON from an ArcGIS Online hosted Classic Esri Story Map Series App and convert each tab/bullet/accordion into its own ArcGIS StoryMap with the cover supressed. Once converted, each ArcGIS StoryMap will need to be opened in a browser tab in order to complete the Story Checker. Once all are published, an ArcGIS StoryMap Collection is created that contains the converted app to replicate the classic app look and feel. Note: Any entries that were hidden in the classic app will be published and will be visible by default. If it is desired that they not appear they can be removed from the Collection after publishing. Also, as there is not equivalent to the accordion layout, these layouts will be converted to the Tabbed format.

TO DO - create conversion tool for classic swipe (second tab in Katrina story)

## Setup

In [37]:
# Import packages, config, AGO authentication and helper functions
from bs4 import BeautifulSoup, NavigableString, Tag
from arcgis.apps.storymap import StoryMap, Themes, Image, Video, Audio, Embed, Map, Button, Text, Gallery, Timeline, Sidecar, Code, Table, TextStyles, Collection, CollectionNavigation
import arcgis
from arcgis.gis import GIS, Item
from PIL import Image as PILImage
from PIL import ImageStat
from io import BytesIO
from IPython.display import display
import pandas as pd
import matplotlib.colors as mcolors
import tempfile
import ipywidgets as widgets
from ipywidgets import IntProgress
import re, json, requests, sys, time, os, subprocess
from copy import deepcopy

# Set Pandas dataframe display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns',1000)

# Print Python and ArcGIS for Python versions
# since things can change between versions
print(f"Python version: ",sys.version)
print("ArcGIS for Python API / StoryMap module version: ",arcgis.__version__)

# Set to false if running locally and authenticate separately
agoNotebook = False

# Connect to ArcGIS Online
if agoNotebook:
    # Define the GIS
    gis = GIS("home")
    print("Successfully logged in as: " + gis.properties.user.username, "(role: " + gis.properties.user.role + ")")
    
## Helper functions

default_thumbnail_path = "https://cdn-a.arcgis.com/cdn/1BE082D/js/arcgis-app-components/arcgis-app/assets/arcgis-item-thumbnail/storymap.png"

# Collect map extents for troubleshooting thumbnail generation
map_extents = {}


def fetch_classic_storymap_data(classic_storymap_id, gis):
    classic_item = Item(gis=gis, itemid=classic_storymap_id)
    classic_data = Item.get_data(classic_item)
    if classic_data == {}:
        raise ValueError("ERROR: StoryMap to be converted must be hosted on ArcGIS Online.")
    elif isinstance(classic_data, dict):
        classic_item_data = classic_data
    else:
        classic_item_data = json.loads(classic_data)
    return classic_item, classic_item_data

def extract_story_settings(classic_item_data):
    settings = classic_item_data["values"]["settings"]
    title = classic_item_data["values"].get("title", "Untitled StoryMap")
    subtitle = classic_item_data["values"].get("subtitle", "")
    type = settings["layout"]["id"]
    panel_position = settings["layoutOptions"]["panel"]["position"]
    theme = settings["theme"]
    entries = classic_item_data["values"]["story"]["entries"]
    return title, subtitle, type, panel_position, theme, entries

def determine_theme(theme):
    classic_name = theme["colors"].get("name", "No classic theme name")
    group = theme["colors"]["group"]
    if group == "dark":
        return classic_name, Themes.OBSIDIAN
    elif group == "light":
        return classic_name, Themes.SUMMIT
    else:
        return classic_name, Themes.SUMMIT

def process_entry(gis, entry, default_thumbnail_path, extents_dict=None, entry_index=None):
    entry_title = entry.get("title")
    media_info = entry.get("media", {})
    media_type = media_info.get("type")
    main_stage_content = None
    thumbnail_path = None
    invalid_webmap = False
    image_url = None
    print_service_responses = None

    if media_type == "webmap":
        webmap_id = media_info.get('webmap', {}).get('id')
        webmap_from_json = build_webmap_from_json(gis, media_info)
        # Capture extent for troubleshooting
        extent = None
        if webmap_from_json and "mapOptions" in webmap_from_json and "extent" in webmap_from_json["mapOptions"]:
            extent = webmap_from_json["mapOptions"]["extent"]
        if extents_dict is not None:
            key = entry_index if entry_index is not None else entry_title
            extents_dict[key] = extent    
        if webmap_from_json:
            try:
                thumbnail_path, final_webmap_json, print_service_responses = create_webmap_thumbnail(webmap_json=webmap_from_json, default_thumbnail_path=default_thumbnail_path)
            except Exception as e:
                print(f"Error processing webmap {entry_title} ({webmap_id}): {e}")
                invalid_webmap = True
        if webmap_id and not invalid_webmap:
            try:
                main_stage_content = Map(webmap_id)
            except Exception as e:
                print(f"Error creating Map object for {entry_title} ({webmap_id}): {e}")
                invalid_webmap = True
    elif media_type == "webpage":
        webpage_url = media_info.get("webpage", {}).get("url")
        if webpage_url:
            main_stage_content = Embed(webpage_url)
    elif media_type == "image":
        image_url = media_info.get("image", {}).get("url")
        if image_url:
            main_stage_content = Image(image_url)
            thumbnail_path = create_image_thumbnail(image_url=image_url, default_thumbnail_path=default_thumbnail_path)

    if not thumbnail_path:
        thumbnail_path = create_image_thumbnail(image_url=default_thumbnail_path, default_thumbnail_path=default_thumbnail_path)

    return entry_title, main_stage_content, thumbnail_path, invalid_webmap, final_webmap_json, print_service_responses

def build_webmap_from_json(gis, media):
    """
    Build a minimal webmap JSON for the print service from a storymap entry's media property,
    using the basemap from the referenced webmap item if available.
    """
    # Default basemap (fallback)
    topo_basemap = {
        "baseMapLayers": [{
            "id": "World_Topo_Map",
            "layerType": "ArcGISTiledMapServiceLayer",
            "opacity": 1,
            "visibility": True,
            "url": "https://services.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer"
        }],
        "title": "Topographic"
    }

    imagery_basemap = {
        "baseMapLayers": [{
            "id": "World_Imagery",
            "layerType": "ArcGISTiledMapServiceLayer",
            "opacity": 1,
            "visibility": True,
            "url": "https://services.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer"
        }],
        "title": "Imagery"
    }

    baseMap = topo_basemap

    # Try to get basemap from the referenced webmap item
    if "webmap" in media and "id" in media["webmap"]:
        try:
            webmap_item = gis.content.get(media["webmap"]["id"])
            wm_data = webmap_item.get_data()
            if "baseMap" in wm_data and "baseMapLayers" in wm_data["baseMap"]:
                # Only keep required fields for each basemap layer
                baseMapLayers = []
                for lyr in wm_data["baseMap"]["baseMapLayers"]:
                    baseMapLayers.append({
                        "id": lyr.get("id", "basemap"),
                        "layerType": lyr.get("layerType", "ArcGISTiledMapServiceLayer"),
                        "opacity": lyr.get("opacity", 1),
                        "visibility": lyr.get("visibility", True),
                        "url": lyr.get("url")
                    })
                baseMap = {
                    "baseMapLayers": baseMapLayers,
                    "title": wm_data["baseMap"].get("title", "Basemap")
                }
        except Exception as e:
            print(f"Could not fetch basemap from webmap item: {e}. Using fallback basemap.")

    # Get extent and spatial reference from the referenced webmap
    extent = None
    spatialRef = {"wkid": 102100}
    if "webmap" in media:
        if "spatialReference" in media["webmap"]:
            spatialRef = media["webmap"]["spatialReference"]
        if "extent" in media["webmap"]:
            # Clamp the max extent to the spatial reference's max extent
            media["webmap"]["extent"] = clamp_extent_to_spatial_reference(media["webmap"]["extent"], spatialRef)
            extent = media["webmap"]["extent"]
    # Ensure extent is present and valid
    if not extent:
        extent = {
            "xmin": -20037508.342789244,
            "ymin": -20037508.342789244,
            "xmax": 20037508.342789244,
            "ymax": 20037508.342789244,
            "spatialReference": spatialRef
        }
    mapOptions = {"extent": extent}

    # Get layers from the webmap
    operationalLayers = []
    if "webmap" in media and "layers" in media["webmap"]:
        layers = media["webmap"]["layers"]
        if layers:
            for lyr in layers:
                # Try to get URL from referenced webmap item
                layer_url = lyr.get("url")
                if not layer_url and "id" in lyr and "id" in media["webmap"]:
                    try:
                        webmap_item = gis.content.get(media["webmap"]["id"])
                        wm_data = webmap_item.get_data()
                        for op_lyr in wm_data.get("operationalLayers", []):
                            if op_lyr.get("id") == lyr["id"]:
                                layer_url = op_lyr.get("url")
                                break
                    except Exception as e:
                        print(f"Could not fetch webmap item for layer URL lookup: {e}")
                if layer_url:
                    operationalLayers.append({
                        "id": lyr.get("id", "layer"),
                        "layerType": lyr.get("layerType", "ArcGISFeatureLayer"),
                        "url": layer_url,
                        "visibility": lyr.get("visibility", True),
                        "opacity": lyr.get("opacity", 1)
                    })

    # Export options for print service
    export_options = {"outputSize": [800, 600], "dpi": 96}

    webmap_json = {
        "baseMap": baseMap,
        "operationalLayers": operationalLayers,
        "spatialReference": spatialRef,
        "mapOptions": mapOptions,
        "exportOptions": export_options   
    }    
    
    # Invert the drawing order of operational layers
    if "operationalLayers" in webmap_json:
        webmap_json["operationalLayers"] = list(reversed(webmap_json["operationalLayers"]))

    return webmap_json

def clamp_extent_to_spatial_reference(extent, spatial_reference):
    """
    Clamp extent values to the valid range for the given spatial reference.
    Uses a lookup table for common WKIDs, otherwise tries pyproj for bounds.
    Installs pyproj if needed (conda preferred, pip fallback).
    """
    max_extents = {
        102100: {"xmin": -20037508.342789244, "ymin": -20037508.342789244, "xmax": 20037508.342789244, "ymax": 20037508.342789244},
        3857:   {"xmin": -20037508.342789244, "ymin": -20037508.342789244, "xmax": 20037508.342789244, "ymax": 20037508.342789244},
        4326:   {"xmin": -180, "ymin": -90, "xmax": 180, "ymax": 90},  # WGS84
        # Add more WKIDs as needed
    }

    wkid = None
    wkt = None
    if isinstance(spatial_reference, dict):
        wkid = spatial_reference.get("wkid")
        wkt = spatial_reference.get("wkt")
    elif isinstance(spatial_reference, int):
        wkid = spatial_reference
    if wkid in max_extents:
        max_extent = max_extents[wkid]
    else:
        print(f"WKID {wkid} not found in lookup table. Attempting to use pyproj to get bounds...")
        # Try to import pyproj, install if missing
        try:
            from pyproj import CRS
        except ImportError:
            print("pyproj not found. Attempting to install pyproj with conda...")
            try:
                subprocess.check_call([sys.executable, "-m", "conda", "install", "-y", "pyproj"])
            except Exception:
                print("conda install failed or not available. Trying pip install...")
                subprocess.check_call([sys.executable, "-m", "pip", "install", "pyproj"])
            from pyproj import CRS
        # Now try to get bounds
        max_extent = get_max_extent_from_wkid(wkid)
        if not max_extent:
            print(f"Could not determine bounds for WKID {wkid}. Extent not clamped.")
            return extent

    clamped_extent = {
        "xmin": max(min(extent.get("xmin", max_extent["xmin"]), max_extent["xmax"]), max_extent["xmin"]),
        "ymin": max(min(extent.get("ymin", max_extent["ymin"]), max_extent["ymax"]), max_extent["ymin"]),
        "xmax": max(min(extent.get("xmax", max_extent["xmax"]), max_extent["xmax"]), max_extent["xmin"]),
        "ymax": max(min(extent.get("ymax", max_extent["ymax"]), max_extent["ymax"]), max_extent["ymin"]),
        "spatialReference": spatial_reference
    }
    return clamped_extent

def get_max_extent_from_wkid(wkid):
    try:
        from pyproj import CRS
        crs = CRS.from_epsg(wkid)
        bounds = crs.area_of_use.bounds  # (west, south, east, north)
        return {
            "xmin": bounds[0],
            "ymin": bounds[1],
            "xmax": bounds[2],
            "ymax": bounds[3]
        }
    except Exception as e:
        print(f"Could not get bounds for WKID {wkid}: {e}")
        return None

def create_image_thumbnail(image_url, default_thumbnail_path):
    try:
        response = requests.get(image_url)
        img = PILImage.open(BytesIO(response.content))
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        img.thumbnail((800, 600))
        img.save(temp_file.name)
        return temp_file.name
    except Exception:
        print("Thumbnail download failed; using default.")
        img = PILImage.open(BytesIO(requests.get(default_thumbnail_path).content))
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        img.thumbnail((800, 600))
        img.save(temp_file.name)
        return temp_file.name

def remove_failed_service(webmap_json, failed_url):
    # Remove from operationalLayers
    if 'operationalLayers' in webmap_json:
        webmap_json['operationalLayers'] = [
            lyr for lyr in webmap_json['operationalLayers']
            if not lyr.get('url', '').startswith(failed_url)
        ]
    # Remove from baseMapLayers
    if 'baseMap' in webmap_json and 'baseMapLayers' in webmap_json['baseMap']:
        webmap_json['baseMap']['baseMapLayers'] = [
            lyr for lyr in webmap_json['baseMap']['baseMapLayers']
            if not lyr.get('url', '').startswith(failed_url)
        ]
    return webmap_json

# For downloads, use in-memory BytesIO where possible
def create_webmap_thumbnail(webmap_json, default_thumbnail_path):
    url = "https://utility.arcgisonline.com/arcgis/rest/services/Utilities/PrintingTools/GPServer/Export%20Web%20Map%20Task/execute"
    #webmap_json = webmap_item.get_data()
    webmap_json = webmap_json if isinstance(webmap_json, dict) else json.loads(webmap_json)
    webmap_json_copy = deepcopy(webmap_json)
    tried_urls = set()
    max_attempts = 10  # Prevent infinite loops

    # List to capture all print service responses
    print_service_responses = []
    final_webmap_json = None

    # Ensure exportOptions is set
    if 'exportOptions' not in webmap_json_copy:
        webmap_json_copy['exportOptions'] = {
            "outputSize": [800, 600],
            "dpi": 96
        }
    # Ensure mapOptions/extent is set
    if 'mapOptions' not in webmap_json_copy:
        webmap_json_copy['mapOptions'] = {}
    if 'extent' not in webmap_json_copy['mapOptions']:
        webmap_json_copy['mapOptions']['extent'] = webmap_json.get('mapOptions', {}).get('extent', webmap_json.get('initialState', {}).get('viewpoint', {}).get('targetGeometry'))

    for attempt in range(max_attempts):
        params = {
            "f": "json",
            "Web_Map_as_JSON": json.dumps(webmap_json_copy),
            "Format": "PNG32",
            "Layout_Template": "MAP_ONLY"
        }
        
        # Capture the final json sent to the print service for troubleshoorting
        final_webmap_json = deepcopy(webmap_json_copy)
        
        response = requests.post(url, data=params)
        result = response.json()

        # Capture the print service response for troubleshooting
        print_service_responses.append({
            "attempt": attempt + 1,
            "params": params,
            "status_code": response.status_code,
            "result": result
        })

        if 'results' in result:
            image_url = result['results'][0]['value']['url']
            img_response = requests.get(image_url)
            if img_response.status_code == 200:
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                temp_file.write(img_response.content)
                temp_file.close()
                img = PILImage.open(temp_file.name)
                is_blank = is_blank_image(temp_file.name)
                if is_blank:
                    print("Generated thumbnail is blank; scaling extent and retrying.")
                    # Try to scale the extent if possible
                    extent = webmap_json_copy.get('mapOptions', {}).get('extent')
                    # if extent:
                    #     new_extent = scale_extent(extent, scale_factor=1.1)
                    #     webmap_json_copy['mapOptions']['extent'] = new_extent
                    #     webmap_json_copy['extent'] = new_extent
                    #     continue  # Retry with new extent
                    # else:
                    if not extent:
                        print("No extent found to scale; using default image.")
                        default_path = create_image_thumbnail(image_url=default_thumbnail_path, default_thumbnail_path=default_thumbnail_path)
                        return default_path, print_service_responses, final_webmap_json
                return temp_file.name, print_service_responses, final_webmap_json
            else:
                break  # No valid image, break and use default

        elif 'error' in result and 'details' in result['error']:
            # Try to extract the failed service URL
            failed_layer_detail = result['error']['details'][0]
            if ' at ' in failed_layer_detail:
                failed_service_url = failed_layer_detail.split(' at ')[-1]
                if failed_service_url in tried_urls:
                    break  # Prevent infinite loop if same URL keeps failing
                tried_urls.add(failed_service_url)
                webmap_json_copy = remove_failed_service(webmap_json_copy, failed_service_url)
                continue  # Try again with the updated JSON
            else:
                break  # Can't parse the failed URL, break and use default
        else:
            break  # No results and no error details, break and use default

    # If we reach here, fallback to default
    print("Thumbnail download failed; using default.")
    default_path = create_image_thumbnail(image_url=default_thumbnail_path, default_thumbnail_path=default_thumbnail_path)
    return default_path, print_service_responses, final_webmap_json

def is_blank_image(image_path, threshold=5):
    img = PILImage.open(image_path).convert('L')
    pixels = list(img.getdata())
    unique_values = set(pixels)
    # If only 1 or 2 unique values (e.g., all black, all white, or half black/half white), treat as blank
    if len(unique_values) <= 2:
        return True
    stat = ImageStat.Stat(img)
    return stat.stddev[0] < threshold  # fallback for nearly-uniform images

def scale_extent(extent, scale_factor=1.1):
    """
    Scales the extent by the given scale_factor (e.g., 1.1 for 10% larger).
    Extent should be a dict with xmin, ymin, xmax, ymax keys.
    """
    if not extent:
        return extent
    xmin, ymin, xmax, ymax = extent['xmin'], extent['ymin'], extent['xmax'], extent['ymax']
    x_center = (xmin + xmax) / 2
    y_center = (ymin + ymax) / 2
    width = (xmax - xmin) * scale_factor
    height = (ymax - ymin) * scale_factor
    new_xmin = x_center - width / 2
    new_xmax = x_center + width / 2
    new_ymin = y_center - height / 2
    new_ymax = y_center + height / 2
    return {'xmin': new_xmin, 'ymin': new_ymin, 'xmax': new_xmax, 'ymax': new_ymax, 'spatialReference': extent.get('spatialReference', {'wkid': 102100})}


def build_and_save_storymap(entry, entry_index, entry_title, main_stage_content, new_theme, thumbnail_path, default_thumbnail_path):
    media_info = entry.get("media", {})
    media_type = media_info.get("type")
    main_stage_content = main_stage_content
    story = StoryMap()
    story.theme(new_theme)
    sidecar = Sidecar(style="docked-panel")
    story.add(sidecar)

    description_html = entry.get("description", "")
    # Parse HTML and convert to StoryMap nodes
    content_nodes, content_image_metadata = convert_html_elements_to_storymap_node(parse_root_elements(description_html))
    # Add main stage content and text content to sidecar
    sidecar.add_slide(contents=content_nodes, media=main_stage_content)

    # Assign metadata to each image in contents
    for img, caption, alt, link in content_image_metadata:
        try:
            img.caption = caption
            img.alt_text = alt
            img.link = link
        except Exception as e:
            print(f"Error setting image metadata: {e}")

    # Set media properties
    if isinstance(main_stage_content, Map):
        # Set webmap properties. Map must be added to the story before setting viewpoint
        if media_type == "webmap":
            # Set the extent for the map stage
            extent_json = media_info.get('webmap', {}).get('extent')
            if extent_json:
                main_stage_content.set_viewpoint(extent=extent_json)  # Extent dict per docs
            # Set layer visibility 
            old_layers = media_info.get('webmap', {}).get('layers', [])
            if old_layers:
                if hasattr(main_stage_content, "map_layers"):
                    for new_lyr in main_stage_content.map_layers:
                        for old_lyr in old_layers:
                            if new_lyr['id'] == old_lyr['id']:
                                new_lyr['visible'] = old_lyr['visibility']
            elif "operationalLayers" in media_info.get('webmap', {}):
                old_layers = media_info.get('webmap', {}).get('operationalLayers', [])
                if hasattr(main_stage_content, "map_layers"):
                    for new_lyr in main_stage_content.map_layers:
                        for old_lyr in old_layers:
                            if 'id' in new_lyr and 'id' in old_lyr and new_lyr['id'] == old_lyr['id']:
                                new_lyr['visible'] = old_lyr['visibility']

    if isinstance(main_stage_content, Image):
        if caption:
            main_stage_content.caption = media_info.get("image", {}).get("caption", "")
        if alt:
            main_stage_content.alt_text = media_info.get("image", {}).get("alt", "")
        if link:
            main_stage_content.link = media_info.get("image", {}).get("link", "")
        # if display: # https://developers.arcgis.com/python/latest/api-reference/arcgis.apps.storymap.html#arcgis.apps.storymap.story_content.Image.display
        #    main_stage_content.display = display
        # if properties:
        #    main_stage_content.properties = properties

    # Set cover properties
    cover_properties = story.content_list[0]
    cover_properties.title = entry_title
    cover_properties.byline = ""
    cover_properties.date = "none"
    if not thumbnail_path or not os.path.isfile(thumbnail_path):
        thumbnail_path = default_thumbnail_path
    cover_properties.media = Image(thumbnail_path) 

    # Hide cover
    for k, v in story.properties['nodes'].items():
        if v['type'] == 'storycover':
            v['config'] = {'isHidden': 'true'}

    # Save and publish
    story.save(title=entry_title, tags=["auto-created"], publish=True)
    if hasattr(story, '_item'):
        published_story_item = story._item
        published_story_item.update(thumbnail=thumbnail_path)
        published_story_item_url = "https://storymaps.arcgis.com/stories/" + published_story_item.id
        print(f"{published_story_item_url} '{entry_title}' is staged for publishing. Click the link to complete.")
        return story, published_story_item
    else:
        print("Could not find item for story:", story.title)
        return story, None

def build_collection(classic_item, published_storymap_items, thumbnail_paths, classic_story_type, new_theme):
    collection = Collection()
    collection_title = classic_item.title
    for i, story in enumerate(published_storymap_items):
        try:
            if Item(gis=gis, itemid=story.itemid).get_data():
                collection.add(item=story, title=story.title, thumbnail=thumbnail_paths[i])
            else:
                print(f"There was a problem publishing '{story.title}'. Open the link {story.url}and try again.")
        except Exception as e:
            print(f"Error adding story to collection: {e}")
    # Set collection properties
    collection.content[0].title = collection_title
    collection.content[0].byline = ""
    collection.theme(new_theme)
    collection.content[1].type = classic_story_type
    # Set the Collection thumbnail to be the same as the classic story
    classic_thumbnail_path = download_thumbnail(Item(gis=gis, itemid=classic_item.itemid), default_thumbnail_path, gis)
    collection.content[1].media = Image(path=classic_thumbnail_path)
    collection.save(title=collection_title, tags=["auto-created"], publish=True)
    return collection_title, collection._url

######################################################

# Instead of using threading.Thread and stopevent, update progress directly after each major step
def update_progress(progressbar, value, description=''):
    progressbar.value = value
    progressbar.description = description if description else progressbar.description

def color_to_hex(color_value):
    color_value = color_value.strip()
    # Check for rgb() format
    rgb_match = re.match(r'rgb-?(\d+)-?(\d+)-?(\d+)', color_value, re.IGNORECASE)
    if rgb_match:
        r, g, b = map(int, rgb_match.groups())
        return '{:02X}{:02X}{:02X}'.format(r, g, b)
    # Check for named color
    try:
        return mcolors.CSS4_COLORS[color_value.lower()].upper()
    except ValueError:
        pass
    # Already hex
    if color_value.startswith('#') and len(color_value) == 7:
        return color_value.upper()
    return None

def convert_color_style_to_class(tag):
    # Check if tag has 'style' attribute with color
    style = tag.get('style', '')
    # Regex to find color property (hex, rgb, named colors)
    match = re.search(r'color\s*:\s*([^;]+)', style, re.IGNORECASE)
    if match:
        color_value = match.group(1).strip()
        # Convert hex (#XXXXXX) to class name, removing #
        if color_value.startswith('#'):
            class_color = f"sm-text-color-{color_value[1:].upper()}"
        else:
            # For rgb or named color, sanitize usable string (replace spaces/paren)
            sanitized = re.sub(r'[\s\(\)]', '', color_value).replace(',', '-')
            hex_color = color_to_hex(sanitized)
            class_color = f"sm-text-color-{hex_color}"
        # Remove color from style attribute
        new_style = re.sub(r'color\s*:\s*[^;]+;?', '', style, flags=re.IGNORECASE).strip()
        if new_style:
            tag['style'] = new_style
        else:
            del tag['style']
        # Add or append class attribute
        if 'class' in tag.attrs:
            tag['class'].append(class_color)
        else:
            tag['class'] = [class_color]

def process_html_colors_preserve_html(html_text):
    soup = BeautifulSoup(html_text, "html.parser")
    # Iterate over tags that can have styles: div, span, strong, em, p, etc.
    for tag in soup.find_all(True):
        convert_color_style_to_class(tag)
    return str(soup)

def convert_element_to_storymap_object(el):
    img_tag = el.find('img')
    if img_tag:
        src = img_tag.get("src")
        # Upgrade http to https if needed
        if src and src.startswith("http://"):
            src = "https://" + src[len("http://"):]
        alt = img_tag.get("alt", "")
        link = "" # TO DO handle occasions when image is intended to launch a link
        # Find figcaption in parent figure or div
        figcaption = ""
        # print("img_tag:", img_tag)
        parent_figure = img_tag.find_parent("figure")
        # print("parent_figure:", parent_figure)
        if parent_figure:
            caption_tag = parent_figure.find("figcaption")
            # print("caption_tag:", caption_tag)
            if caption_tag:
                figcaption = caption_tag.get_text(strip=True)
        else:
            # Try to find figcaption in the parent div
            parent_div = img_tag.find_parent("div")
            # print("parent_div:", parent_div)
            if parent_div:
                caption_tag = parent_div.find("figcaption")
                # print("caption_tag (div):", caption_tag)
                if caption_tag:
                    figcaption = caption_tag.get_text(strip=True)
        # print("Extracted figcaption:", figcaption, type(figcaption))
        img = Image(path=src)
        #img.link = link
        #img.image = src
        return img, figcaption, alt, link

    tag_name = el.name
    if tag_name == "p": # or tag_name in ["span", "strong", "em", "div"]:
        # Extract inner HTML preserving inline styles
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)

    elif tag_name == "video":
        src = el.get("src")
        alt = el.get("alt", "")
        vid = Video(path=src)
        vid.alt_text = alt
        vid.caption = "" # TO DO try to find Classic stories that have Videos with captions
        vid.video = src # Assign video property. TO DO fix this for hosted videos
        return vid
    
    elif tag_name == "audio":
        src = el.get("src")
        alt = el.get("alt", "")
        aud = Audio(path=src)
        aud.alt_text = alt
        aud.caption = "" # TO DO try to find Classic stories that have Audio with captions
        aud.audio = src # Assign Audio property. TO DO fix this for hosted videos
        return aud
    
    elif tag_name == "iframe" or tag_name == "embed":
        src = el.get("src") or el.get("data-src")
        alt = el.get("alt", "")
        if src:
            emb = Embed(path=src)
            emb.alt_text = alt
            emb.caption = "" # TO DO try to find Classic stories that have Embeds with captions
            emb.link = src
        return emb

    elif tag_name == "map":
        src = el.get("src")
        alt = el.get("alt", "")
        extent = "" #TO DO get extent
        layers = "" # TO DO get map layers
        mp = Map(item="")
        mp.alt_text = alt
        mp.caption = "" # TO DO try to find Classic stories that have Maps in Sidecar panel with captions
        mp.map = src
        mp.map_layers = layers 
        mp.set_viewpoint = extent
        return aud
    
    else:
        # Fallback for unsupported or unknown types - treat as text
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)

def parse_root_elements(html_snippet):
    soup = BeautifulSoup(html_snippet, "html.parser")
    html_elements = []
    for child in soup.contents:
        if not getattr(child, 'name', None):
            continue

        # If this is a <figure> with an <img>, add the whole figure
        if child.name == "figure" and child.find('img'):
            html_elements.append(child)
            continue

        # Check if the parent itself is meaningful
        has_text = child.get_text(strip=True) != ""
        has_img = child.find('img') is not None
        has_video = child.find('video') is not None
        has_audio = child.find('audio') is not None
        has_iframe = child.find('iframe') is not None
        has_embed = child.find('embed') is not None
        has_map = child.find('map') is not None
        is_meaningful = has_text or has_img or has_video or has_audio or has_iframe or has_embed or has_map

        # Check for meaningful children
        meaningful_children = []
        for c in child.children:
            if not getattr(c, 'name', None):
                continue
            c_has_text = c.get_text(strip=True) != ""
            c_has_img = c.find('img') is not None
            c_has_video = c.find('video') is not None
            c_has_audio = c.find('audio') is not None
            c_has_iframe = c.find('iframe') is not None
            c_has_embed = c.find('embed') is not None
            c_has_map = c.find('map') is not None
            if c_has_text or c_has_img or c_has_video or c_has_audio or c_has_iframe or c_has_embed or c_has_map:
                meaningful_children.append(c)

        # If there are meaningful children, add them
        if meaningful_children:
            html_elements.extend(meaningful_children)
            # Optionally, if the parent is also meaningful and not just a container, add it too
            # If you want to avoid duplicates, only add children
            continue

        # If no meaningful children, but parent is meaningful, add parent
        if is_meaningful:
            html_elements.append(child)

    return html_elements


# def parse_nested_elements(html_snippet):
#     soup = BeautifulSoup(html_snippet, "html.parser")
#     soup_list = [child for child in soup.contents if getattr(child, 'name', None)]
#     html_elements = []
#     for element in soup_list:
#         for c in element:
#             if getattr(c, 'name', None):
#                 html_elements.append(c)
#     return html_elements

def convert_html_elements_to_storymap_node(html_elements):
    content_nodes = []
    image_metadata = []  # To store (img, caption, alt, link) tuples
    for el in html_elements:
        node = convert_element_to_storymap_object(el)
        if isinstance(node, tuple):
            img, caption, alt, link = node
            content_nodes.append(img)
            image_metadata.append((img, caption, alt, link))
        elif node:
            content_nodes.append(node)
    return content_nodes, image_metadata

# def get_thumbnail_path(path):
#     try:
#         if path and isinstance(path, str):
#             # Try local file check, otherwise use default
#             with open(path, 'rb'):
#                 return path
#     except Exception:
#         pass
#     # Use default thumbnail path hosted online
#     return default_thumbnail_path
    
# For downloads, use in-memory BytesIO where possible
def download_thumbnail(webmap_item, default_thumbnail_path, gis=None):
    try:
        url = f"{webmap_item._portal.resturl}content/items/{webmap_item.id}/info/{webmap_item.thumbnail}"
        token = gis._con.token if gis else None
        params = {'token': token} if token else {}
        response = requests.get(url, params=params)
        img = PILImage.open(BytesIO(response.content))
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        img.save(temp_file.name)
        return temp_file.name
    except Exception:
        print("Thumbnail download failed; using default.")
        url = default_thumbnail_path
        response = requests.get(url)
        img = PILImage.open(BytesIO(response.content))
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        img.save(temp_file.name)
        return temp_file.name

#def ensure_local_thumbnail(thumbnail_path, local_filename="default_storymap_thumbnail.png"):
#    # If it's already a local file, just return it
#    if thumbnail_path: # and os.path.isfile(thumbnail_path):
#        return thumbnail_path
#    # Otherwise, download it
#    response = requests.get(thumbnail_path)
#    with open(local_filename, "wb") as f:
#        f.write(response.content)
#    return local_filename

#def create_image_thumbnail(image_url, thumbnail_path):
#    response = requests.get(image_url)
#    img = PILImage.open(BytesIO(response.content))
#    img.thumbnail((800, 600))
#    img.save(thumbnail_path)
#    return thumbnail_path

Python version:  3.12.11 | packaged by Anaconda, Inc. | (main, Jun  5 2025, 12:58:53) [MSC v.1929 64 bit (AMD64)]
ArcGIS for Python API / StoryMap module version:  2.4.1.3


In [2]:
# Print the version of the arcgis module
print(f"Running ArcGIS API for Python version: {arcgis.__version__}")
agoNotebook = False
# Define the GIS
if agoNotebook == False:
    try:
        import keyring
        service_name = "system" # Use the default local credential store
        success = False # Set initial state

        # Ask for the username
        while success == False:
            username_for_keyring = input("Enter your ArcGIS Online username:") # If you are using VS Code, the text input dialog box appears at the top of the window
            # Get the credential object
            credential = keyring.get_credential(service_name, username_for_keyring)
            # Check if the username is in the credential store
            if credential is None:
                print(f"'{username_for_keyring}' is not in the local system's credential store. Try another username.")
            # Retrieve the password, login and set the GIS portal
            else:
                password_from_keyring = keyring.get_password("system", username_for_keyring)
                portal_url = 'https://www.arcgis.com'  
                gis = GIS(portal_url, username=username_for_keyring, password=password_from_keyring)
                success = True
                # Print a success message with username and user's organization role
                print(f"Successfully logged in as: {gis.properties.user.username} (role: {gis.properties.user.role} userType: {gis.properties.user.userLicenseTypeId})")
    except ImportError:
        print("The 'keyring' module is not installed. Please install it using 'pip install keyring'.")
        print("Before re-running this cell, open a command line window on your machine and run the command:")
        print("# python -m keyring set system <your_ago_username>")
        print("If using Windows Powershell, use:")
        print("# ./python -m keyring set system <your_ago_username>")
        print("You will be prompted to enter your password")
        print("When you hit Enter/Return the password will be saved to your local credential store.")
else:
    gis = GIS("home")

Running ArcGIS API for Python version: 2.4.1.3
Successfully logged in as: dasbury_storymaps (role: org_admin userType: GISProfessionalAdvUT)


## 1. Input the Classic StoryMap ID

In [3]:
# Cell 1: Input the classic StoryMap ID
input_param2 = widgets.Text(value="d1799fc84e244c2f9af0e24ced4c95e1", description="Item ID:", layout=widgets.Layout(width='400px')) # test value: 597d573e58514bdbbeb53ba2179d2359
user_line2 = widgets.HBox([widgets.Label(value="Paste 32-digit Classic Esri Story Map id -->"), input_param2]) # TO DO add error checking logic and warning if item is missing or input is incorrect
display(user_line2)

HBox(children=(Label(value='Paste 32-digit Classic Esri Story Map id -->'), Text(value='d1799fc84e244c2f9af0e2…

## 2. Fetch the Data
This cell fetches the classic StoryMap item and parses its JSON data.

In [4]:
# Cell 2: Fetch the classic StoryMap's data
classic_storymap_id = input_param2.value  # or set manually
classic_item, classic_item_data = fetch_classic_storymap_data(classic_storymap_id, gis)
if classic_item is None or classic_item_data is not None:
    print(f"Fetched classic StoryMap: '{classic_item.title}' (ID: {classic_item.itemid})")
else:
    print("Could not fetch classic StoryMap data. Check the item ID and try again.")

Fetched classic StoryMap: 'A World of Circles' (ID: d1799fc84e244c2f9af0e24ced4c95e1)


## 3. Parse Story Settings and Data

This cell extracts the theme, title, subtitle, and entries from the classic StoryMap data.

In [5]:
# Cell 3: Extract settings and entries
classic_story_title, classic_story_subtitle, classic_story_type, classic_story_panel_position, classic_story_theme, entries = extract_story_settings(classic_item_data)
if len(entries) == 1:
    print(f"{'panel position:':>15} {classic_story_panel_position}")
    print(f"{'series title:':>15} '{classic_story_title}'")
    if classic_story_subtitle:
        print(f"{'subtitle:':>15} {classic_story_subtitle}")
    print(f"{'series type:':>15} {classic_story_type}")
    print(f"\nFound {len(entries)} entry in the Classic Map Series.")
else:
    print(f"{'panel position:':>15} {classic_story_panel_position}")
    print(f"{'series title:':>15} '{classic_story_title}'")
    if classic_story_subtitle:
        print(f"{'subtitle:':>15} {classic_story_subtitle}")
    print(f"{'series type:':>15} {classic_story_type}")
    print(f"\nFound {len(entries)} entries in the Classic Map Series.")    
for i, e in enumerate(entries):
    print(f"{i+1}. {e['title']}")

panel position: left
  series title: 'A World of Circles'
   series type: bullet

Found 15 entries in the Classic Map Series.
1. Egmont National Park
2. Manicoagan Reservoir
3. Sun City
4. Center-pivot irrigation
5. Arc de Triomphe
6. Eye of the Desert
7. Victoria University
8. Great Circle Earthworks, Newark, Ohio
9. Arecibo Radio Observatory
10. Mount St. Helens, Washington
11. Kattenbroek, Netherlands
12. Tevatron
13. Stonehenge
14. Las Ventas, Madrid, Spain
15. Circles on the Earth


## 4. Determine StoryMap Theme

This cell determines the new StoryMap theme based on the classic theme group.

In [6]:
# Cell 4: Determine theme
classic_name, new_theme = determine_theme(classic_story_theme)
print(f"Classic theme name: {classic_name}")
print(f"{'New theme set to:':>19} {new_theme.name}")

Classic theme name: bullet-default-1-modified
  New theme set to: SUMMIT


## 5. Loop Through and Process Each Entry's Data

In [38]:
# Cell 5: Loop through entries to process media, content and thumbnails
entry_titles = [None] * len(entries)
main_stage_contents = [None] * len(entries)
thumbnail_paths = [None] * len(entries)
invalid_webmaps = [False] * len(entries)
final_webmap_jsons = [None] * len(entries)
print_service_responses = [False] * len(entries)
for i, entry in enumerate(entries):
    entry_titles[i], main_stage_contents[i], thumbnail_paths[i], invalid_webmaps[i], final_webmap_jsons[i], print_service_responses[i] = process_entry(gis, entry, default_thumbnail_path, extents_dict=map_extents, entry_index=i)
    if invalid_webmaps[i]:
        print(f"WARNING: There is a problem with the webmap in entry [{i+1} of {len(entries)}]: {entry_titles[i]}. Please fix before publishing the new StoryMap.")
    if type(main_stage_contents[i]).__name__ == "Map":
        webmap_id = entries[i].get("media", {}).get('webmap', {}).get('id')
        print(f"[{i+1} of {len(entries)}]: {entry_titles[i]:35} Media type: {type(main_stage_contents[i]).__name__} (id: {webmap_id})")
    elif type(main_stage_contents[i]).__name__ == "Embed":
        embed_url = entries[i].get("media", {}).get('webpage', {}).get('url')
        print(f"[{i+1} of {len(entries)}]: {entry_titles[i]:35} Media type: {type(main_stage_contents[i]).__name__} (link: {embed_url})")
    elif type(main_stage_contents[i]).__name__ == "Image":
        image_name = entries[i].get("media", {}).get('image', {}).get('title')
        print(f"[{i+1} of {len(entries)}]: {entry_titles[i]:35} Media type: {type(main_stage_contents[i]).__name__} (title: {image_name})")
    else:
        print(f"[{i+1} of {len(entries)}]: {entry_titles[i]:35} Media type: {type(main_stage_contents[i]).__name__}")

[1 of 15]: Egmont National Park                Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
[2 of 15]: Manicoagan Reservoir                Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
[3 of 15]: Sun City                            Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
[4 of 15]: Center-pivot irrigation             Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
Generated thumbnail is blank; scaling extent and retrying.
[5 of 15]: Arc de Triomphe                     Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
[6 of 15]: Eye of the Desert                   Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
Generated thumbnail is blank; scaling extent and retrying.
[7 of 15]: Victoria University                 Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
Generated thumbnail is blank; scaling extent and retrying.
[8 of 15]: Great Circle Earthworks, Newark, Ohio Media type: Map (id: 0bb11c0469f042b3afaf8b0d76572822)
Gener

## 6. Build an ArcGIS StoryMap with a Suppressed Cover Page for Each Entry

In [None]:
# Cell 6: Loop through each tab and create a StoryMap for each
published_storymap_items = []
print("\n***NOTICE*** You MUST click each link below to open the story in a new window. Check for errors, edit and continue publishing if necessary. ***NOTICE***\n\nIf you see an error message -- before troubleshooting further -— try just clicking the 'Publish' button. Doing so can fix many common issues.\n")
for i, entry in enumerate(entries):
    print(f"[{i+1} of {len(entries)}]... ",end="")
    story, published_story_item = build_and_save_storymap(entry, i, entry["title"], main_stage_contents[i], new_theme, thumbnail_paths[i], default_thumbnail_path)
    if published_story_item:
        published_storymap_items.append(published_story_item) 

## 7. Build a Collection from the Published StoryMaps

In [None]:
# Cell 7. Run the function to create the Collection
collection_title, collection_url = build_collection(classic_item, published_storymap_items, thumbnail_paths, classic_story_type, new_theme)
print(f"Collection created: {collection_title} {collection_url}")

In [39]:
data = final_webmap_jsons[0]
for d in final_webmap_jsons[0]:
    print(json.dumps(json.loads(d['params']['Web_Map_as_JSON']), indent=2))

{
  "baseMap": {
    "baseMapLayers": [
      {
        "id": "World_Imagery_2017",
        "layerType": "ArcGISTiledMapServiceLayer",
        "opacity": 1,
        "visibility": true,
        "url": "https://services.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer"
      }
    ],
    "title": "Imagery"
  },
  "operationalLayers": [],
  "spatialReference": {
    "wkid": 102100
  },
  "mapOptions": {
    "extent": {
      "xmin": -20037508.342789244,
      "ymin": -4782120.616062363,
      "xmax": -20037508.342789244,
      "ymax": -4744437.161117821,
      "spatialReference": {
        "wkid": 102100
      }
    }
  },
  "exportOptions": {
    "outputSize": [
      800,
      600
    ],
    "dpi": 96
  }
}


In [None]:
      "xmin": -20721638.00383418,
      "ymin": -4782120.616062363,
      "xmax": -20674208.827783294,
      "ymax": -4744437.161117821,
test_json = {
  "baseMap": {
    "baseMapLayers": [
      {
        "id": "World_Imagery_2017",
        "layerType": "ArcGISTiledMapServiceLayer",
        "opacity": 1,
        "visibility": True,
        "url": "https://services.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer"
      }
    ],
    "title": "Imagery"
  },
  "operationalLayers": [],
  "spatialReference": {
    "wkid": 102100
  },
  "mapOptions": {
    "extent": {
      "xmin": -20037508.342789244,
      "ymin": -20037508.342789244,
      "xmax": 20037508.342789244,
      "ymax": 20037508.342789244,
      "spatialReference": {
        "wkid": 102100
      }
    }
  },
  "exportOptions": {
    "outputSize": [
      800,
      600
    ],
    "dpi": 96
  }
}


In [None]:
test_url = "https://utility.arcgisonline.com/arcgis/rest/services/Utilities/PrintingTools/GPServer/Export%20Web%20Map%20Task/execute"
test_webmap = test_json
test_params = {
        "f": "json",
        "Web_Map_as_JSON": json.dumps(test_webmap),
        "Format": "PNG32",
        "Layout_Template": "MAP_ONLY"
        }

response = requests.post(test_url, data=test_params)
test_result = response.json()

if 'results' in test_result:
    image_url = test_result['results'][0]['value']['url']
    img_response = requests.get(image_url)
    if img_response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        temp_file.write(img_response.content)
        temp_file.close()
        img = PILImage.open(temp_file.name)
        print(temp_file.name)
        is_blank = is_blank_image(temp_file.name)
        if is_blank:
            print("Generated thumbnail is blank.")

In [41]:
entries[0]['media']

{'type': 'webmap',
 'webmap': {'id': '0bb11c0469f042b3afaf8b0d76572822',
  'extent': {'xmin': -20037508.342789244,
   'ymin': -4782120.616062363,
   'xmax': -20037508.342789244,
   'ymax': -4744437.161117821,
   'spatialReference': {'wkid': 102100}},
  'layers': [{'id': 'World_Dark_Gray_Base_Beta_4121', 'visibility': False},
   {'id': 'mapNotes_2673_0', 'visibility': False}],
  'popup': None,
  'legend': {'enable': False, 'openByDefault': False}}}

In [34]:
map2 = gis.map()
map2.basemap.basemap = 'imagery'
map2.extent = {
      "xmin": -20721638.00383418,
      "ymin": -4782120.616062363,
      "xmax": -20674208.827783294,
      "ymax": -4744437.161117821,
    "spatialReference": {"wkid": 102100}
}
display(map2)

Map(extent={'xmin': -20721638.00383418, 'ymin': -4782120.616062363, 'xmax': -20674208.827783294, 'ymax': -4744…

In [52]:
test_media = entries[0]['media']

# Create the map widget
m = gis.map()

# Set basemap if available
if 'webmap' in test_media:
    # Try to use the basemap from the webmap, fallback to 'topo'
    basemap = 'topo'
    try:
        webmap_id = test_media['webmap'].get('id')
        if webmap_id:
            webmap_item = gis.content.get(webmap_id)
            wm_data = webmap_item.get_data()
            if 'baseMap' in wm_data and 'title' in wm_data['baseMap']:
                basemap = wm_data['baseMap']['title'].lower()
    except Exception:
        pass
    m.basemap.basemap = basemap

    # Set extent if available
    if 'extent' in test_media['webmap']:
        m.extent = clamp_extent_to_spatial_reference(test_media['webmap']['extent'], test_media['webmap']['extent']['spatialReference'])

# Display the map
display(m)

# Set the output file format
file_format = "PNG32"

print_extent = m.extent

# Print the map to an image (returns a file path)
image_path = m.print(file_format=file_format, extent=print_extent)
print("Map image saved to:", image_path)

Map(extent={'xmin': -20037508.342789244, 'ymin': -4782120.616062363, 'xmax': -20037508.342789244, 'ymax': -474…

Map image saved to: https://utility.arcgisonline.com/arcgis/rest/directories/arcgisoutput/Utilities/PrintingTools_GPServer/x_____xr59HNaEjFL4YLQRWZFFavA..x_____x_ags_9754b29a-a621-11f0-9ace-0affc8ec20a7.png


In [51]:
print(m.extent)

{'spatialReference': {'wkid': 102100}, 'xmin': -20074580.301507503, 'ymin': -4786209.997075614, 'xmax': -20000436.384070985, 'ymax': -4740347.78010457}
