# Convert Classic Esri StoryMap Series (Tabbed Layout)
Fetch JSON from an ArcGIS Online hosted Classic Esri Story Map Series App and convert each tab into its own ArcGIS StoryMap with the cover suppressed. Once converted, each ArcGIS StoryMap will open in a browser tab in order to complete the Story Checker. Once all are published, an ArcGIS StoryMap Collection is created that contains the converted StoryMaps to replicate the classic app's look and feel.

TO DO - create conversion tool for classic swipe (second tab in Katrina story)

In [None]:
# Install the webcolors package if not already installed
!pip install webcolors

In [None]:
from bs4 import BeautifulSoup, NavigableString, Tag
from arcgis.apps.storymap import StoryMap, Themes, Image, Video, Audio, Embed, Map, Button, Text, Gallery, Timeline, Sidecar, Code, Table, TextStyles, Collection, CollectionNavigation
from arcgis.gis import GIS, Item
from IPython.display import display
import pandas as pd
import webcolors
import webbrowser
import re, json, requests, sys, time 

# Connection mode switch: when False the login cell reads credentials from
# keyring; otherwise it connects with GIS("home")
agoNotebook = False

# Pandas DataFrame rendering: no truncation of cell text, up to 1000 columns
pd.options.display.max_colwidth = None
pd.options.display.max_columns = 1000

In [None]:
# Print Python and ArcGIS for Python versions
# since things can change between versions
import sys
import arcgis

# Interpolate the values directly; passing them as a second print() argument
# after a label that already ends in a space produced a doubled space.
print(f"Python version: {sys.version}")
print(f"ArcGIS for Python API / StoryMap module version: {arcgis.__version__}")

In [None]:
# Connect to ArcGIS Online
# Define the GIS
if not agoNotebook:
    import keyring

    service_name = "system"  # Use the default local credential store
    portal_url = 'https://www.arcgis.com'
    success = False  # Set initial state; becomes True once a login succeeds

    # Keep asking for a username until one is found in the credential store
    while not success:
        # If you are using VS Code, the text input dialog box appears at the top of the window.
        # Surrounding whitespace is stripped so a stray space does not cause a failed lookup.
        username_for_keyring = input("Enter your ArcGIS Online username:").strip()
        # Get the credential object
        credential = keyring.get_credential(service_name, username_for_keyring)
        # Check if the username is in the credential store
        if credential is None:
            print(f"'{username_for_keyring}' is not in the local system's credential store. Try another username.")
            continue

        # Retrieve the password, login and set the GIS portal
        password_from_keyring = keyring.get_password(service_name, username_for_keyring)
        gis = GIS(portal_url, username=username_for_keyring, password=password_from_keyring)
        success = True
        # Print a success message with username and user's organization role
        print(f"Successfully logged in as: {gis.properties.user.username} (role: {gis.properties.user.role})")
else:
    gis = GIS("home")

In [None]:
# Define the Classic StoryMap item id
classic_storymap_id = '597d573e58514bdbbeb53ba2179d2359'
# Fetch the StoryMap Item from AGO
classic_item = Item(gis=gis, itemid=classic_storymap_id)
# Fetch the StoryMap data
classic_data = classic_item.get_data()
if not classic_data:
    # Fail here with a clear message instead of a TypeError/KeyError further down
    raise ValueError(f"Item {classic_storymap_id} returned no data; is it a Classic Story Map Series app?")
if isinstance(classic_data, dict):
    # Already parsed into a dict, so it can be used as-is
    classic_item_data = classic_data
else:
    # Otherwise the data is expected to be JSON text that still needs parsing
    classic_item_data = json.loads(classic_data)

In [None]:
# Helper functions
def color_to_hex(color_value):
    """Convert a CSS color value to six uppercase hex digits without a leading '#'.

    Recognized inputs:
      * rgb()/rgba() notation, either raw ("rgb(255, 0, 0)") or in the
        sanitized form "rgb255-0-0" (parentheses and whitespace removed,
        commas replaced by hyphens). Any alpha component is ignored and
        channel values above 255 are clamped.
      * "#RGB" and "#RRGGBB" hex values ("#RGB" is expanded to six digits).
      * Color names understood by ``webcolors.name_to_hex``.

    Every branch returns the same shape (e.g. "FF0000") so the result can be
    embedded directly in a class name. Returns None when the value is empty
    or not recognized.
    """
    if not color_value:
        return None
    color_value = color_value.strip()

    # rgb()/rgba(): channels must be separated, so "rgb255" is not split into 2, 5, 5
    rgb_match = re.match(
        r'rgba?-?\s*\(?\s*(\d{1,3})\s*[-,\s]\s*(\d{1,3})\s*[-,\s]\s*(\d{1,3})',
        color_value,
        re.IGNORECASE,
    )
    if rgb_match:
        r, g, b = (min(int(channel), 255) for channel in rgb_match.groups())
        return '{:02X}{:02X}{:02X}'.format(r, g, b)

    # Hex is checked before names: a '#...' value can never be a color name
    hex_match = re.fullmatch(r'#([0-9a-fA-F]{3}|[0-9a-fA-F]{6})', color_value)
    if hex_match:
        digits = hex_match.group(1)
        if len(digits) == 3:
            # Shorthand "#abc" means "#aabbcc"
            digits = ''.join(ch * 2 for ch in digits)
        return digits.upper()

    # Named color; name_to_hex returns '#rrggbb', normalized here to 'RRGGBB'
    try:
        return webcolors.name_to_hex(color_value.lower()).lstrip('#').upper()
    except ValueError:
        return None

def convert_color_style_to_class(tag):
    """Replace an inline CSS text color on *tag* with an ``sm-text-color-XXXXXX`` class.

    Reads the tag's ``style`` attribute; if it declares a ``color`` property
    whose value can be resolved to hex, the declaration is removed from the
    style (the attribute is deleted when nothing else remains) and the class
    ``sm-text-color-<HEX>`` is appended to the tag's ``class`` attribute.

    Properties that merely end in "color" (``background-color``,
    ``border-color``, ...) are left alone, and a color that cannot be
    resolved leaves the tag unchanged. The tag is modified in place and
    nothing is returned.
    """
    style = tag.get('style', '') or ''
    # The lookbehind rejects "color" when it is the tail of a longer property name
    color_pattern = r'(?<![-\w])color\s*:\s*([^;]+);?'
    match = re.search(color_pattern, style, re.IGNORECASE)
    if not match:
        return

    # Drop a trailing "!important" so it is not treated as part of the color
    color_value = re.sub(r'!\s*important', '', match.group(1), flags=re.IGNORECASE).strip()
    if color_value.startswith('#'):
        hex_digits = color_value[1:]
        if len(hex_digits) == 3:
            # Expand shorthand "#abc" to "aabbcc"
            hex_digits = ''.join(ch * 2 for ch in hex_digits)
    else:
        # For rgb or named color, sanitize to a usable string (strip spaces/parens)
        sanitized = re.sub(r'[\s\(\)]', '', color_value).replace(',', '-')
        hex_digits = color_to_hex(sanitized)
        if hex_digits is None:
            # Unrecognized color: do not emit a bogus "sm-text-color-None" class
            return
        # Tolerate a leading '#' in the helper's result
        hex_digits = hex_digits.lstrip('#')
    class_color = f"sm-text-color-{hex_digits.upper()}"

    # Remove color from style attribute
    new_style = re.sub(color_pattern, '', style, flags=re.IGNORECASE).strip()
    if new_style:
        tag['style'] = new_style
    else:
        del tag['style']

    # Add or append class attribute (which may be a list or a plain string)
    existing_classes = tag.get('class')
    if existing_classes is None:
        tag['class'] = [class_color]
    elif isinstance(existing_classes, str):
        tag['class'] = existing_classes.split() + [class_color]
    else:
        existing_classes.append(class_color)

def process_html_colors_preserve_html(html_text):
    """Parse *html_text*, convert inline colors on every tag, and return the HTML as a string.

    Each tag found in the parsed fragment is passed to
    convert_color_style_to_class, which edits it in place; the rest of the
    markup is serialized back unchanged.
    """
    parsed = BeautifulSoup(html_text, "html.parser")
    # find_all(True) selects every tag in the fragment, whatever its name
    all_tags = parsed.find_all(True)
    for element in all_tags:
        convert_color_style_to_class(element)
    return str(parsed)

def _find_image_caption(img_tag):
    """Return the <figcaption> text for *img_tag*, or "" when there is none."""
    # Look in the enclosing <figure>; only when there is no figure, fall back
    # to the nearest enclosing <div>
    container = img_tag.find_parent("figure") or img_tag.find_parent("div")
    if not container:
        return ""
    caption_tag = container.find("figcaption")
    return caption_tag.get_text(strip=True) if caption_tag else ""


def _media_source(el):
    """Return the URL of a <video>/<audio> element, checking <source> children too."""
    src = el.get("src")
    if src:
        return src
    source_tag = el.find("source")
    return source_tag.get("src") if source_tag else None


def _element_to_paragraph(el):
    """Return a paragraph Text node built from the inner HTML of *el*."""
    # Extract inner HTML preserving inline markup, then convert inline colors
    inner_html = ''.join(str(c) for c in el.contents)
    processed_html = process_html_colors_preserve_html(inner_html)
    return Text(text=processed_html, style=TextStyles.PARAGRAPH)


def convert_element_to_storymap_object(el):
    """Convert one parsed HTML element into a StoryMap content object.

    Returns one of:
      * a tuple ``(Image, figcaption, alt, link)`` when *el* is an <img> or
        contains one (caption/alt/link are returned separately so the caller
        can assign them later);
      * a Video, Audio, Embed or Map object for <video>, <audio>,
        <iframe>/<embed> and <map> elements;
      * a paragraph Text object for <p> and any other element;
      * None when a media element has no usable source URL.
    """
    # find() only searches descendants, so an element that is itself an <img>
    # must be checked explicitly
    img_tag = el if el.name == "img" else el.find('img')
    if img_tag:
        src = img_tag.get("src")
        alt = img_tag.get("alt", "")
        link = ""  # TO DO handle occasions when image is intended to launch a link
        figcaption = _find_image_caption(img_tag)
        img = Image(path=src)
        return img, figcaption, alt, link

    tag_name = el.name
    if tag_name == "video":
        src = _media_source(el)
        if not src:
            return None
        vid = Video(path=src)
        vid.alt_text = el.get("alt", "")
        vid.caption = ""  # TO DO try to find Classic stories that have Videos with captions
        vid.video = src  # Assign video property. TO DO fix this for hosted videos
        return vid

    if tag_name == "audio":
        src = _media_source(el)
        if not src:
            return None
        aud = Audio(path=src)
        aud.alt_text = el.get("alt", "")
        aud.caption = ""  # TO DO try to find Classic stories that have Audio with captions
        aud.audio = src  # Assign Audio property. TO DO fix this for hosted videos
        return aud

    if tag_name in ("iframe", "embed"):
        src = el.get("src") or el.get("data-src")
        if not src:
            # Nothing to embed; previously this path hit an unbound `emb`
            return None
        emb = Embed(path=src)
        emb.alt_text = el.get("alt", "")
        emb.caption = ""  # TO DO try to find Classic stories that have Embeds with captions
        emb.link = src
        return emb

    if tag_name == "map":
        src = el.get("src")
        layers = ""  # TO DO get map layers
        mp = Map(item="")
        mp.alt_text = el.get("alt", "")
        mp.caption = ""  # TO DO try to find Classic stories that have Maps in Sidecar panel with captions
        mp.map = src
        mp.map_layers = layers
        # TO DO get extent. The extent is deliberately not assigned to
        # `mp.set_viewpoint`: this file calls set_viewpoint(extent=...) as a
        # method on Map objects, so assigning a value would shadow it.
        return mp

    # <p> and any unsupported or unknown element are treated as paragraph text
    return _element_to_paragraph(el)

# def parse_root_elements(html_snippet):
#     soup = BeautifulSoup(html_snippet, "html.parser")
#     html_elements = [child for child in soup.contents if getattr(child, 'name', None)]
#     return html_elements

# def parse_root_elements(html_snippet):
#     soup = BeautifulSoup(html_snippet, "html.parser")
#     html_elements = []
#     for child in soup.contents:
#         if not getattr(child, 'name', None):
#             continue
#         # Check for text content (ignoring whitespace)
#         has_text = child.get_text(strip=True) != ""
#         # Check for media tags
#         has_img = child.find('img') is not None
#         has_video = child.find('video') is not None
#         has_audio = child.find('audio') is not None
#         has_iframe = child.find('iframe') is not None
#         has_embed = child.find('embed') is not None
#         has_map = child.find('map') is not None
#         if has_text or has_img or has_video or has_audio or has_iframe or has_embed or has_map:
#             html_elements.append(child)
#     return html_elements

# def parse_root_elements(html_snippet, entry_index=None):
#     soup = BeautifulSoup(html_snippet, "html.parser")
#     html_elements = []
#     for child in soup.contents:
#         if not getattr(child, 'name', None):
#             continue
#         # If this tag has meaningful children, add them individually
#         meaningful_children = []
#         for c in child.children:
#             if not getattr(c, 'name', None):
#                 continue
#             has_text = c.get_text(strip=True) != ""
#             has_img = c.find('img') is not None
#             has_video = c.find('video') is not None
#             has_audio = c.find('audio') is not None
#             has_iframe = c.find('iframe') is not None
#             has_embed = c.find('embed') is not None
#             has_map = c.find('map') is not None
#             if has_text or has_img or has_video or has_audio or has_iframe or has_embed or has_map:
#                 meaningful_children.append(c)
#         # If we found meaningful children, add them instead of the parent
#         if meaningful_children:
#             html_elements.extend(meaningful_children)
#             continue  # skip adding the parent
#     return html_elements

        # Otherwise, fall through to normal logic

        # # Default logic (for all other entries)
        # has_text = child.get_text(strip=True) != ""
        # has_img = child.find('img') is not None
        # has_video = child.find('video') is not None
        # has_audio = child.find('audio') is not None
        # has_iframe = child.find('iframe') is not None
        # has_embed = child.find('embed') is not None
        # has_map = child.find('map') is not None
        # if has_text or has_img or has_video or has_audio or has_iframe or has_embed or has_map:
        #     html_elements.append(child)

_MEANINGFUL_MEDIA_TAGS = ('img', 'video', 'audio', 'iframe', 'embed', 'map')


def _is_meaningful_element(tag):
    """Return True if *tag* has non-whitespace text or contains a media tag."""
    if tag.get_text(strip=True) != "":
        return True
    return any(tag.find(name) is not None for name in _MEANINGFUL_MEDIA_TAGS)


def _has_direct_text(tag):
    """Return True if *tag* has non-whitespace text directly inside it (not in a child tag)."""
    # The exact type check skips NavigableString subclasses
    return any(
        type(node) is NavigableString and node.strip()
        for node in tag.children
    )


def parse_root_elements(html_snippet):
    """Parse *html_snippet* and return the elements worth converting, in document order.

    For each top-level tag:
      * if it acts purely as a container (it has meaningful child tags and no
        text of its own), its meaningful child tags are returned in its place;
      * otherwise the tag itself is returned when it is meaningful.

    "Meaningful" means the tag has non-whitespace text or contains an img,
    video, audio, iframe, embed or map tag. A tag that mixes its own text
    with child tags (e.g. ``<p>Some <strong>bold</strong> text</p>``) is kept
    whole; unwrapping it would discard the text outside the child tags.
    """
    soup = BeautifulSoup(html_snippet, "html.parser")
    html_elements = []
    for child in soup.contents:
        # Skip bare strings at the root of the fragment
        if not getattr(child, 'name', None):
            continue

        meaningful_children = [
            c for c in child.children
            if getattr(c, 'name', None) and _is_meaningful_element(c)
        ]

        # Unwrap pure containers only; only children are added to avoid duplicates
        if meaningful_children and not _has_direct_text(child):
            html_elements.extend(meaningful_children)
            continue

        # No unwrapping: add the parent itself if it is meaningful
        if _is_meaningful_element(child):
            html_elements.append(child)

    return html_elements


def parse_nested_elements(html_snippet):
    """Return every tag that is a direct child of a top-level tag in *html_snippet*.

    Bare strings are skipped at both levels (the root of the fragment and
    inside each top-level tag); results are in document order.
    """
    document = BeautifulSoup(html_snippet, "html.parser")
    return [
        inner
        for outer in document.contents
        if getattr(outer, 'name', None)
        for inner in outer
        if getattr(inner, 'name', None)
    ]

def convert_html_elements_to_storymap_node(html_elements):
    """Convert parsed HTML elements into StoryMap content nodes.

    Returns a pair ``(content_nodes, image_metadata)``. ``content_nodes``
    holds the converted objects in input order (falsy conversions are
    dropped). ``image_metadata`` holds one ``(img, caption, alt, link)``
    tuple for every element that converted to an image.
    """
    nodes = []
    image_records = []  # (img, caption, alt, link) for each image encountered
    for element in html_elements:
        converted = convert_element_to_storymap_object(element)
        if not isinstance(converted, tuple):
            # Non-image result: keep it unless the conversion produced nothing
            if converted:
                nodes.append(converted)
            continue
        # Image results arrive as a 4-tuple; the image itself is the content node
        picture, caption_text, alt_text, target = converted
        nodes.append(picture)
        image_records.append((picture, caption_text, alt_text, target))
    return nodes, image_records

In [None]:
# Extract story data
classic_values = classic_item_data["values"]
classic_story_settings = classic_values["settings"]
classic_story_theme = classic_story_settings["theme"]
classic_story_title = classic_values["title"]
classic_story_subtitle = classic_values.get("subtitle", "")
classic_story_data = classic_values["story"]

# Extract tabs (entries list)
entries = classic_story_data["entries"]

# Fetch theme group and map it to a StoryMap theme
classic_theme_group = classic_story_theme["colors"]["group"]
if classic_theme_group == "dark":
    new_theme = Themes.OBSIDIAN
elif classic_theme_group == "light":
    new_theme = Themes.SUMMIT
else:
    # Without a fallback, new_theme would be undefined and the conversion
    # loop would fail with a NameError on the first tab
    print(f"Unrecognized classic theme group '{classic_theme_group}'; defaulting to the light theme.")
    new_theme = Themes.SUMMIT

# Per-run results; the dicts are keyed by the entry (tab) index
created_storymaps = []
published_storymap_items = []
description_html = {}
nested_elements_df = {}
root_elements_df = {}
content_nodes = {}
content_image_metadata = {}
text_panels = {}

target_index = 5  # Change to the index of the entry you want to process (0-based)
def _elements_to_dataframe(elements):
    """Build an inspection DataFrame (tag, text, image details, raw HTML) from parsed elements."""
    rows = []
    for el in elements:
        img_tag = el.find('img')
        caption_tag = el.find('figcaption')
        rows.append({
            'tag': el.name,
            'text': el.get_text(strip=True),
            # .get() yields None instead of raising when the attribute is absent
            'img_url': img_tag.get('src') if img_tag else None,
            'img_alt': img_tag.get('alt') if img_tag else None,
            'img_caption': caption_tag.get_text(strip=True) if caption_tag else None,
            #'img_link': "", # TO DO handle occasions when image is intended to launch a link
            'raw_html': str(el)
        })
    return pd.DataFrame(rows)


# Convert every tab (entry) of the classic app into its own published StoryMap
for i, entry in enumerate(entries):
    # if i != target_index:
    #    continue # Skip all except the target index

    # Fetch entry title
    entry_title = entry.get("title")

    # Create a new StoryMap
    story = StoryMap()
    story.theme(new_theme)

    # Create Sidecar immersive section
    sidecar = Sidecar(style="docked-panel")

    # Add Sidecar to story
    story.add(sidecar)

    # Determine media content for main stage
    media_info = entry.get("media", {})
    media_type = media_info.get("type")
    webmap_info = media_info.get('webmap', {})

    media_content = None
    if media_type == "webmap":
        webmap_id = webmap_info.get('id')
        if webmap_id:
            media_content = Map(webmap_id)
    elif media_type == "webpage":
        webpage_url = media_info.get("webpage", {}).get("url")
        if webpage_url:
            media_content = Embed(webpage_url)

    # Fetch content from description (HTML); a null description is treated as empty
    description_html[i] = entry.get("description") or ""

    # Parse the description once per strategy and reuse the results below
    nested_elements = parse_nested_elements(description_html[i])
    root_elements = parse_root_elements(description_html[i])

    # Convert description HTML to pandas dataframes for inspection
    nested_elements_df[i] = _elements_to_dataframe(nested_elements)
    root_elements_df[i] = _elements_to_dataframe(root_elements)

    # Convert description HTML to StoryMap content nodes
    content_nodes[i], content_image_metadata[i] = convert_html_elements_to_storymap_node(root_elements)
    nested_content_nodes, nested_image_metadata = convert_html_elements_to_storymap_node(nested_elements)

    # Create text panel from narrative nodes
    text_panels[i] = Text(content_nodes[i])
    #story.add(text_panel)

    # Add a slide to the sidecar with main media (no text panel yet)
    #sidecar.add_slide(contents=nested_content_nodes,media=media_content)
    sidecar.add_slide(contents=content_nodes[i], media=media_content)

    # Assign metadata to each image in contents
    # for img, caption, alt, link in nested_image_metadata:
    #     if caption:
    #         img.caption = caption
    #     if alt:
    #         img.alt_text = alt
    #     if link:
    #         img.link = link

    # Assign metadata to each image in contents
    for img, caption, alt, link in content_image_metadata[i]:
        if caption:
            img.caption = caption
        if alt:
            img.alt_text = alt
        if link:
            img.link = link

    # Set webmap properties. Map must be added to the story before setting viewpoint.
    # media_content stays None when the entry has no webmap id, so guard against that.
    if media_type == "webmap" and media_content is not None:
        # Set the extent for the map stage
        extent_json = webmap_info.get('extent')
        if extent_json:
            media_content.set_viewpoint(extent=extent_json)  # Extent dict per docs
        # Set layer visibility (if StoryMap Map object supports)
        old_layers = webmap_info.get('layers') or []
        if hasattr(media_content, "map_layers"):
            # Index the classic visibility flags by layer id; entries missing
            # either key are skipped rather than raising KeyError
            visibility_by_id = {
                old_lyr['id']: old_lyr['visibility']
                for old_lyr in old_layers
                if 'id' in old_lyr and 'visibility' in old_lyr
            }
            for new_lyr in media_content.map_layers:
                layer_id = new_lyr.get('id')
                if layer_id in visibility_by_id:
                    new_lyr['visible'] = visibility_by_id[layer_id]

    # Set Cover properties
    cover_properties = story.content_list[0]
    cover_properties.title = entry_title
    cover_properties.byline = ""
    cover_properties.date = "none"
    #cover_properties.media = createThumbnail() # figure out a way to create a thumbnail from the first Sidecar media item

    # As the Cover class does not include a setting to hide the cover, we hide it by adding the 'config' key
    # to the Cover json
    # NOTE(review): isHidden is written as the string 'true' - confirm whether a boolean is expected
    for node in story.properties['nodes'].values():
        if node.get('type') == 'storycover':
            node['config'] = {'isHidden': 'true'}

    # Save and publish storymap
    story_title = entry_title
    story.save(title=story_title, tags=["auto-created"], publish=True)

    # TO DO add an AGO relationship so if an attempt is made to delete story from My Content a warning is issued that the story
    # is included in a Collection (and give the name/id of the Collection(s) where it is referenced)

    created_storymaps.append(story)
    # Get the item object; a missing or None item means there is nothing to open or collect
    published_story_item = getattr(story, '_item', None)
    if published_story_item is None:
        print("Could not find item for story:", story_title)
        continue
    published_storymap_items.append(published_story_item)

    print(f"Created replica of {story_title}")

    # Open a browser to launch the Story Checker and fully publish the story
    story_url = "https://storymaps.arcgis.com/stories/" + published_story_item.id
    print(f"Opening: {published_story_item.title} ({story_url})")
    webbrowser.open(story_url)

print(f"Created {len(created_storymaps)} StoryMaps")


# Pause to give the stories time to publish. The duration lives in one
# constant so the message and the actual sleep cannot drift apart.
PUBLISH_WAIT_SECONDS = 30
print(f"Waiting for {PUBLISH_WAIT_SECONDS} seconds for stories to publish...")
time.sleep(PUBLISH_WAIT_SECONDS)
print("Resuming code execution.")


In [None]:

# Create a Collection to hold the created StoryMaps
collection = Collection()
# The Collection reuses the classic app's item title
collection_title = classic_item.title
storymap_item = []
# Add each published StoryMap item to the Collection, labelled with its own title
for story in published_storymap_items:
    collection.add(item=story, title=story.title)
# Assign the classic story's title to the Cover object of the Collection
# NOTE(review): this replaces element 0 of the list returned by `collection.content`
# with a plain string rather than setting a title on a cover object - confirm this
# has the intended effect (the title is also passed to save() below).
collection.content[0] = collection_title
# Apply the same theme to the collection as the stories
collection.theme(new_theme)

# Save and publish the Collection, then open it in the browser
collection.save(title=collection_title, tags=["auto-created"], publish=True)
# NOTE(review): `_item` is a private attribute; presumably set by save() - verify
published_collection_item = collection._item
collection_url = "https://storymaps.arcgis.com/collections/"+ published_collection_item.id
print(f"Opening Collection: {collection_title} ({collection_url})")
webbrowser.open(collection_url)
print(f"Created Collection: {collection_title}")

   

In [None]:
# Display the list of published StoryMap items as the cell output
published_storymap_items