# Convert Classic Esri Tabbed StoryMap
Fetch JSON from a Classic Esri Tabbed Story Map and convert each tab into its own ArcGIS StoryMap with the cover suppressed. These StoryMaps can then be incorporated into an ArcGIS Collection to replicate the classic app's look and feel.

In [None]:
# Configure imports and environment variables
import arcgis
from bs4 import BeautifulSoup
from arcgis.apps.storymap import StoryMap, Sidecar, Text, TextStyles, Image, Map, Embed, Themes, Cover
from arcgis.gis import GIS, Item
from IPython.display import display
import pandas as pd
import json, re, requests, sys, time 
import ipywidgets as widgets
from IPython.display import display

# Set Pandas dataframe display options
# max_colwidth=None: do not cap the displayed width of a column's contents
pd.set_option('display.max_colwidth', None)
# Raise the column-count display limit to 1000
pd.set_option('display.max_columns',1000)

In [None]:
# Helper functions to extract HTML side panel content
# all possible options:  Image | Video | Audio | Embed | Map | Button | Text | Gallery | Timeline | Sidecar | Code | Table 

def deduplicate_by_containment(elements):
    """Return the elements whose HTML is not nested inside another element's HTML.

    Each element is rendered with ``str()`` and its whitespace runs are
    collapsed to single spaces. An element is dropped when that normalized
    string occurs as a substring of another element's normalized string.
    Elements that render to exactly the same string count as duplicates of one
    another: the first occurrence is kept and the later copies are dropped, so
    identical elements do not eliminate each other entirely.

    Args:
        elements: Iterable of objects convertible with ``str()`` (the callers
            in this notebook pass BeautifulSoup elements).

    Returns:
        list: The surviving elements, in their original order.
    """
    items = list(elements)
    # Whitespace-normalized outer HTML, index-aligned with ``items``
    normalized = [' '.join(str(item).split()) for item in items]

    keep = []
    for i, html_i in enumerate(normalized):
        contained = False
        for j, html_j in enumerate(normalized):
            if i == j or html_i not in html_j:
                continue
            # Identical markup: only a later copy is treated as the duplicate,
            # so the first occurrence survives.
            if html_i == html_j and j > i:
                continue
            contained = True
            break
        if not contained:
            keep.append(items[i])
    return keep

def convert_color_style_to_class(tag):
    """Replace an inline CSS ``color`` declaration on *tag* with a class name.

    The first ``color: <value>`` found in the tag's ``style`` attribute is
    turned into a class of the form ``sm-text-color-<VALUE>`` and appended to
    the tag's ``class`` list; the declaration is then removed from ``style``
    (the attribute is deleted when nothing else remains). Only the standalone
    ``color`` property is matched, so declarations such as
    ``background-color`` are left untouched. The tag is modified in place.

    Args:
        tag: An element exposing ``get``, item assignment/deletion and
            ``attrs`` (the callers in this notebook pass BeautifulSoup tags).

    Returns:
        None
    """
    style = tag.get('style', '')
    # The lookbehind rejects a preceding word character or hyphen, so
    # "background-color" / "border-color" do not match as the text colour.
    match = re.search(r'(?<![\w-])color\s*:\s*([^;]+)', style, re.IGNORECASE)
    if not match:
        return

    color_value = match.group(1).strip()
    if color_value.startswith('#'):
        # Hex value: drop the leading '#', e.g. #e2f782 -> sm-text-color-E2F782
        class_color = f"sm-text-color-{color_value[1:].upper()}"
    else:
        # rgb()/named colour: strip whitespace and parentheses, commas become '-'
        sanitized = re.sub(r'[\s\(\)]', '', color_value).replace(',', '-')
        class_color = f"sm-text-color-{sanitized.upper()}"

    # Remove the colour declaration(s) from the style attribute
    new_style = re.sub(
        r'(?<![\w-])color\s*:\s*[^;]+;?', '', style, flags=re.IGNORECASE
    ).strip()
    if new_style:
        tag['style'] = new_style
    else:
        del tag['style']

    # Add or append the class attribute
    if 'class' in tag.attrs:
        tag['class'].append(class_color)
    else:
        tag['class'] = [class_color]

def process_html_colors_preserve_html(html_text):
    """Return *html_text* re-serialized after converting inline colours to classes.

    The markup is parsed with BeautifulSoup's ``html.parser`` and every tag in
    the parsed tree is passed to ``convert_color_style_to_class``, which edits
    the tag in place.
    """
    parsed = BeautifulSoup(html_text, "html.parser")
    every_tag = parsed.find_all(True)  # True matches any tag name
    for node in every_tag:
        convert_color_style_to_class(node)
    return str(parsed)

def parse_p_to_text_elements(div_html):
    """Build one paragraph-styled ``Text`` node per ``<p>`` found in *div_html*.

    Every ``<p>`` in the parsed markup is visited (the search is recursive).
    For each one the inner HTML - including styled spans, links, etc. - is run
    through ``process_html_colors_preserve_html`` so inline CSS colours become
    classes, e.g. <span style="color:#E2F782"> becomes
    <span class="sm-text-color-E2F782">.

    Returns:
        list: ``Text`` nodes in document order.
    """
    document = BeautifulSoup(div_html, "html.parser")

    def _to_text_node(paragraph):
        # Inner HTML of the paragraph, without the enclosing <p> tag itself
        raw_inner = ''.join(map(str, paragraph.contents))
        recoloured = process_html_colors_preserve_html(raw_inner)
        # https://developers.arcgis.com/python/latest/api-reference/arcgis.apps.storymap.html#arcgis.apps.storymap.story_content.Text
        return Text(text=recoloured, style=TextStyles.PARAGRAPH)

    return [_to_text_node(paragraph) for paragraph in document.find_all("p")]

def parse_p_to_image_elements(div_html):
    """Build an ``Image`` node for the first ``<img>`` inside each ``<p>`` of *div_html*.

    Every ``<p>`` in the parsed markup is visited (the search is recursive,
    not limited to direct children); paragraphs without an ``<img>`` are
    skipped. The image's ``src`` and ``alt`` attributes are copied onto the
    node, defaulting to an empty string when absent.

    Returns:
        list: ``Image`` nodes in document order.
    """
    soup = BeautifulSoup(div_html, "html.parser")
    img_elements = []
    for p in soup.find_all("p"):
        img = p.find("img")  # First <img> inside this <p>, or None
        if img is None:
            continue

        src_url = img.get("src", "")  # Extract src attribute safely
        alt_text = img.get("alt", "")  # Extract alt text
        # Create a Image node and set properties
        # https://developers.arcgis.com/python/latest/api-reference/arcgis.apps.storymap.html#arcgis.apps.storymap.story_content.Image
        img_node = Image(path=src_url)
        img_node.alt_text = alt_text
        img_node.caption = ""
        img_node.link = ""
        img_node.image = src_url  # Assign image property. TO DO fix this for images hosted on AGO

        img_elements.append(img_node)
    return img_elements

In [None]:
# Build the form widgets
# Checkbox the user ticks when the notebook runs inside ArcGIS Online
input_param1 = widgets.Checkbox(description="Yes", value=False)
user_line1 = widgets.HBox(
    [
        widgets.Label(value="Are you running this within ArcGIS Online?"),
        input_param1,
    ]
)
# Text box pre-filled with a sample Classic Story Map item id
input_param2 = widgets.Text(
    description="Story ID:",
    value="597d573e58514bdbbeb53ba2179d2359",
)
# TO DO add error checking logic and warning
user_line2 = widgets.HBox(
    [
        widgets.Label(value="Paste 32-digit Classic Esri Story Map id -->"),
        input_param2,
    ]
)
submit_btn = widgets.Button(description="Convert Story")
# Output area that the submit handler clears and writes its final status to
output_box = widgets.Output()
def on_submit(btn):
    """Handle a click on the "Convert Story" button.

    Reads the two form widgets, signs in (``GIS("home")`` when the checkbox is
    ticked, otherwise ArcGIS Online with a password looked up through
    ``keyring``), fetches the classic story's item data and, for each entry in
    ``values.story.entries``, builds, saves and publishes a StoryMap that
    holds one docked-panel Sidecar slide and has its cover flagged as hidden.

    Any exception raised along the way is caught and its message is printed
    into ``output_box``.

    Args:
        btn: The clicked button, passed in by ``Button.on_click``; not used.
    """
    output_box.clear_output()
    try:
        agoNotebook = input_param1.value
        classic_storymap_id = input_param2.value

        # Print Python and ArcGIS for Python versions since things can change between versions
        print("Python version: ", sys.version)
        print("ArcGIS for Python API / StoryMap module version: ", arcgis.__version__)

        # Connect to ArcGIS Online
        # Define the GIS
        if not agoNotebook:
            import keyring
            service_name = "system"  # Use the default local credential store
            success = False  # Set initial state

            # Keep asking for a username until one is found in the credential store
            while not success:
                # If you are using VS Code, the text input dialog box appears at the top of the window
                username_for_keyring = input("Enter your ArcGIS Online username:")
                # Get the credential object
                credential = keyring.get_credential(service_name, username_for_keyring)
                # Check if the username is in the credential store
                if credential is None:
                    print(f"'{username_for_keyring}' is not in the local system's credential store. Try another username.")
                # Retrieve the password, login and set the GIS portal
                else:
                    password_from_keyring = keyring.get_password(service_name, username_for_keyring)
                    portal_url = 'https://www.arcgis.com'
                    gis = GIS(portal_url, username=username_for_keyring, password=password_from_keyring)
                    success = True
                    # Print a success message with username and user's organization role
                    print("Successfully logged in as: " + gis.properties.user.username, "(role: " + gis.properties.user.role + ")")
        else:
            gis = GIS("home")

        # Fetch the StoryMap Item from AGO
        classic_item = Item(gis=gis, itemid=classic_storymap_id)
        # Fetch the StoryMap data; parse it only when it is not already a dict
        classic_data = Item.get_data(classic_item)
        if isinstance(classic_data, dict):
            classic_item_data = classic_data
        else:
            classic_item_data = json.loads(classic_data)

        # Extract story data
        classic_story_settings = classic_item_data["values"]["settings"]
        classic_story_theme = classic_story_settings["theme"]
        classic_story_title = classic_item_data["values"]["title"]
        classic_story_data = classic_item_data["values"]["story"]

        # Extract tabs (entries list)
        entries = classic_story_data["entries"]

        # Map the classic theme group onto a StoryMap theme
        classic_theme_group = classic_story_theme["colors"]["group"]
        if classic_theme_group == "dark":
            new_theme = Themes.OBSIDIAN
        else:
            # "light" and any unrecognised group use SUMMIT, so new_theme is
            # always bound before story.theme() is called below.
            new_theme = Themes.SUMMIT

        created_storymaps = []
        # for testing/debugging only.
        # NOTE(review): the check at the end of the loop body stops after the
        # first index greater than loop_limit, i.e. at most loop_limit + 2
        # entries are converted - remove or raise for a full conversion.
        loop_limit = 3
        for i, entry in enumerate(entries):
            # Create a new StoryMap
            story = StoryMap()
            story.theme(new_theme)

            # Create Sidecar immersive section
            sidecar = Sidecar(style="docked-panel")

            # Add Sidecar to story
            story.add(sidecar)

            # Determine media content for main stage
            media_info = entry.get("media", {})
            media_type = media_info.get("type")

            media_content = None
            if media_type == "webmap":
                webmap_id = media_info.get('webmap', {}).get('id')
                if webmap_id:
                    media_content = Map(webmap_id)
            elif media_type == "webpage":
                webpage_url = media_info.get("webpage", {}).get("url")
                if webpage_url:
                    media_content = Embed(webpage_url)

            # Fetch content from description (HTML)
            description_html = entry.get("description", "")

            # Convert HTML from side panel to AGSM content items
            soup = BeautifulSoup(description_html, "html.parser")
            all_list = list(soup.find_all())
            unique_top_level = deduplicate_by_containment(all_list)
            # NOTE(review): the fixed indices below assume at least three
            # top-level elements (and three children in the first one); a
            # shorter description raises IndexError, which the handler at the
            # bottom reports as an error.
            first_element = deduplicate_by_containment(unique_top_level[0])

            img_snippet1 = str(unique_top_level[0])
            img_nodes1 = parse_p_to_image_elements(img_snippet1)

            html_snippet1 = str(first_element[2])
            converted_html2 = process_html_colors_preserve_html(html_snippet1)
            text_nodes1 = [Text(text=converted_html2, style=TextStyles.PARAGRAPH)]

            img_snippet = str(unique_top_level[1])
            img_nodes = parse_p_to_image_elements(img_snippet)

            html_snippet = str(unique_top_level[2])
            text_nodes = parse_p_to_text_elements(html_snippet)

            narrative_nodes = img_nodes1 + text_nodes1 + img_nodes + text_nodes

            # Add a slide to the sidecar with the narrative nodes and main media
            sidecar.add_slide(contents=narrative_nodes, media=media_content)

            # Set webmap properties. Map must be added to the story before setting viewpoint.
            # media_content is None when the entry had no webmap id, so check
            # the object as well as the declared media type.
            if media_type == "webmap" and media_content is not None:
                # Set the extent for the map stage
                extent_json = media_info.get('webmap', {}).get('extent')
                if extent_json:
                    media_content.set_viewpoint(extent=extent_json)  # Extent dict per docs
                # Set layer visibility (if StoryMap Map object supports)
                old_layers = media_info.get('webmap', {}).get('layers', [])
                if hasattr(media_content, "map_layers"):
                    for new_lyr in media_content.map_layers:
                        for old_lyr in old_layers:
                            if new_lyr['id'] == old_lyr['id']:
                                new_lyr['visible'] = old_lyr['visibility']

            # Set Cover properties
            cover_properties = story.content_list[0]
            cover_properties.byline = ""
            cover_properties.date = "none"
            #cover_properties.media = createThumbnail() # figure out a way to create a thumbnail from the first Sidecar media item

            # As the Cover class does not include a setting to hide the cover, we hide it by adding the 'config' key
            # to the Cover json
            for node in story.properties['nodes'].values():
                if node['type'] == 'storycover':
                    node['config'] = {'isHidden': 'true'}

            # Save and publish storymap
            story_title = entry.get("title", "Untitled Story")
            story.save(title=story_title, tags=["auto-created"], publish=True)

            # TO DO add an AGO relationship so if an attempt is made to delete story from My Content a warning is issued that the story
            # is included in a Collection (and give the name/id of the Collection(s) where it is referenced)

            created_storymaps.append(story)
            print(f"Created replica of {story_title}")
            if i > loop_limit:
                break

        print(f"Created {len(created_storymaps)} StoryMaps")
        with output_box:
            print("Success! Classic Story Map converted to AGSM Collection")
    except Exception as e:
        # Top-level boundary of the UI callback: report the failure in the output widget
        with output_box:
            print(f"Error: {str(e)}")
# Run on_submit whenever the button is clicked
submit_btn.on_click(on_submit)

# Display UI elements stacked vertically: story id, AGO checkbox, button, output
display(
    widgets.VBox(
        children=[user_line2, user_line1, submit_btn, output_box],
    )
)

VBox(children=(HBox(children=(Label(value='Paste 32-digit Classic Esri Story Map id -->'), Text(value='597d573…

In [None]:
# def parse_html_to_story_content(description_html):
#     soup = BeautifulSoup(description_html, "html.parser")
#     content_nodes = []
    
#     # Parse each image-container div separately, handling nested containers as separate units
#     image_containers = soup.find_all("div", class_="image-container")
#     print(f"Found {len(image_containers)} image containers")
#     processed_containers = set()
    
#     for container in image_containers:
#         if container in processed_containers:
#             continue
        
#         # Mark all nested containers inside this one as processed to avoid duplication
#         nested_containers = container.find_all("div", class_="image-container")
#         processed_containers.update(nested_containers)
#         processed_containers.add(container)
        
#         # Extract images inside this container
#         imgs = container.find_all("img")
#         print(f"Found {len(imgs)} images within image container")
#         for img in imgs:
#             src = img.get("src")
#             alt = img.get("alt", "")
#             if src:
#                 img_obj = Image(src)
#                 img_obj.alt_text = alt
#                 content_nodes.append(img_obj)
        
#         # Extract styled text inside container preserving inline HTML (for rich text)
#         # Extract text from the container, joining nested elements with a space
#         text_content = container.get_text(separator=' ', strip=True)
#         # Use inner HTML excluding images (already processed)
#         styles = []
#         spans = soup.find_all("span", style=True)
#         if spans:
#             for span in spans:
#                 style = span.get("style")
#                 styles.append(style)
#             print(styles)
#         # Remove images tags to avoid duplication in text
#         for img in imgs:
#             img.decompose()
#         print(text_content)
        
#         # Get remaining inner HTML as string
#         inner_html = ''.join(str(e) for e in container.contents).strip()
#         if inner_html:
#             # Use the raw HTML inside a Text node to keep inline styling
#             text_node = Text()
#             text_node.text = inner_html
#             content_nodes.append(text_node)
    
#     # For content outside of image-container divs, parse remaining paragraphs, links, etc.
#     # Remove all image-container divs to avoid duplicates
#     for img_cont in image_containers:
#         img_cont.decompose()
    
#     # Parse remaining paragraphs
#     para = soup.find_all("p")
#     print(f"Found {len(para)} paragraphs outside image containers")
#     for p in soup.find_all("p"):
#         html_content = str(p)
#         print(html_content)
#         if html_content:
#             text_node = Text()
#             text_node.text = html_content
#             content_nodes.append(text_node)
#             #print(text_node.text)
    
#     # Parse standalone links for embedding
#     # for a in soup.find_all("a"):
#     #     href = a.get("href")
#     #     if href:
#     #         content_nodes.append(Embed(href))
    
#     print(f"Found {len(content_nodes)} elements")
#     return content_nodes

In [None]:
# Scratch cell: count the elements found in one side-panel description.
# NOTE(review): `description_html` is only assigned inside on_submit() in the
# visible notebook; this cell presumably relies on a value left in the kernel
# by an earlier session - confirm before running on a fresh kernel.
soup = BeautifulSoup(description_html, "html.parser")
# Named all_elements rather than `all` so the builtin all() is not shadowed
all_elements = soup.find_all()
all_list = list(all_elements)
divs = soup.find_all("div")
divs_list = list(divs)
paras = soup.find_all("p")
image_containers = soup.find_all("div", class_="image-container")
print(len(all_elements))
print(f"Number of divs: {len(divs)}, Number of paragraphs {len(paras)}, Number of image containers {len(image_containers)}")

In [None]:
def deduplicate_by_containment(elements):
    """Return the elements whose HTML is not nested inside another element's HTML.

    Each element is rendered with ``str()`` and its whitespace runs are
    collapsed to single spaces. An element is dropped when that normalized
    string occurs as a substring of another element's normalized string.
    Elements that render to exactly the same string count as duplicates of one
    another: the first occurrence is kept and the later copies are dropped, so
    identical elements do not eliminate each other entirely.

    Args:
        elements: Iterable of objects convertible with ``str()`` (the callers
            in this notebook pass BeautifulSoup elements).

    Returns:
        list: The surviving elements, in their original order.
    """
    items = list(elements)
    # Whitespace-normalized outer HTML, index-aligned with ``items``
    normalized = [' '.join(str(item).split()) for item in items]

    keep = []
    for i, html_i in enumerate(normalized):
        contained = False
        for j, html_j in enumerate(normalized):
            if i == j or html_i not in html_j:
                continue
            # Identical markup: only a later copy is treated as the duplicate,
            # so the first occurrence survives.
            if html_i == html_j and j > i:
                continue
            contained = True
            break
        if not contained:
            keep.append(items[i])
    return keep

In [None]:
# Drop every element whose HTML is contained in another element's HTML
unique_top_level = deduplicate_by_containment(all_list)

In [None]:
# Convert the third remaining element of the description into Text nodes
html_snippet = str(unique_top_level[2])
# parse_p_to_text_elements is the text parser defined in this notebook; no
# function named parse_div_to_text_elements is defined anywhere in it.
text_nodes = parse_p_to_text_elements(html_snippet)

In [None]:
def parse_div_to_image_elements(div_html):
    """Build an ``Image`` node for the first ``<img>`` inside each ``<p>`` of *div_html*.

    Every ``<p>`` in the parsed markup is visited (the search is recursive,
    not limited to direct children); paragraphs without an ``<img>`` are
    skipped. The image's ``src`` and ``alt`` attributes are copied onto the
    node, defaulting to an empty string when absent.

    Returns:
        list: ``Image`` nodes in document order.
    """
    soup = BeautifulSoup(div_html, "html.parser")
    img_elements = []
    for p in soup.find_all("p"):
        img = p.find("img")  # First <img> inside this <p>, or None
        if img is None:
            continue

        src_url = img.get("src", "")  # Extract src attribute safely
        alt_text = img.get("alt", "")
        # Create a Image node and set properties
        # https://developers.arcgis.com/python/latest/api-reference/arcgis.apps.storymap.html#arcgis.apps.storymap.story_content.Image
        img_node = Image(path=src_url)
        img_node.alt_text = alt_text
        img_node.caption = ""
        img_node.link = ""
        img_node.image = src_url  # Assign image property

        img_elements.append(img_node)
    return img_elements

In [None]:
# Inspect the first element kept by deduplicate_by_containment
print(unique_top_level[0])

In [None]:
# Convert the first remaining element of the description into Text nodes
html_snippet = str(unique_top_level[0])
# parse_p_to_text_elements is the text parser defined in this notebook; no
# function named parse_div_to_text_elements is defined anywhere in it.
text_nodes1 = parse_p_to_text_elements(html_snippet)

In [None]:
# Build Image nodes from the <p>-wrapped images in the first remaining element
img_snippet = str(unique_top_level[0])
img_nodes1 = parse_div_to_image_elements(img_snippet)

In [None]:
# Add each parsed image to the story.
# NOTE(review): `story` is only assigned inside on_submit() in the visible
# notebook; this cell presumably relies on a variable left in the kernel by an
# earlier session - confirm before running on a fresh kernel.
for img_node in img_nodes1:
    story.add(img_node)

In [None]:
# Inspect the story's content list
print(story.content_list)

In [None]:
# Inspect the properties of the fourth item in the story's content list
print(story.content_list[3].properties)

In [None]:
# Run the containment filter over the contents of the first remaining element
# (deduplicate_by_containment iterates over whatever it is given)
first_element = deduplicate_by_containment(unique_top_level[0])

In [None]:
# Inspect the third item kept from the first element's contents
html_snippet2 = str(first_element[2])
print(html_snippet2)

In [None]:
def process_html_colors_preserve_html(html_text):
    """Return *html_text* re-serialized after converting inline colours to classes.

    The markup is parsed with BeautifulSoup's ``html.parser`` and every tag in
    the parsed tree is passed to ``convert_color_style_to_class``, which edits
    the tag in place.
    """
    parsed = BeautifulSoup(html_text, "html.parser")
    every_tag = parsed.find_all(True)  # True matches any tag name
    for node in every_tag:
        convert_color_style_to_class(node)
    return str(parsed)

In [None]:
# Convert inline colour styles in the snippet to classes and show the result
converted_html = process_html_colors_preserve_html(html_snippet2)
print(converted_html)

In [None]:
# Define the AGSM item id
agsm_storymap_id = '808bf08835b740f69022bc1b3593a143'
# Fetch the StoryMap Item from AGO
# NOTE(review): `gis` is only assigned inside on_submit() in the visible
# notebook; this cell presumably relies on a connection left in the kernel by
# an earlier session - confirm before running on a fresh kernel.
agsm_item = Item(gis=gis,itemid=agsm_storymap_id)
# Fetch the StoryMap data
agsm_data = Item.get_data(agsm_item)
# isinstance also accepts dict subclasses, which json.loads() would reject
if isinstance(agsm_data, dict):
    agsm_item_json = json.dumps(agsm_data)
    agsm_item_data = json.loads(agsm_item_json)
else:
    agsm_item_data = json.loads(agsm_data)