# Generalized sketch

TO DO - create conversion tool for classic swipe (second tab in Katrina story)

In [41]:
from bs4 import BeautifulSoup, NavigableString
from arcgis.apps.storymap import StoryMap, Themes, Image, Video, Audio, Embed, Map, Button, Text, Gallery, Timeline, Sidecar, Code, Table, TextStyles
from arcgis.gis import GIS, Item
from IPython.display import display
import pandas as pd
import re, json, requests, sys, time 

agoNotebook = False

# Set Pandas dataframe display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns',1000)

In [2]:
# Print Python and ArcGIS for Python versions
# since things can change between versions
import sys
print(f"Python version: ",sys.version)
import arcgis
print("ArcGIS for Python API / StoryMap module version: ",arcgis.__version__)

Python version:  3.11.11 (main, Mar  3 2025, 15:29:37) [MSC v.1938 64 bit (AMD64)]
ArcGIS for Python API / StoryMap module version:  2.4.1


In [3]:
# Connect to ArcGIS Online
# Define the GIS
if agoNotebook == False:
    import keyring
    service_name = "system" # Use the default local credential store
    success = False # Set initial state

    # Ask for the username
    while success == False:
        username_for_keyring = input("Enter your ArcGIS Online username:") # If you are using VS Code, the text input dialog box appears at the top of the window
        # Get the credential object
        credential = keyring.get_credential(service_name, username_for_keyring)
        # Check if the username is in the credential store
        if credential is None:
            print(f"'{username_for_keyring}' is not in the local system's credential store. Try another username.")
        # Retrieve the password, login and set the GIS portal
        else:
            password_from_keyring = keyring.get_password("system", username_for_keyring)
            portal_url = 'https://www.arcgis.com'  
            gis = GIS(portal_url, username=username_for_keyring, password=password_from_keyring)
            success = True
            # Print a success message with username and user's organization role
            print("Successfully logged in as: " + gis.properties.user.username, "(role: " + gis.properties.user.role + ")")
else:
    gis = GIS("home")

Successfully logged in as: dasbury_storymaps (role: org_admin)


In [4]:
# Define the Classic StoryMap item id
classic_storymap_id = '597d573e58514bdbbeb53ba2179d2359'
# Fetch the StoryMap Item from AGO
classic_item = Item(gis=gis,itemid=classic_storymap_id)
# Fetch the StoryMap data
classic_data = Item.get_data(classic_item)
if type(classic_data) == dict:
    classic_item_json = json.dumps(classic_data)
    classic_item_data = json.loads(classic_item_json)
else:
    classic_item_data = json.loads(classic_data)

In [22]:
# Helper functions
def convert_color_style_to_class(tag):
    # Check if tag has 'style' attribute with color
    style = tag.get('style', '')
    # Regex to find color property (hex, rgb, named colors)
    match = re.search(r'color\s*:\s*([^;]+)', style, re.IGNORECASE)
    if match:
        color_value = match.group(1).strip()
        # Convert hex (#XXXXXX) to class name, removing #
        if color_value.startswith('#'):
            class_color = f"sm-text-color-{color_value[1:].upper()}"
        else:
            # For rgb or named color, sanitize usable string (replace spaces/paren)
            sanitized = re.sub(r'[\s\(\)]', '', color_value).replace(',', '-')
            class_color = f"sm-text-color-{sanitized.upper()}"
        # Remove color from style attribute
        new_style = re.sub(r'color\s*:\s*[^;]+;?', '', style, flags=re.IGNORECASE).strip()
        if new_style:
            tag['style'] = new_style
        else:
            del tag['style']
        # Add or append class attribute
        if 'class' in tag.attrs:
            tag['class'].append(class_color)
        else:
            tag['class'] = [class_color]

def process_html_colors_preserve_html(html_text):
    soup = BeautifulSoup(html_text, "html.parser")
    # Iterate over tags that can have styles: div, span, strong, em, p, etc.
    for tag in soup.find_all(True):
        convert_color_style_to_class(tag)
    return str(soup)

def parse_content_element(el):
    tag_name = el.name
    if tag_name == "p" or tag_name in ["span", "strong", "em", "div"]:
        # Extract inner HTML preserving inline styles
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)
    
    elif tag_name == "img":
        src = el.get("src")
        alt = el.get("alt", "")
        link = ""
        if el.get("href"):
            link = el.get("href")
        img = Image(path=src)
        img.alt_text = alt
        img.caption = "" # TO DO try to find Classic stories that have images with captions
        img.link = link
        img.image = src  # Assign image property. TO DO fix this for images hosted on AGO
        return img

    elif tag_name == "video":
        src = el.get("src")
        alt = el.get("alt", "")
        vid = Video(path=src)
        vid.alt_text = alt
        vid.caption = "" # TO DO try to find Classic stories that have Videos with captions
        vid.video = src # Assign video property. TO DO fix this for hosted videos
        return vid
    
    elif tag_name == "audio":
        src = el.get("src")
        alt = el.get("alt", "")
        aud = Audio(path=src)
        aud.alt_text = alt
        aud.caption = "" # TO DO try to find Classic stories that have Audio with captions
        aud.audio = src # Assign Audio property. TO DO fix this for hosted videos
        return aud
    
    elif tag_name == "iframe" or tag_name == "embed":
        src = el.get("src") or el.get("data-src")
        alt = el.get("alt", "")
        if src:
            emb = Embed(path=src)
            emb.alt_text = alt
            emb.caption = "" # TO DO try to find Classic stories that have Embeds with captions
            emb.link = src
        return emb

    elif tag_name == "map":
        src = el.get("src")
        alt = el.get("alt", "")
        extent = "" #TO DO get extent
        layers = "" # TO DO get map layers
        mp = Map(item="")
        mp.alt_text = alt
        mp.caption = "" # TO DO try to find Classic stories that have Maps in Sidecar panel with captions
        mp.map = src
        mp.map_layers = layers 
        mp.set_viewpoint = extent
        return aud
    
    else:
        # Fallback for unsupported or unknown types - treat as text
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)

def deduplicate_by_containment(elements):
    # Create list of (element, outer_html_str) tuples
    elems_and_html = [(el, ' '.join(str(el).split())) for el in elements]

    keep = []
    for i, (el_i, html_i) in enumerate(elems_and_html):
        # Check if this element is contained within another (excluding itself)
        contained = False
        for j, (el_j, html_j) in enumerate(elems_and_html):
            if i != j and html_i in html_j:
                contained = True
                break
        if not contained:
            keep.append(el_i)
    return keep

def parse_narrative_html(html_text):
    soup = BeautifulSoup(html_text, "html.parser")
    content_nodes = []
    for child in soup.children:
        if isinstance(child, str):
            # Text node (likely whitespace) - skip or wrap in Text()
            node = Text(child)
            continue
        node = parse_content_element(child)
        print(type(node))
        if node:
            content_nodes.append(node)
    #deduped_nodes = deduplicate_by_containment(content_nodes)
    return content_nodes


In [23]:
# Extract story data
classic_story_settings = classic_item_data["values"]["settings"]
classic_story_theme = classic_story_settings["theme"]
classic_story_title = classic_item_data["values"]["title"]
classic_story_data = classic_item_data["values"]["story"]

# Extract tabs (entries list)
entries = classic_story_data["entries"]

# Fetch theme group
classic_theme_group = classic_story_theme["colors"]["group"]
if classic_theme_group == "dark":
    new_theme = Themes.OBSIDIAN
elif classic_theme_group == "light":
    new_theme = Themes.SUMMIT

created_storymaps = []
loop_limit = 0 # Zero indexed. For testing/debugging only
for i, entry in enumerate(entries):
    # Create a new StoryMap
    story = StoryMap()
    story.theme(new_theme)

    # Create Sidecar immersive section
    sidecar = Sidecar(style="docked-panel")

    # Add Sidecar to story
    story.add(sidecar)

    # Determine media content for main stage
    media_info = entry.get("media", {})
    media_type = media_info.get("type")

    media_content = None
    if media_type == "webmap":
        webmap_id = media_info.get('webmap', {}).get('id')
        if webmap_id:
            media_content = Map(webmap_id)
    elif media_type == "webpage":
        webpage_url = media_info.get("webpage", {}).get("url")
        if webpage_url:
            media_content = Embed(webpage_url)

    # Fetch content from description (HTML)
    description_html = entry.get("description", "")

    narrative_nodes = parse_narrative_html(description_html)

    # Create text panel from narrative nodes
    text_panel = Text(narrative_nodes)
    #story.add(text_panel)

    # Add a slide to the sidecar with text panel and main media
    sidecar.add_slide(contents=narrative_nodes, media=media_content)  

    # Set webmap properties. Map must be added to the story before setting viewpoint
    if media_type == "webmap":
        # Set the extent for the map stage
        extent_json = media_info.get('webmap', {}).get('extent')
        if extent_json:
            media_content.set_viewpoint(extent=extent_json)  # Extent dict per docs
        # Set layer visibility (if StoryMap Map object supports)
        old_layers = media_info.get('webmap', {}).get('layers', [])
        if hasattr(media_content, "map_layers"):
            for new_lyr in media_content.map_layers:
                for old_lyr in old_layers:
                    if new_lyr['id'] == old_lyr['id']:
                        new_lyr['visible'] = old_lyr['visibility']
    
    # Set Cover properties
    cover_properties = story.content_list[0]
    cover_properties.byline = ""
    cover_properties.date = "none"
    #cover_properties.media = createThumbnail() # figure out a way to create a thumbnail from the first Sidecar media item

    # As the Cover class does not include a setting to hide the cover, we hide it by adding the 'config' key
    # to the Cover json
    for k,v in story.properties['nodes'].items():
        if v['type'] == 'storycover':
            v['config'] = {'isHidden': 'true'}


    # Save and publish storymap
    story_title = entry.get("title", "Untitled Story")
    story.save(title=story_title, tags=["auto-created"], publish=True)

    # TO DO add an AGO relationship so if an attempt is made to delete story from My Content a warning is issued that the story
    # is included in a Collection (and give the name/id of the Collection(s) where it is referenced)

    created_storymaps.append(story)
    print(f"Created replica of {story_title}")
    if i == loop_limit:
        break

print(f"Created {len(created_storymaps)} StoryMaps")

<class 'arcgis.apps.storymap.story_content.Text'>
<class 'arcgis.apps.storymap.story_content.Text'>
<class 'arcgis.apps.storymap.story_content.Text'>
<class 'arcgis.apps.storymap.story_content.Text'>
Created replica of The Katrina Diaspora
Created 1 StoryMaps


In [24]:
print(description_html)

<div class="image-container">
<div class="image-container">
<p style="text-align:center"><img alt="" src="https://lh3.googleusercontent.com/-OZs54tCn6mM/VdYjIf-XQ0I/AAAAAAAAAOU/HrFElBu60xw/s1600/legends-08.png" width="496" height="240"></p>
</div>
&nbsp;

<div class="image-container">&nbsp;</div>

<div class="image-container"><span style="color:#E5FA84"><span style="font-size:20px"><strong>Hurricane Katrina displaced over one million Louisiana residents -- an estimated 277,000 did not come back to resettle.</strong>&nbsp;</span></span></div>
</div>

<p style="text-align:center"><img alt="" src="https://lh3.googleusercontent.com/-Frhyo5iY8mo/Vc40Ea084sI/AAAAAAAAAEY/r_AIGqcYI9E/s1600/legends-03.png" width="402" height="132"></p>

<p>&nbsp;</p>

<div>
<p><span style="font-size:12px">Threatened by one of the most destructive and influential storms in United States history, those living in the path of Hurricane Katrina&nbsp;fled to every state in the country, with many&nbsp;unable to return

In [28]:
print(narrative_nodes[3].properties)

{'node_dict': {'type': 'text', 'data': {'type': 'paragraph', 'text': '\n<p><span style="font-size:12px">Threatened by one of the most destructive and influential storms in United States history, those living in the path of Hurricane Katrina\xa0fled to every state in the country, with many\xa0unable to return home to Louisiana after the storm left.\xa0This map, which uses data from the 2006 American Community Survey,\xa0presents a good, but imperfect illustration of\xa0where evacuees ended up by showing\xa0where\xa0residents who lived in Louisiana\xa0in 2005 moved to as of 2006.\xa0<span class="sm-text-color-E2F782"><strong>Click on each state</strong></span>\xa0for details.\xa0</span></p>\n<p>\xa0</p>\n<p><span style="font-size:10px">Data Sources:\xa0<a href="https://www.census.gov/hhes/migration/data/acs/state-to-state.html" target="_blank">2006 American Community Survey 1-year Estimates, State-to-State Migration Flows</a>,\xa0<a href="http://www.nhc.noaa.gov/" target="_blank">NHC</a>

In [33]:
def parse_content_element(el):
    tag_name = el.name
    if tag_name == "p" or tag_name in ["span", "strong", "em", "div"]:
        # Extract inner HTML preserving inline styles
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)
    
    elif tag_name == "img":
        src = el.get("src")
        alt = el.get("alt", "")
        link = ""
        if el.get("href"):
            link = el.get("href")
        img = Image(path=src)
        img.alt_text = alt
        img.caption = "" # TO DO try to find Classic stories that have images with captions
        img.link = link
        img.image = src  # Assign image property. TO DO fix this for images hosted on AGO
        return img

    elif tag_name == "video":
        src = el.get("src")
        alt = el.get("alt", "")
        vid = Video(path=src)
        vid.alt_text = alt
        vid.caption = "" # TO DO try to find Classic stories that have Videos with captions
        vid.video = src # Assign video property. TO DO fix this for hosted videos
        return vid
    
    elif tag_name == "audio":
        src = el.get("src")
        alt = el.get("alt", "")
        aud = Audio(path=src)
        aud.alt_text = alt
        aud.caption = "" # TO DO try to find Classic stories that have Audio with captions
        aud.audio = src # Assign Audio property. TO DO fix this for hosted videos
        return aud
    
    elif tag_name == "iframe" or tag_name == "embed":
        src = el.get("src") or el.get("data-src")
        alt = el.get("alt", "")
        if src:
            emb = Embed(path=src)
            emb.alt_text = alt
            emb.caption = "" # TO DO try to find Classic stories that have Embeds with captions
            emb.link = src
        return emb

    elif tag_name == "map":
        src = el.get("src")
        alt = el.get("alt", "")
        extent = "" #TO DO get extent
        layers = "" # TO DO get map layers
        mp = Map(item="")
        mp.alt_text = alt
        mp.caption = "" # TO DO try to find Classic stories that have Maps in Sidecar panel with captions
        mp.map = src
        mp.map_layers = layers 
        mp.set_viewpoint = extent
        return aud
    
    else:
        # Fallback for unsupported or unknown types - treat as text
        inner_html = ''.join(str(c) for c in el.contents)
        processed_html = process_html_colors_preserve_html(inner_html)
        print("Type check failed. Returning Text object as default.")
        return Text(text=processed_html, style=TextStyles.PARAGRAPH)


In [None]:
print(description_html)

In [42]:
def parse_html_recursive(element):
    content_nodes = []

    if isinstance(element, NavigableString):
        # Skip whitespace text nodes or wrap small text as Text if relevant
        text = element.string.strip()
        if text:
            content_nodes.append(Text(text=text, style=TextStyles.PARAGRAPH))
        return content_nodes

    if element.name == 'img':
        src = element.get('src')
        alt = element.get('alt', '')
        img_node = Image(path=src)
        img_node.alt_text = alt
        content_nodes.append(img_node)
        return content_nodes

    if element.name in ['p', 'span', 'strong', 'em', 'div', 'section', 'figure', 'caption']:
        # For container text tags, process children individually
        for child in element.children:
            content_nodes.extend(parse_html_recursive(child))
        return content_nodes

    # For other tags, fallback to converting the full inner HTML as one Text node
    inner_html = ''.join(str(c) for c in element.contents)
    if inner_html.strip():
        text_node = Text(text=inner_html, style=TextStyles.PARAGRAPH)
        content_nodes.append(text_node)
    return content_nodes

def parse_narrative_html(html_text):
    soup = BeautifulSoup(html_text, "html.parser")
    content_nodes = []
    for child in soup.children:
        # if isinstance(child, str):
        #     # Text node (likely whitespace) - skip or wrap in Text()
        #     node = Text(text=child)
        #     print("Node is raw text")
        #     continue
        print(child)
        node = parse_html_recursive(child)
        print(type(node), node)
        if node:
            content_nodes.append(node)
    #deduped_nodes = deduplicate_by_containment(content_nodes)
    return content_nodes

In [43]:
nodes = parse_narrative_html(description_html)

<div class="image-container">
<div class="image-container">
<p style="text-align:center"><img alt="" height="240" src="https://lh3.googleusercontent.com/-OZs54tCn6mM/VdYjIf-XQ0I/AAAAAAAAAOU/HrFElBu60xw/s1600/legends-08.png" width="496"/></p>
</div>
 

<div class="image-container"> </div>
<div class="image-container"><span style="color:#E5FA84"><span style="font-size:20px"><strong>Hurricane Katrina displaced over one million Louisiana residents -- an estimated 277,000 did not come back to resettle.</strong> </span></span></div>
</div>
<class 'list'> [Image, Text]


<class 'list'> []
<p style="text-align:center"><img alt="" height="132" src="https://lh3.googleusercontent.com/-Frhyo5iY8mo/Vc40Ea084sI/AAAAAAAAAEY/r_AIGqcYI9E/s1600/legends-03.png" width="402"/></p>
<class 'list'> [Image]


<class 'list'> []
<p> </p>
<class 'list'> []


<class 'list'> []
<div>
<p><span style="font-size:12px">Threatened by one of the most destructive and influential storms in United States history, those livi

In [47]:
print(nodes[2])

[Text, Text, Text, Text, Text, Text, Text, Text, Text, Text, Text, Text]
