## Convert Classic Esri StoryMap Series (Tabbed Layout)
Fetch the JSON of a Classic Esri Story Map Series app hosted on ArcGIS Online and convert each tab into its own ArcGIS StoryMap with the cover suppressed. Once converted, each ArcGIS StoryMap opens in a browser tab so that the Story Checker can be completed. Once all are published, an ArcGIS StoryMap Collection containing the converted stories is created to replicate the classic app's look and feel.

TO DO - create conversion tool for classic swipe (second tab in Katrina story)

In [1]:
# Check for required packages and install if necessary
import sys

def ensure_package(package, import_name=None):
    """Import a package, installing it with pip first if it is missing.

    Args:
        package: Name to pass to ``pip install``.
        import_name: Module name to import when it differs from the pip
            name (e.g. ``"Pillow"`` installs the ``PIL`` module). Defaults
            to ``package``.

    Returns:
        None. Progress and failures are reported with ``print``; an
        installation failure is not raised to the caller.
    """
    # Local import so this cell stays self-contained.
    import subprocess

    import_name = import_name or package
    try:
        __import__(import_name)
        return
    except ImportError:
        pass

    print(f"Installing {package} ...")
    try:
        # Run pip with the interpreter that is executing this notebook so the
        # package lands in the active environment. Unlike the IPython "!"
        # shell escape, check_call raises when pip exits with an error, so
        # the failure message below is actually reachable.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Could not install {package}: {e}")

# Make sure every third-party dependency used below is importable.
# The second argument is the import name when it differs from the pip name.
ensure_package("webcolors")
ensure_package("bs4")
ensure_package("arcgis")
ensure_package("Pillow", "PIL")
ensure_package("pandas")
ensure_package("requests")

TO DO - add cell describing keychain config

In [None]:
# Import packages, config, AGO authentication and helper functions
from bs4 import BeautifulSoup, NavigableString, Tag
from arcgis.apps.storymap import StoryMap, Themes, Image, Video, Audio, Embed, Map, Button, Text, Gallery, Timeline, Sidecar, Code, Table, TextStyles, Collection, CollectionNavigation
from arcgis.gis import GIS, Item
import arcgis.mapping
from PIL import Image as PILImage
from io import BytesIO
from IPython.display import display
import pandas as pd
import webcolors
import webbrowser
import tempfile
import ipywidgets as widgets
from ipywidgets import IntProgress
import re, json, requests, sys, time, os

# Flag set here but not read anywhere in the visible cells.
# NOTE(review): presumably marks that this runs as an ArcGIS Online hosted notebook - confirm.
agoNotebook = True

# Set Pandas dataframe display options
# (None disables column-width truncation; show up to 1000 columns)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns',1000)

# Print Python and ArcGIS for Python versions
# since things can change between versions

print(f"Python version: ",sys.version)

print("ArcGIS for Python API / StoryMap module version: ",arcgis.__version__)
# Connect to ArcGIS Online
# Define the GIS
# GIS("home") is called without explicit credentials.
# NOTE(review): presumably relies on the signed-in notebook user / stored profile - confirm.

gis = GIS("home")
print("Successfully logged in as: " + gis.properties.user.username, "(role: " + gis.properties.user.role + ")")
    
# Helper functions

# Hosted fallback thumbnail (a URL, not a local file) that the helpers below
# return whenever a local thumbnail cannot be read or downloaded.
default_thumbnail_path = "https://cdn-a.arcgis.com/cdn/1BE082D/js/arcgis-app-components/arcgis-app/assets/arcgis-item-thumbnail/storymap.png"

# Instead of using threading.Thread and stopevent, update progress directly after each major step
def update_progress(progressbar, value, description=''):
    """Set the value of a progress widget and, optionally, its label.

    Args:
        progressbar: Object exposing writable ``value`` and ``description``
            attributes.
        value: New progress value.
        description: New label; when empty the current label is kept.
    """
    new_label = description or progressbar.description
    progressbar.value = value
    progressbar.description = new_label

def color_to_hex(color_value):
    """Convert a CSS color string to a hexadecimal color string.

    Supported inputs:
      * ``rgb``/``rgba`` values, written either in CSS form
        (``rgb(255, 0, 0)``) or in the dash-separated form
        (``rgb255-0-0``). Any alpha component is ignored and each channel
        is clamped to 255.
      * ``#RRGGBB`` hex strings.
      * Named colors, resolved with ``webcolors.name_to_hex``.

    Args:
        color_value: The color string to convert.

    Returns:
        ``"RRGGBB"`` (upper case, no ``#``) for rgb input, ``"#RRGGBB"``
        (upper case) for hex input, the value returned by
        ``webcolors.name_to_hex`` for a named color, or ``None`` when the
        value is not recognised. Callers must therefore be prepared for a
        result with or without the leading ``#``.
    """
    color_value = color_value.strip()

    # rgb / rgba: separators between channels are required so that digits of
    # adjacent channels can never be merged or split arbitrarily.
    rgb_match = re.match(
        r'rgba?[\s(-]*(\d+)[\s,-]+(\d+)[\s,-]+(\d+)',
        color_value,
        re.IGNORECASE,
    )
    if rgb_match:
        # Clamp so an out-of-range channel cannot produce more than 2 hex digits.
        r, g, b = (min(int(channel), 255) for channel in rgb_match.groups())
        return '{:02X}{:02X}{:02X}'.format(r, g, b)

    # Already hex. Checked before the name lookup: a string starting with
    # '#' is never a color name, so the lookup would only fail for it.
    if color_value.startswith('#') and len(color_value) == 7:
        return color_value.upper()

    # Named color
    try:
        return webcolors.name_to_hex(color_value.lower())
    except ValueError:
        return None

def convert_color_style_to_class(tag):
    """Replace an inline CSS text color on *tag* with a StoryMap color class.

    A ``color: ...`` declaration in the tag's ``style`` attribute is removed
    and a class of the form ``sm-text-color-RRGGBB`` is added instead.
    Other declarations (including ``background-color`` and
    ``border-color``) are left in place. When the color cannot be resolved
    to hex the tag is left completely untouched.

    Args:
        tag: Element supporting ``get`` and item assignment/deletion for its
            attributes (e.g. a BeautifulSoup ``Tag``). Modified in place.
    """
    style = tag.get('style', '')
    if not style:
        return

    # The lookbehind keeps "background-color" / "border-color" from being
    # mistaken for the text color property.
    color_pattern = r'(?<![\w-])color\s*:\s*([^;]+);?'
    match = re.search(color_pattern, style, re.IGNORECASE)
    if not match:
        return

    color_value = match.group(1).strip()
    # "!important" is not part of the color value itself.
    color_value = re.sub(r'\s*!important\s*$', '', color_value, flags=re.IGNORECASE)

    if color_value.startswith('#'):
        hex_digits = color_value[1:]
        if len(hex_digits) == 3:
            # Expand CSS shorthand (#abc -> AABBCC).
            hex_digits = ''.join(digit * 2 for digit in hex_digits)
        hex_digits = hex_digits.upper()
    else:
        # Reduce "rgb(255, 0, 0)" to the dash-separated "rgb255-0-0" form
        # that color_to_hex parses; named colors pass through unchanged.
        sanitized = re.sub(r'[()]', '', color_value).strip()
        sanitized = re.sub(r'[\s,]+', '-', sanitized)
        hex_color = color_to_hex(sanitized)
        if not hex_color:
            # Unknown color: keep the original style rather than emitting
            # a meaningless "sm-text-color-None" class.
            return
        # color_to_hex may or may not include a leading '#', and named
        # colors come back lower case; normalise both.
        hex_digits = hex_color.lstrip('#').upper()

    class_color = f"sm-text-color-{hex_digits}"

    # Remove color from style attribute
    new_style = re.sub(color_pattern, '', style, flags=re.IGNORECASE).strip()
    if new_style:
        tag['style'] = new_style
    else:
        del tag['style']

    # Add or append class attribute (the existing value may be a list or a
    # plain space-separated string depending on the parser).
    existing = tag.get('class')
    if existing is None:
        tag['class'] = [class_color]
    elif isinstance(existing, str):
        tag['class'] = existing.split() + [class_color]
    else:
        tag['class'] = list(existing) + [class_color]

def process_html_colors_preserve_html(html_text):
    """Rewrite inline text colors in an HTML fragment as color classes.

    The fragment is parsed with BeautifulSoup's ``html.parser``, every tag
    is passed through ``convert_color_style_to_class`` and the resulting
    markup is serialised back to a string.

    Args:
        html_text: HTML fragment to process.

    Returns:
        The processed fragment as a string.
    """
    parsed = BeautifulSoup(html_text, "html.parser")
    every_tag = parsed.find_all(True)  # True matches all tags
    for node in every_tag:
        convert_color_style_to_class(node)
    return str(parsed)

def _find_image_caption(img_tag):
    """Return the <figcaption> text accompanying *img_tag*, or '' if none."""
    # Prefer the enclosing <figure>; only fall back to the enclosing <div>
    # when the image is not inside a figure at all.
    container = img_tag.find_parent("figure")
    if not container:
        container = img_tag.find_parent("div")
    if not container:
        return ""
    caption_tag = container.find("figcaption")
    return caption_tag.get_text(strip=True) if caption_tag else ""


def convert_element_to_storymap_object(el):
    """Convert one parsed HTML element into a StoryMap content object.

    Args:
        el: Parsed HTML element (BeautifulSoup ``Tag``).

    Returns:
        * ``(Image, figcaption, alt, link)`` tuple when the element is or
          contains an ``<img>``.
        * ``Video``, ``Audio``, ``Embed`` or ``Map`` for the matching tag.
        * ``None`` for an ``<iframe>``/``<embed>`` that has neither ``src``
          nor ``data-src``.
        * ``Text`` (paragraph style) for ``<p>`` and every other tag.
    """
    img_tag = el.find('img')
    if img_tag:
        src = img_tag.get("src")
        # Upgrade http to https if needed
        if src and src.startswith("http://"):
            src = "https://" + src[len("http://"):]
        alt = img_tag.get("alt", "")
        link = ""  # TO DO handle occasions when image is intended to launch a link
        figcaption = _find_image_caption(img_tag)
        img = Image(path=src)
        return img, figcaption, alt, link

    tag_name = el.name

    if tag_name == "video":
        src = el.get("src")
        alt = el.get("alt", "")
        vid = Video(path=src)
        vid.alt_text = alt
        vid.caption = ""  # TO DO try to find Classic stories that have Videos with captions
        vid.video = src  # Assign video property. TO DO fix this for hosted videos
        return vid

    if tag_name == "audio":
        src = el.get("src")
        alt = el.get("alt", "")
        aud = Audio(path=src)
        aud.alt_text = alt
        aud.caption = ""  # TO DO try to find Classic stories that have Audio with captions
        aud.audio = src  # Assign Audio property. TO DO fix this for hosted videos
        return aud

    if tag_name in ("iframe", "embed"):
        src = el.get("src") or el.get("data-src")
        if not src:
            # Nothing to embed; the caller skips falsy results.
            return None
        alt = el.get("alt", "")
        emb = Embed(path=src)
        emb.alt_text = alt
        emb.caption = ""  # TO DO try to find Classic stories that have Embeds with captions
        emb.link = src
        return emb

    if tag_name == "map":
        src = el.get("src")
        alt = el.get("alt", "")
        extent = ""  # TO DO get extent
        layers = ""  # TO DO get map layers
        # NOTE(review): Map is created with an empty item and its attributes
        # are then assigned placeholder values - confirm this is accepted by
        # the installed arcgis version before relying on this branch.
        mp = Map(item="")
        mp.alt_text = alt
        mp.caption = ""  # TO DO try to find Classic stories that have Maps in Sidecar panel with captions
        mp.map = src
        mp.map_layers = layers
        mp.set_viewpoint = extent
        return mp

    # <p> and any unsupported/unknown tag: treat as text, keeping the inner
    # HTML (inline formatting) and converting inline colors to classes.
    inner_html = ''.join(str(c) for c in el.contents)
    processed_html = process_html_colors_preserve_html(inner_html)
    return Text(text=processed_html, style=TextStyles.PARAGRAPH)

def parse_root_elements(html_snippet):
    """Split an HTML fragment into the elements worth converting.

    An element is "meaningful" when it has non-whitespace text or contains
    an ``img``, ``video``, ``audio``, ``iframe``, ``embed`` or ``map`` tag.
    For each root-level tag:

      * if it has meaningful direct child tags, those children are returned
        in its place (the parent itself is not returned, and bare text
        placed directly inside the parent is therefore not emitted);
      * otherwise the root tag is returned when it is meaningful itself.

    Args:
        html_snippet: HTML fragment to parse.

    Returns:
        List of BeautifulSoup tags in document order.
    """
    media_tags = ('img', 'video', 'audio', 'iframe', 'embed', 'map')

    def carries_content(node):
        # Text first, then any of the recognised media descendants.
        if node.get_text(strip=True) != "":
            return True
        return any(node.find(tag_name) is not None for tag_name in media_tags)

    collected = []
    parsed = BeautifulSoup(html_snippet, "html.parser")
    for root in parsed.contents:
        # Skip text nodes, comments, etc. - only real tags have a name.
        if not getattr(root, 'name', None):
            continue

        useful_children = [
            kid
            for kid in root.children
            if getattr(kid, 'name', None) and carries_content(kid)
        ]

        if useful_children:
            # Children replace the parent to avoid emitting content twice.
            collected.extend(useful_children)
        elif carries_content(root):
            collected.append(root)

    return collected


def parse_nested_elements(html_snippet):
    """Return the tags found one level below the root tags of a fragment.

    Root-level text nodes are ignored, as are text nodes directly inside
    the root tags; only named child tags are returned.

    Args:
        html_snippet: HTML fragment to parse.

    Returns:
        List of BeautifulSoup tags in document order.
    """
    parsed = BeautifulSoup(html_snippet, "html.parser")
    return [
        inner
        for outer in parsed.contents
        if getattr(outer, 'name', None)
        for inner in outer
        if getattr(inner, 'name', None)
    ]

def convert_html_elements_to_storymap_node(html_elements):
    """Convert parsed HTML elements into StoryMap content objects.

    Each element is passed to ``convert_element_to_storymap_object``.
    Image results arrive as a 4-tuple; the image object goes into the
    content list and the full tuple is recorded separately. Falsy results
    are dropped.

    Args:
        html_elements: Iterable of parsed HTML elements.

    Returns:
        ``(content_nodes, image_metadata)`` where ``image_metadata`` is a
        list of ``(img, caption, alt, link)`` tuples.
    """
    nodes = []
    image_info = []
    for element in html_elements:
        converted = convert_element_to_storymap_object(element)
        if not isinstance(converted, tuple):
            if converted:
                nodes.append(converted)
            continue
        picture, caption, alt_text, href = converted
        nodes.append(picture)
        image_info.append((picture, caption, alt_text, href))
    return nodes, image_info

# def get_thumbnail_path(path):
#     if path and isinstance(path, str) and os.path.isfile(path):
#         return path
#     return default_thumbnail_path

def get_thumbnail_path(path):
    """Return *path* if it can be opened for reading, else the default thumbnail.

    Args:
        path: Candidate local file path.

    Returns:
        ``path`` when it is a non-empty string naming a file that can be
        opened in binary mode; otherwise ``default_thumbnail_path``.
    """
    if not (path and isinstance(path, str)):
        return default_thumbnail_path
    try:
        # Opening the file is the readability check; nothing is read.
        with open(path, 'rb'):
            pass
    except Exception:
        # Use default thumbnail path hosted online
        return default_thumbnail_path
    return path

# def download_thumbnail(webmap_item, thumbnail_path, gis=None):
#     if webmap_item.thumbnail:
#         url = f"{webmap_item._portal.resturl}content/items/{webmap_item.id}/info/{webmap_item.thumbnail}"
#         token = gis._con.token if gis else None
#         params = {'token': token} if token else {}
#         response = requests.get(url, params=params)
#         with open(thumbnail_path, 'wb') as f:
#             f.write(response.content)
#         return thumbnail_path
#     else:
#         print("No thumbnail available for this web map. Using default thumbnail.")
#         return default_thumbnail_path
    
# For downloads, use in-memory BytesIO where possible
def download_thumbnail(webmap_item, thumbnail_path, gis=None):
    """Download an item's thumbnail to a temporary PNG file.

    Args:
        webmap_item: Item whose ``_portal.resturl``, ``id`` and
            ``thumbnail`` attributes are used to build the download URL.
        thumbnail_path: Unused; kept so existing callers keep working. The
            image is always written to a new temporary file.
        gis: Optional GIS connection; when given, its token is sent with
            the request.

    Returns:
        Path of the temporary ``.png`` file, or ``default_thumbnail_path``
        when the item has no thumbnail or anything goes wrong. Failures are
        reported with ``print`` and never raised.
    """
    try:
        if not webmap_item.thumbnail:
            # Without this check the URL would end in ".../info/None".
            print("No thumbnail available for this web map. Using default thumbnail.")
            return default_thumbnail_path
        url = f"{webmap_item._portal.resturl}content/items/{webmap_item.id}/info/{webmap_item.thumbnail}"
        token = gis._con.token if gis else None
        params = {'token': token} if token else {}
        # The timeout keeps an unresponsive portal from hanging the notebook.
        response = requests.get(url, params=params, timeout=30)
        # Fail on HTTP errors instead of trying to decode an error page.
        response.raise_for_status()
        img = PILImage.open(BytesIO(response.content))
        # delete=False: the file must outlive this function. Writing through
        # the handle inside "with" guarantees the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            img.save(temp_file, format='PNG')
        return temp_file.name
    except Exception as exc:
        # Deliberately best-effort: a missing thumbnail must not stop the
        # conversion, but the reason is reported to aid troubleshooting.
        print("Thumbnail download failed; using default.")
        print(f"  Reason: {exc}")
        return default_thumbnail_path

def ensure_local_thumbnail(thumbnail_path, local_filename="default_storymap_thumbnail.png"):
    """Return a local file path for a thumbnail, downloading it if necessary.

    Args:
        thumbnail_path: Local file path or URL of the thumbnail.
        local_filename: Where to store the download when ``thumbnail_path``
            is not an existing local file.

    Returns:
        ``thumbnail_path`` when it is an existing local file, otherwise
        ``local_filename`` after the download has been written to it.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    # If it's already a local file, just return it
    if thumbnail_path and os.path.isfile(thumbnail_path):
        return thumbnail_path
    # Otherwise, download it
    response = requests.get(thumbnail_path, timeout=30)
    # Without this check an HTTP error page would be saved as the "image".
    response.raise_for_status()
    with open(local_filename, "wb") as f:
        f.write(response.content)
    return local_filename

# def create_webmap_thumbnail(webmap_id, thumbnail_path):
#     webmap_item = gis.content.get(webmap_id)
#     webmap = GIS.map(webmap_item)
#     # Export map as image (extent, size, etc. can be specified)
#     img_bytes = webmap.export_map(size=[800, 600])
#     img = Image.open(BytesIO(img_bytes))
#     img.thumbnail((200, 133))
#     img.save(thumbnail_path)

def create_image_thumbnail(image_url, thumbnail_path):
    """Download an image and save a reduced copy of it.

    The image is shrunk in place to fit within 800x600 (aspect ratio is
    preserved by ``Image.thumbnail``) and written to ``thumbnail_path``.

    Args:
        image_url: URL of the source image.
        thumbnail_path: Destination file; the format is chosen by Pillow
            from the file extension.

    Returns:
        ``thumbnail_path``.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
        OSError: If the data is not a readable image or cannot be saved.
    """
    response = requests.get(image_url, timeout=30)
    # Fail clearly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    img = PILImage.open(BytesIO(response.content))
    img.thumbnail((800, 600))
    if img.mode == "CMYK":
        # CMYK (common for print-ready JPEGs) cannot be written as PNG.
        img = img.convert("RGB")
    img.save(thumbnail_path)
    return thumbnail_path

## 1. Fetch and Parse Classic StoryMap Data

This cell fetches the classic StoryMap item and parses its JSON data.

In [None]:
# Cell 1: Fetch and parse classic StoryMap data
def fetch_classic_storymap_data(classic_storymap_id, gis):
    """Fetch a classic StoryMap item and its JSON data.

    Args:
        classic_storymap_id: Item ID of the classic StoryMap.
        gis: GIS connection used to look up the item.

    Returns:
        ``(classic_item, classic_item_data)`` where ``classic_item_data``
        is a dict.

    Raises:
        ValueError: If the item has no data (empty or ``None``), or if the
            data is a string that is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    classic_item = Item(gis=gis, itemid=classic_storymap_id)
    classic_data = classic_item.get_data()
    # Treat None / empty string the same as an empty dict; previously those
    # fell through to json.loads and failed with an unrelated error.
    if not classic_data:
        raise ValueError("ERROR: StoryMap to be converted must be hosted on ArcGIS Online.")
    if isinstance(classic_data, dict):
        return classic_item, classic_data
    return classic_item, json.loads(classic_data)

# Usage:
# NOTE(review): input_param2 is not defined in the visible cells - presumably
# an input widget created elsewhere; confirm, or assign the item ID directly.
classic_storymap_id = input_param2.value  # or set manually
classic_item, classic_item_data = fetch_classic_storymap_data(classic_storymap_id, gis)

## 2. Extract Story Settings and Entries

This cell extracts the theme, title, subtitle, and entries from the classic StoryMap data.

In [None]:
# Cell 2: Extract settings and entries
def extract_story_settings(classic_item_data):
    """Pull the theme, title, subtitle and entries out of classic story JSON.

    Args:
        classic_item_data: Dict with a ``"values"`` mapping that holds
            ``settings.theme``, ``title``, optional ``subtitle`` and
            ``story.entries``.

    Returns:
        ``(theme, title, subtitle, entries)``; ``subtitle`` is ``""`` when
        absent.

    Raises:
        KeyError: If any required key is missing.
    """
    values = classic_item_data["values"]
    theme = values["settings"]["theme"]
    title = values["title"]
    subtitle = values.get("subtitle", "")
    entries = values["story"]["entries"]
    return theme, title, subtitle, entries

# Unpack the classic story's settings; each entry corresponds to one tab.
classic_story_theme, classic_story_title, classic_story_subtitle, entries = extract_story_settings(classic_item_data)
print(f"Found {len(entries)} entries (tabs) in the classic StoryMap.")

## 3. Determine StoryMap Theme

This cell determines the new StoryMap theme based on the classic theme group.

In [None]:
# Cell 3: Determine theme
def determine_theme(theme):
    """Choose the StoryMap theme that corresponds to a classic theme.

    Args:
        theme: Classic theme dict; the decision is based on
            ``theme["colors"]["group"]``.

    Returns:
        ``Themes.OBSIDIAN`` when the group is ``"dark"``; ``Themes.SUMMIT``
        for ``"light"``, any other group, or when the group information is
        missing.
    """
    # Missing "colors"/"group" falls back to the default theme instead of
    # raising KeyError.
    colors = (theme or {}).get("colors") or {}
    if colors.get("group") == "dark":
        return Themes.OBSIDIAN
    return Themes.SUMMIT

# Map the classic theme to the theme applied to every converted story.
new_theme = determine_theme(classic_story_theme)
print(f"Theme set to: {new_theme}")

## 4. Process a Single Entry (Tab) and Build StoryMap

Run this cell for each entry you want to convert.  
Set `entry_index` to the tab number (starting from 0).

In [None]:
# Cell 4: Process a single entry and build StoryMap
def process_entry(entry, i, new_theme, gis, default_thumbnail_path):
    """Build the main-stage content and thumbnail for one classic entry (tab).

    Args:
        entry: Classic entry dict; ``title`` and ``media`` are read.
        i: Entry index, used to name thumbnail files.
        new_theme: Unused here; kept so existing callers keep working.
        gis: GIS connection used to look up web map items.
        default_thumbnail_path: Thumbnail returned when none could be
            produced for the entry.

    Returns:
        ``(entry_title, main_stage_content, thumbnail_path, invalid_webmap)``.
        ``main_stage_content`` is a ``Map``, ``Embed`` or ``Image``, or
        ``None`` when the entry has no usable media. ``invalid_webmap`` is
        ``True`` when processing a web map raised an error.
    """
    entry_title = entry.get("title")
    # "media" (and its nested dicts) may be present but null.
    media_info = entry.get("media") or {}
    media_type = media_info.get("type")
    main_stage_content = None
    thumbnail_path = None
    invalid_webmap = False

    if media_type == "webmap":
        webmap_id = (media_info.get('webmap') or {}).get('id')
        if webmap_id:
            try:
                main_stage_content = Map(webmap_id)
                thumbnail_path = download_thumbnail(Item(gis=gis, itemid=webmap_id), f"webmap_thumbnail_{i}.png", gis)
            except Exception as e:
                print(f"Error processing webmap {webmap_id}: {e}")
                invalid_webmap = True
    elif media_type == "webpage":
        webpage_url = (media_info.get("webpage") or {}).get("url")
        if webpage_url:
            main_stage_content = Embed(webpage_url)
    elif media_type == "image":
        image_url = (media_info.get("image") or {}).get("url")
        if image_url:
            main_stage_content = Image(image_url)
            try:
                thumbnail_path = create_image_thumbnail(image_url=image_url, thumbnail_path=f"image_thumbnail_{i}.png")
            except Exception as e:
                # The thumbnail is cosmetic; a failure here must not abort
                # the entry, so fall back to the default below.
                print(f"Could not create thumbnail for image {image_url}: {e}")

    # Single fallback: anything that is not an existing local file (None, a
    # URL, a failed download) is replaced by the default thumbnail.
    if not thumbnail_path or not os.path.isfile(thumbnail_path):
        thumbnail_path = default_thumbnail_path

    return entry_title, main_stage_content, thumbnail_path, invalid_webmap

# Usage:
# Converts one tab per run; the results feed the build/save cell that follows.
entry_index = 0  # Change this to process a different tab
entry = entries[entry_index]
entry_title, main_stage_content, thumbnail_path, invalid_webmap = process_entry(entry, entry_index, new_theme, gis, default_thumbnail_path)
print(f"Processed entry: {entry_title}")

## 5. Build and Save the StoryMap for This Entry

This cell builds the StoryMap, adds content, and saves/publishes it.

In [None]:
# Cell 5: Build and save StoryMap for the entry
def build_and_save_storymap(entry, entry_index, entry_title, main_stage_content, new_theme, thumbnail_path, default_thumbnail_path):
    """Create, publish and thumbnail a StoryMap for one classic entry.

    The story consists of a single docked-panel sidecar slide: the entry's
    ``description`` HTML becomes the panel content and
    ``main_stage_content`` becomes the slide media. The cover is filled in
    and then marked hidden.

    Args:
        entry: Classic entry dict; only ``description`` is read here.
        entry_index: Not used inside this function.
        entry_title: Title for the cover and the saved item.
        main_stage_content: Media object for the sidecar slide.
        new_theme: Theme applied to the story.
        thumbnail_path: Thumbnail used for the cover (when it is a local
            file) and for the published item.
        default_thumbnail_path: Cover image used when ``thumbnail_path`` is
            not a local file.

    Returns:
        ``(story, published_story_item)``; the item is ``None`` when the
        story has no ``_item`` attribute after saving.
    """
    story = StoryMap()
    story.theme(new_theme)
    panel = Sidecar(style="docked-panel")
    story.add(panel)

    # Panel content comes from the entry's description HTML.
    html_elements = parse_root_elements(entry.get("description", ""))
    content_nodes, _image_metadata = convert_html_elements_to_storymap_node(html_elements)
    panel.add_slide(contents=content_nodes, media=main_stage_content)

    # Set cover properties
    cover = story.content_list[0]
    cover.title = entry_title
    cover.byline = ""
    cover.date = "none"
    if os.path.isfile(thumbnail_path):
        cover_image_source = thumbnail_path
    else:
        cover_image_source = default_thumbnail_path
    cover.media = Image(cover_image_source)

    # Hide cover
    # NOTE(review): this replaces the cover node's whole config and stores
    # the string 'true' rather than a boolean - confirm that is intended.
    for node in story.properties['nodes'].values():
        if node['type'] == 'storycover':
            node['config'] = {'isHidden': 'true'}

    # Save and publish
    story.save(title=entry_title, tags=["auto-created"], publish=True)
    if not hasattr(story, '_item'):
        print("Could not find item for story:", story.title)
        return story, None

    published_item = story._item
    published_item.update(thumbnail=thumbnail_path)
    print(f"Published StoryMap: {entry_title}")
    return story, published_item

# Usage:
# Builds and publishes the story for the entry processed in the previous cell.
story, published_story_item = build_and_save_storymap(entry, entry_index, entry_title, main_stage_content, new_theme, thumbnail_path, default_thumbnail_path)

## 6. (Optional) Repeat for Other Entries

Repeat cells 4 and 5 for each tab/entry you want to convert.  
You can collect all `published_story_item` objects in a list for later use.

## 7. Build a Collection from Published StoryMaps

After all entries are processed and published, run this cell to create a Collection.

In [None]:
# Cell 7: Build a collection from published StoryMaps
def build_collection(classic_item, published_storymap_items, thumbnail_paths, new_theme, default_thumbnail_path):
    """Create and publish a Collection holding the converted stories.

    Each published story is added with a locally available thumbnail. The
    collection takes the classic item's title, the given theme, a ``"tab"``
    type on its second content node, and the classic item's thumbnail (or
    the default one) as media.

    Args:
        classic_item: Classic StoryMap item; ``title`` and ``itemid`` are
            read.
        published_storymap_items: Published story items, in tab order.
        thumbnail_paths: Thumbnail path/URL per story, indexed by the
            story's position (list or dict keyed by int).
        new_theme: Theme applied to the collection.
        default_thumbnail_path: Used when the classic thumbnail is not a
            local file.

    Returns:
        The saved ``Collection``.

    Note:
        Uses the module-level ``gis`` connection to fetch the classic
        item's thumbnail.
    """
    collection = Collection()
    collection_title = classic_item.title

    for position, story_item in enumerate(published_storymap_items):
        thumb_file = ensure_local_thumbnail(thumbnail_paths[position], f"storymap_thumbnail_{position}.png")
        collection.add(item=story_item, title=story_item.title, thumbnail=thumb_file)

    collection.content[0].title = collection_title
    collection.content[0].byline = ""
    collection.theme(new_theme)
    collection.content[1].type = "tab"

    # Set the Collection thumbnail to be the same as the classic story
    classic_source_item = Item(gis=gis, itemid=classic_item.itemid)
    classic_thumbnail_path = download_thumbnail(classic_source_item, "classic_story_thumbnail.png", gis)
    if os.path.isfile(classic_thumbnail_path):
        media_path = classic_thumbnail_path
    else:
        media_path = default_thumbnail_path
    collection.content[1].media = Image(path=media_path)

    collection.save(title=collection_title, tags=["auto-created"], publish=True)
    print(f"Collection created: {collection_title}")
    return collection

# Usage:
# published_storymap_items = [item1, item2, ...]  # Collect these as you go
# thumbnail_paths = {0: "...", 1: "...", ...}
# NOTE(review): neither published_storymap_items nor thumbnail_paths is
# assigned in the visible cells - both must be defined (see the two
# commented examples above) before this line runs.
collection = build_collection(classic_item, published_storymap_items, thumbnail_paths, new_theme, default_thumbnail_path)