# Story protector
This notebook crawls a story to find the items it contains (maps, scenes, dashboards and other content) and then applies delete protection to those items and their content, provided they belong to your organization.

## How to run
1. Provide the `itemId` of your story to the `story_id` parameter below.
2. Configure `delete_protect` to set whether you would like to apply **delete protection** to the story and all of the content items found within it. **True** = protect items, **False** = leave unprotected.
3. Configure `share` to set whether you would like to perform a bulk update of the sharing permissions for the story and all of its content.
4. If `share` is set to **True**, set `share_level` to the sharing level to apply: 'private', 'org', or 'public'.
5. Once parameters have been configured, click 'Cell' > 'Run All' in the menu bar above.
6. Scroll down in the notebook and inspect the results.


In [None]:
## Input parameters -- edit these values before running the rest of the notebook.
# itemId of the story to crawl. It is used to load the story and as the parent id
# of every relation found directly inside the story.
story_id = '' # <-- Paste your story itemId here
# Passed as `enable` to each item's protect() call: True turns delete protection
# on, False turns it off.
delete_protect = True # <- toggle the delete protection ON (True) or OFF (False)
## When `share` is False the sharing level of the items is not changed and `share_level` is ignored.
share = False # <- set to True to bulk-update the sharing level of the story and its content, otherwise False
# Sharing level written to each item when `share` is True.
share_level = 'public' # <- one of 'private', 'org' or 'public'

## Script setup
These are functions that do smaller tasks within the main script. For instance, some crawl specific items like dashboards or webmaps, and others crawl nested group layers within a webmap.

Storing them here is just easier and makes bits of code re-usable.

### Import the packages

In [None]:
from arcgis.gis import GIS
from arcgis.gis import Item
from arcgis.mapping import WebMap, WebScene
from arcgis.apps.expbuilder.expbuilder import WebExperience
import re # import regex
import pandas as pd

# Set Pandas dataframe display options so the result tables are easy to review
# Do not truncate the text shown in a cell
pd.set_option('display.max_colwidth', None)
# Allow up to 500 columns to be displayed
pd.set_option('display.max_columns',500)

### Helper functions

In [None]:
# Define embed types to look for
# NOTE(review): this list is not read by any executable code visible in this
# notebook; embeds are recognised by matching their URL instead.
embedTypes = ['dashboards', 'survey123', 'experience'] ## <- More here?

# Empty container to eventually hold all of the items found within the story.
# The crawler helpers add the item dictionaries returned by the search to it in place.
itemList = []

# A Relation is one "item -> resource" edge found while crawling.
# Keeping every edge makes it possible to build a graph of the content later.
class Relation:
    """One relationship between an item and a resource referenced by it.

    Creating an instance also registers it in the class-level
    ``_relations_list``, which therefore accumulates every relation created
    during the session.
    """

    # Registry shared by all instances (deliberately defined on the class)
    _relations_list = []
    # Class-level defaults; __init__ overrides both on every instance
    item = ''
    resource = ''

    def __init__(self, itemId, resourceId):
        """Store the parent ``itemId`` and the ``resourceId`` it references."""
        self.item, self.resource = itemId, resourceId
        # Register the new relation in the shared list
        self._relations_list.append(self)

# Crawler helper: look an item up by id and remember what the search returns
def getResourceInfo(resourceId, itemList):
    """Search the GIS for ``resourceId`` and add the matches to ``itemList``.

    The search runs against the module-level ``gis`` connection and returns
    item dictionaries. ``itemList`` is extended in place; nothing is returned,
    and nothing is added when the search has no results.
    """
    response = gis.content.advanced_search(
        query=f"id: {resourceId}",
        max_items=-1,
        as_dict=True,
    )
    matches = response['results']
    if matches:
        itemList.extend(matches)


# Crawl story nodes for embeds of ArcGIS Apps

# Recognises the embedded app from its URL (dashboards path, experience/survey123 host)
_EMBED_APP_PATTERN = re.compile(r'[\/]dashboards[\/]|\/experience[.]|\/survey123[.]', re.IGNORECASE)
# A stand-alone 32-character hexadecimal token, the usual shape of an ArcGIS item id
_EMBED_ITEM_ID_PATTERN = re.compile(r'(?<![0-9a-f])[0-9a-f]{32}(?![0-9a-f])', re.IGNORECASE)
# The `id` query parameter of a Survey123 URL, wherever it sits in the query string
_EMBED_SURVEY_ID_PATTERN = re.compile(r'[?&]id=([^&#]+)', re.IGNORECASE)


def _embed_resource_id(embedApp, embedUrl, search_from):
    """Return the item id referenced by an embed URL, or None when there is none.

    ``embedApp`` is 'dashboards', 'experience' or 'survey123'. ``search_from`` is
    the position in ``embedUrl`` just after the text that identified the app.
    """
    if embedApp == 'survey123':
        found = _EMBED_SURVEY_ID_PATTERN.search(embedUrl)
        return found.group(1) if found else None
    # Dashboards and experiences carry the id in the path. Searching for the id
    # itself (instead of taking the last path segment) keeps working when the URL
    # has a trailing slash, a sub-page, a query string or a fragment.
    found = _EMBED_ITEM_ID_PATTERN.search(embedUrl, search_from)
    return found.group() if found else None


def crawl_story_embeds(story_json, found_data, itemList):
    """Find ArcGIS app embeds in the story nodes and crawl the items behind them.

    Parameters:
        story_json: the story data; its 'nodes' entry maps node ids to node dicts.
        found_data: False when the story data could not be loaded; nothing is crawled then.
        itemList: list that receives the item info of everything found (extended in place).

    For every embed node whose URL points at a dashboard, an experience or a
    Survey123 form, the item is looked up, a Relation from the story
    (``story_id``) to the item is recorded, and dashboards and experiences are
    crawled further. Nodes that are malformed, or whose URL holds no
    recognisable item id, are skipped instead of aborting the crawl.
    """
    if not found_data or not isinstance(story_json, dict):
        return
    nodes = story_json.get('nodes')
    if not isinstance(nodes, dict):
        return

    for value in nodes.values():
        if not isinstance(value, dict) or value.get('type') != 'embed':
            continue
        data = value.get('data')
        embedUrl = data.get('url') if isinstance(data, dict) else None
        if not isinstance(embedUrl, str):
            continue

        app_match = _EMBED_APP_PATTERN.search(embedUrl)
        if app_match is None:
            continue
        # Drop the delimiters around the app name; lower-case it because the
        # pattern matches case-insensitively but the comparisons below do not
        embedApp = app_match.group()[1:-1].lower()

        resourceId = _embed_resource_id(embedApp, embedUrl, app_match.end())
        if not resourceId:
            continue

        getResourceInfo(resourceId, itemList)
        Relation(story_id, resourceId)
        if embedApp == 'dashboards':
            crawl_dashboard(gis, resourceId, itemList)
        elif embedApp == 'experience':
            crawl_experience(resourceId, itemList)
        # survey123: the form item itself is all that is recorded

        
# Crawl story resources for webmaps and other resources
def crawl_story_resources(story_json, found_data, itemList):
    """Find the web maps, web scenes and theme referenced by the story resources.

    Parameters:
        story_json: the story data; its 'resources' entry maps resource ids to resource dicts.
        found_data: False when the story data could not be loaded; nothing is crawled then.
        itemList: list that receives the item info of everything found (extended in place).

    Each web map / web scene is looked up, related to the story (``story_id``)
    and crawled for its layers. A story theme item is looked up and related to
    the story. Resources without the expected keys are skipped instead of
    aborting the crawl.
    """
    if not found_data or not isinstance(story_json, dict):
        return
    resources = story_json.get('resources')
    if not isinstance(resources, dict):
        return

    for value in resources.values():
        if not isinstance(value, dict):
            continue
        data = value.get('data')
        if not isinstance(data, dict):
            continue
        resourceType = value.get('type')

        if resourceType in ('webmap', 'webscene'):
            resourceId = data.get('itemId')
            if not resourceId:
                continue
            getResourceInfo(resourceId, itemList)
            Relation(story_id, resourceId)
            # Scenes share the map data model, so one crawler handles both
            crawl_webmap(resourceId, itemList)

        # Check for story theme
        elif resourceType == 'story-theme':
            resourceId = data.get('themeItemId')
            if not resourceId:
                continue
            getResourceInfo(resourceId, itemList)
            Relation(story_id, resourceId)
        
        
# ArcGIS Dashboards
def _crawl_dashboard_widgets(widgets, itemId, itemList):
    """Record every item referenced by a list of dashboard widgets."""
    for widget in widgets or []:
        if not isinstance(widget, dict):
            continue

        # Widget that points straight at an item
        if 'itemId' in widget:
            resourceId = widget['itemId']
            getResourceInfo(resourceId, itemList)
            Relation(itemId, resourceId)
            # A map widget references a web map, which has layers of its own
            if widget.get('type') == 'mapWidget':
                crawl_webmap(resourceId, itemList)
            continue

        # Widget that is driven by datasets: 'datasets' is a list, and each
        # dataset may name its source item under 'dataSource'
        for dataset in widget.get('datasets') or []:
            dataSource = dataset.get('dataSource') if isinstance(dataset, dict) else None
            if isinstance(dataSource, dict) and 'itemId' in dataSource:
                resourceId = dataSource['itemId']
                getResourceInfo(resourceId, itemList)
                Relation(itemId, resourceId)


def crawl_dashboard(gis, itemId, itemList):
    """Crawl a dashboard item and record the items its widgets reference.

    Parameters:
        gis: the GIS connection used to load the dashboard item.
        itemId: item id of the dashboard.
        itemList: list that receives the item info of everything found (extended in place).

    Widgets are read from 'desktopView' when the dashboard data has one,
    otherwise from a top-level 'widgets' list. Both layouts are crawled the
    same way. When the dashboard data cannot be read, a message is printed and
    the dashboard is skipped.
    """
    dashboard = Item(gis = gis, itemid = itemId)
    # check for dashboardData
    try:
        dashboard_json = dashboard.get_data()
    except Exception:
        print('could not find data')
        return
    if not isinstance(dashboard_json, dict):
        return

    # Crawl dashboard for views and widget datasets within
    if 'desktopView' in dashboard_json:
        desktop_view = dashboard_json['desktopView']
        widgets = desktop_view.get('widgets') if isinstance(desktop_view, dict) else None
    else:
        widgets = dashboard_json.get('widgets')
    _crawl_dashboard_widgets(widgets, itemId, itemList)
        

# ArcGIS Web Experiences
def crawl_experience(itemId, itemList):
    """Crawl a web experience and record the items its data sources reference.

    Parameters:
        itemId: item id of the experience.
        itemList: list that receives the item info of everything found (extended in place).

    Every data source that names an item is looked up and related to the
    experience. Web maps and web scenes are crawled further for their layers.
    Data sources without an item id are skipped.
    """
    experience = WebExperience(item=itemId)
    # Return datasource dictionary
    sources = experience.datasources
    # if dictionary is present crawl datasources
    if not sources:
        return

    for source in sources.values():
        if not isinstance(source, dict):
            continue
        resourceId = source.get('itemId')
        if not resourceId:
            # Nothing to look up for a data source that names no item
            continue
        getResourceInfo(resourceId, itemList)
        # Record the edge, as every other crawler in this notebook does
        Relation(itemId, resourceId)
        # Scenes share the map data model, so one crawler handles both
        if source.get('type') in ('WEB_SCENE', 'WEB_MAP'):
            crawl_webmap(resourceId, itemList)
            
# Crawl group layers within a WebMap
def crawl_group(mapId, group, itemList):
    """Recursively record the items referenced by the layers of a group layer.

    Parameters:
        mapId: item id of the map that owns the group; used as the relation parent.
        group: group layer dict; its child layers are read from 'layers'.
        itemList: list that receives the item info of everything found (extended in place).

    Nested group layers are crawled recursively. Layers without an 'itemId'
    are skipped, and a missing 'layers' or 'layerType' key is tolerated.
    """
    for layer in group.get('layers') or []:
        # if group layer
        if layer.get('layerType') == 'GroupLayer':
            crawl_group(mapId, layer, itemList)
        # any other layer: only useful when it names its item
        elif 'itemId' in layer:
            resourceId = layer['itemId']
            getResourceInfo(resourceId, itemList)
            Relation(mapId, resourceId)

                
# ArcGIS Online WebMaps/WebScenes
# Turns out Scenes and Map have the same data model so we can crawl both with the same function
def _crawl_map_layers(itemId, layers, itemList):
    """Record the items referenced by a list of map layers, descending into groups."""
    for layer in layers or []:
        if layer.get('layerType') == 'GroupLayer':
            crawl_group(itemId, layer, itemList)
        elif 'itemId' in layer:
            resourceId = layer['itemId']
            getResourceInfo(resourceId, itemList)
            Relation(itemId, resourceId)
        # can't do much if I don't have the itemId


def crawl_webmap(itemId, itemList):
    """Crawl a web map or web scene and record the items its layers reference.

    Parameters:
        itemId: item id of the web map / web scene.
        itemList: list that receives the item info of everything found (extended in place).

    Both the operational layers and the basemap layers are crawled. Layers
    that do not name an item are skipped. When the item cannot be loaded, a
    message is printed and the map is skipped.
    """
    web_map = gis.content.get(itemId)
    if web_map is None:
        # The item may have been deleted or may not be accessible to this user
        print('could not find item "{0}"'.format(itemId))
        return
    map_data = web_map.get_data()
    if not isinstance(map_data, dict):
        return

    _crawl_map_layers(itemId, map_data.get('operationalLayers'), itemList)

    base_map = map_data.get('baseMap')
    if isinstance(base_map, dict):
        _crawl_map_layers(itemId, base_map.get('baseMapLayers'), itemList)


# Content discovery
The script below crawls the story data and calls the helper functions defined above to subsequently crawl the contents of items found within the story.

Once this block runs the script will return a table showing all the items found within the story.

In [None]:
# Define the GIS
gis = GIS("home")

# Fail early with a clear message when the story id was not filled in
if not story_id:
    raise ValueError('Set `story_id` to the itemId of your story before running this cell.')

# Define the main story crawler function
# Crawl the story to find items and record their item_id
story = Item(gis, story_id)
getResourceInfo(story_id, itemList)

# Check for storyData: the published story is stored as an item resource
try:
    story_json = story.resources.get('published_data.json', try_json=True)
except Exception:
    story_json = None
if not isinstance(story_json, dict):
    # old story saved to item_data
    try:
        story_json = story.get_data()
    except Exception:
        story_json = None

found_data = isinstance(story_json, dict)
if not found_data:
    print('could not find data')
    # Keep the crawlers below safe to call even though there is nothing to crawl
    story_json = {}

# Crawl contents
crawl_story_embeds(story_json, found_data, itemList)
crawl_story_resources(story_json, found_data, itemList)

# Turn the contents from the story into a dataframe and keep a convenient subset
# of columns. reindex (rather than plain column selection) also works when
# nothing was found and the dataframe has no columns at all.
summary_columns = ['id', 'owner', 'created', 'isOrgItem', 'modified', 'title', 'type','protected', 'access']
items_df = pd.DataFrame(itemList).reindex(columns=summary_columns)
# Remove duplicate items
items_df = items_df.drop_duplicates(subset='id')
# Filter to only show those items that are within the 'home' org
items_df = items_df.loc[items_df['isOrgItem'].eq(True)]

# Preview
items_df

# Protect the items
Using the table of items above, this next block will loop through those items and perform the desired protection and sharing updates.

Once complete, this block will report back an updated table of all the items for review.

In [None]:
# Now that we have a list of items we'll protect them from deletion and optionally make them public
# Ids of the items kept in the table above (de-duplicated and limited to org items)
id_list = items_df['id'].tolist()

# Function to perform the protection and sharing
def update_item_properties(item, protection, share, level):
    """Set delete protection, and optionally the sharing level, of one item.

    Parameters:
        item: id of the item to update.
        protection: passed as ``enable`` to the item's protect() call.
        share: when True, the item's access is set to ``level``.
        level: sharing level to apply: 'private', 'org' or 'public'.

    Raises:
        ValueError: when ``share`` is True and ``level`` is not a known sharing
            level, or when the item cannot be loaded.
    """
    # Reject a mistyped sharing level before anything is changed
    if share and level not in ('private', 'org', 'public'):
        raise ValueError('Unknown sharing level "{0}"; use \'private\', \'org\' or \'public\'.'.format(level))

    i = gis.content.get(item)
    if i is None:
        # Report a missing item clearly instead of failing on None below
        raise ValueError('Item "{0}" was not found or is not accessible.'.format(item))

    i.protect(enable = protection)
    if share:
        # NOTE(review): confirm that your portal honours `access` through an item
        # update; if it does not, sharing has to go through the item's sharing API.
        i.update(item_properties={"access": level})

# Update the settings for each item
# A failure on one item is reported (with its reason) and the loop moves on, so a
# single bad item does not stop the rest from being updated.
for item in id_list:
    try:
        update_item_properties(item, delete_protect, share, share_level)
    except Exception as err:
        print('Error: Could not update "{0}".'.format(item))
        print('  Reason: {0}'.format(err))

## Review the results
Wait a few moments after running the above. This last cell will query those items that were protected and present an updated table where you can confirm that things were protected/shared as expected.

In [None]:
# Reset the container
itemList = []

# Re-query the items to refresh the properties
for item in id_list:
    getResourceInfo(item, itemList)

# Turn the refreshed items into a dataframe and keep a convenient subset of
# columns. reindex (rather than plain column selection) also works when no item
# was re-queried and the dataframe has no columns at all.
summary_columns = ['id', 'owner', 'created', 'isOrgItem', 'modified', 'title', 'type','protected', 'access']
items_df = pd.DataFrame(itemList).reindex(columns=summary_columns)
items_df