#  <font color='red'>Geoplatform - Download JSONs - All Apps</font>
<b><u>Contents:</u></b><br>
1) Download JSONs for specified apps and items on a specified project<br>

<font color='blue' size='2'>Note:  Always run cells with blue headers before running any other processes.</font>

#  <font color=blue>Import modules</font>

In [None]:
import os, sys, shutil
from shutil import make_archive
import arcgis
from arcgis.gis import GIS
import json
from json import loads
import csv
from colorama import Fore, Back, Style
from datetime import datetime
# Confirm the imports ran, with a timestamp and the local time-zone name.
# The zone name comes from an aware datetime because the `time` module is not
# imported by this notebook, so the former time.localtime() call raised NameError.
importedAt = datetime.now().astimezone()
print(Fore.BLUE+"Modules Imported "+Style.RESET_ALL+importedAt.strftime("%m/%d/%Y %H:%M ")+str(importedAt.tzname()))

# <font color=blue>Project variables</font>

In [None]:
### INPUT CELL ###

# methodToGetItems = "ITEM_ID" or "KEYWORDS_AND_USERS" sets which method will be used to find items and download JSONs
#     ITEM_ID requires an input list of item IDs in a csv file, one item ID per row with no header.
#     KEYWORDS_AND_USERS will loop through specified users' content folders and search for keywords in tags and item titles.
methodToGetItems = 'KEYWORDS_AND_USERS'

# If using ITEM_ID above, enter filepath and filename of the csv list of item IDs.
inputCSVitemIDs = "E:\\Folder\\Subfolder\\Subfolder\\Subfolder\\csv_ItemList.csv"

# Provide keywords to search Geoplatform item titles and tags (used by the loopJSON function).
#     NOTE: keywords are matched as substrings, so partial matches count.
#           Title matching is case-sensitive; tag matching ignores case.
tagKeywordList = ['Tag keywords 1','Tag keywords 2','Tag keywords 3']

# Provide list(s) of Geoplatform user names relevant to the project (for searching user content):
usersR9 = ['User1_EPAEXT','User2_EPAEXT','User3_EPAEXT','User4_EPAEXT',
           'User5_EPAEXT',]

usersSTART = ['User6_EPAEXT','User7_EPAEXT','User8_EPAEXT',
              'User9_EPAEXT','User10_EPAEXT','User11_EPAEXT','User12_EPAEXT',
              'User13_EPAEXT','User14_EPAEXT']

usersR9EPA = ['User1_EPA','User2_EPA']

usersR9TC = ['User15_EPAEXT','User16_EPAEXT','User17_EPAEXT','User18_EPAEXT',
             'User19_EPAEXT','User20_EPAEXT']

usersOther = ['User21_EPAEXT']

# Choose userNames to assemble from lists above: 1 = R9 START GIS, 2 = All START GIS, 3 = R9 EPA, 4 = R9 Tech Center, 5 = Other
usersOptions = (1,2,3,4,5) # enter only the numbers that apply from list above.

# Specify item types to download JSON for; one output subfolder is created per entry.
# ('Map Service' is intentionally left out of this list.)
itemTypes=['Feature Service','Service Definition','Dashboard','StoryMap',
           'Web Mapping Application','Web Map','Form','Notebook', 'File Geodatabase',
           'Hub Initiative','Hub Initiative Template','Hub Site Application','Hub Page','Hub Project',
           'Web Experience','FeatureLayer','QuickCapture Project']

# Specify a project name (will be added to output folders, short with no spaces recommended.)
#     example options:  the name of the Geoplatform folder, or the project name, or a task like "ContentManagement"
projectName = 'ExampleProjectName'

# Output directory for item subfolders/JSONs, only edit the path prefix inside "outputFolder":
# The clock is read once so the date folder and the time folder always describe the
# same instant (two separate reads could straddle midnight).
runStarted = datetime.now() #Do not edit
folder_datestamp = runStarted.strftime("_%Y%m%d") #Do not edit
folder_timestamp = runStarted.strftime("%H%M") #Do not edit
outputFolder = '\\\\serverIPaddress\\ContractFolder\\ProgramFolder\\ProjectFolder\\Scripts\\GeoplatformBackups\\'+projectName+folder_datestamp+'\\'+folder_timestamp+'\\' #format example: 'C:\\PythonTemp\\Output\\'
print(Fore.BLUE+"Output folder: "+Style.RESET_ALL+outputFolder)

###INPUT SECTION END###

In [None]:
#Assemble list of userNames from numeric options set in usersOptions variable:
def appendUsernamesList(options=None, groups=None):
    """Build the list of user names to search from the selected user groups.

    Args:
        options: Iterable of group numbers to include. When omitted, the
            module-level ``usersOptions`` is used.
        groups: Mapping of group number to a list of user names. When omitted,
            the mapping is 1 = usersR9, 2 = usersSTART, 3 = usersR9EPA,
            4 = usersR9TC, 5 = usersOther.

    Returns:
        A new list of user names in selection order. A name that appears in
        more than one selected group is listed once, so that user's content
        is not searched twice.

    Raises:
        KeyError: If an option has no matching group.
    """
    if options is None:
        options = usersOptions
    if groups is None:
        groups = {1 : usersR9, 2 : usersSTART, 3 : usersR9EPA, 4 : usersR9TC, 5 : usersOther}

    selectedNames = []
    alreadyListed = set()
    for option in options:
        if option not in groups:
            raise KeyError("Unknown user group option "+repr(option)+"; valid options are "+str(sorted(groups)))
        for name in groups[option]:
            if name not in alreadyListed:
                alreadyListed.add(name)
                selectedNames.append(name)
    return selectedNames
userNames = appendUsernamesList()
print(Fore.BLUE+"Username list: "+Style.RESET_ALL+str(userNames))

# Make sure one output subfolder exists per item type.
# exist_ok=True allows the cell to be re-run against the same timestamped
# outputFolder (e.g. within the same minute) without raising FileExistsError.
for subFolders in itemTypes:
    subFolderPath = outputFolder+'JSON\\'+subFolders
    alreadyThere = os.path.isdir(subFolderPath)
    os.makedirs(subFolderPath, exist_ok=True)
    if alreadyThere:
        print(Fore.BLUE+"Directory already exists: "+Style.RESET_ALL+subFolderPath)
    else:
        print(Fore.RED+"Directory created: "+Style.RESET_ALL+subFolderPath)

# Lists items that failed to export from Geoplatform. UTF-8 is set explicitly so that
# logging item data containing non-ASCII characters cannot itself fail on the
# platform default encoding.
errorLog = open(outputFolder+"JSON\\JSONitems_ERROR.txt", "w", encoding='utf-8')

# <font color=blue>Connect to Portal</font>
- <font color=red>Important:</font>  Connection method in this script requires you to have ArcGIS Pro logged into the correct ArcGIS Online Organization target (U.S. EPA Geoplatform).

In [None]:
### ArcGIS portal url and login ###
# Connect through the portal that ArcGIS Pro is currently signed in to;
# no URL or credentials are passed in this cell.
portal = GIS('pro')
# NOTE(review): reads a private attribute (_con) of the GIS object; `token` is not
# used anywhere else in the visible part of this notebook - confirm it is still needed.
token = portal._con.token
print(Fore.BLUE+"Logged in as: "+Style.RESET_ALL+str(portal.properties.user.username))

# 1) Download JSON's for items on the project

In [None]:
def get_item_json(item):
    """Download and return the data payload of a portal item.

    Args:
        item: Object exposing ``get_data``; it is called with a single
            positional ``False`` (presumably the try_json flag of the ArcGIS
            API, i.e. "do not parse into a dict" - confirm against the API).

    Returns:
        Whatever ``item.get_data(False)`` returns, unchanged.
    """
    print("         Downloading JSON...")
    return item.get_data(False)
    
# Characters replaced with "_" before an item title is used as a file name.
# ( ) " : / ? are the ones this script has always replaced; \ * < > | are added
# because Windows rejects them in file names as well.
_TITLE_UNSAFE_CHARS = '()":/?\\*<>|'
_TITLE_TRANSLATION = str.maketrans(_TITLE_UNSAFE_CHARS, "_" * len(_TITLE_UNSAFE_CHARS))

# Item types whose downloaded data is a file that is moved into the output folder.
_FILE_ITEM_TYPES = ("Form", "Notebook")
# Item types for which only the item description is written.
_DESC_ONLY_ITEM_TYPES = ("Map Service", "Image Service", "FeatureLayer",
                         "Feature Service", "File Geodatabase")
# Item types for which both the description and the data are written.
_DESC_AND_DATA_ITEM_TYPES = ("Web Map", "Dashboard", "Web Mapping Application",
                             "StoryMap", "Hub Initiative", "Hub Initiative Template",
                             "Hub Site Application", "Hub Page", "Hub Project",
                             "Web Experience", "Service Definition",
                             "QuickCapture Project")


def _cleanTitle(title):
    """Return title with every character in _TITLE_UNSAFE_CHARS replaced by "_"."""
    return title.translate(_TITLE_TRANSLATION)


def writeJSON(item,dictDesc,jsonData):
    """Write an item's description and/or data under outputFolder\\JSON\\<item type>\\.

    What is written depends on ``item.type``:
      * Form, Notebook: ``jsonData`` is treated as a file path and that file is
        moved to ``<title>.zip``.
      * Map Service, Image Service, FeatureLayer, Feature Service,
        File Geodatabase: only ``<title>_desc.json`` is written.
      * Web Map, Dashboard, StoryMap, Hub types, etc.: ``<title>_desc.json``
        and ``<title>.json`` (``str(jsonData)``) are written.
      * Any other type: nothing is written; a line is added to errorLog.

    Args:
        item: Portal item; ``title``, ``type`` and ``id`` are read.
        dictDesc: JSON-serialisable dict describing the item.
        jsonData: The item's data (a file path for Form/Notebook items).

    Raises:
        OSError: If a file cannot be created or moved.
        TypeError: If ``dictDesc`` is not JSON-serialisable.
    """
    itemType = item.type

    if itemType not in _FILE_ITEM_TYPES + _DESC_ONLY_ITEM_TYPES + _DESC_AND_DATA_ITEM_TYPES:
        try:
            errorLog.write("This item type was not included in the script: "+str(item.title)+","+str(item.type)+","+str(item.id)+'\n')
        except Exception as e:
            errorLog.write(str(jsonData)+'\n')
            errorLog.write(str(e)+'\n')
            print (Back.RED+"ERROR ON EXPORT JSON DATA"+Style.RESET_ALL)
            print (e)
        return

    targetDirectory=outputFolder+"JSON\\"+itemType+"\\"
    # Some handled types (e.g. Map Service, Image Service) may not be in the list used
    # to pre-create folders, so make sure the folder exists before writing into it.
    os.makedirs(targetDirectory, exist_ok=True)
    cleanTitle=_cleanTitle(item.title)

    if itemType in _FILE_ITEM_TYPES:
        # The title is used for the file name rather than the basename of jsonData.
        shutil.move(jsonData, targetDirectory+cleanTitle+".zip")
        return

    #get description
    with open(targetDirectory+cleanTitle+"_desc.json", "w", encoding='utf-8') as r_desc:
        r_desc.write(json.dumps(dictDesc))

    if itemType in _DESC_ONLY_ITEM_TYPES:
        return

    #get data
    # The with-block closes the file even when the write fails.
    with open(targetDirectory+cleanTitle+".json", "w", encoding='utf-8') as r:
        try:
            r.write(str(jsonData))
        except Exception as e:
            errorLog.write(str(jsonData)+'\n')
            errorLog.write(str(e)+'\n')
            print (Back.RED+"ERROR ON EXPORT JSON DATA"+Style.RESET_ALL)
            print (jsonData)
        
def loopJSON(item):
    """Export an item if any keyword in tagKeywordList matches its title or tags.

    A keyword matches when it is a substring of ``item.title`` (case-sensitive)
    or, compared in upper case, a substring of ``str(item.tags)``. On a match
    the item's data and description are fetched and passed to writeJSON once.

    Any exception is caught so one bad item does not stop the run: it is
    printed and the item is recorded in errorLog. The export is attempted at
    most once per item, not once per matching keyword.

    Args:
        item: Portal item; ``title``, ``tags``, ``type`` and ``id`` are read.
    """
    try:
        tagsUpper = str(item.tags).upper() #upper case used to standardize case in tags
        matched = any(tagKeyword in item.title or tagKeyword.upper() in tagsUpper
                      for tagKeyword in tagKeywordList)
        if not matched:
            return
        print ("      Getting JSON: "+Fore.BLUE+item.title+Style.RESET_ALL+" | "+item.type+" | "+item.id)
        jsonData = get_item_json(item) #use get_item_json function to get data
        dictDesc = dict(item) #get description
        writeJSON(item,dictDesc,jsonData) #use writeJSON function to write file depending on item type
    except Exception as e:
        print (Back.RED+"UNABLE TO EXPORT ITEM"+Style.RESET_ALL)
        print (e)
        # getattr with a default keeps the log line writable even for a malformed item.
        errorLog.write("Failed to export item: "+str(getattr(item,'title',''))+","+str(getattr(item,'type',''))+","+str(getattr(item,'id',''))+","+str(e)+'\n')

def _downloadForUsers():
    """Run loopJSON over every item in the root folder and sub folders of each user in userNames."""
    for userName in userNames:
        try:
            print("User:  "+Fore.RED+userName+Style.RESET_ALL)
            user = portal.users.get(userName)
            if user is None:
                print ("     User not found in Geoplatform:  "+Back.RED+userName+Style.RESET_ALL)
                continue

            # max_items is set explicitly: per the original note the default cap is 100,
            # so users with more items would otherwise have items missed.
            # Root folder first
            for item in user.items(max_items=5000):
                loopJSON(item)

            # Then each of the user's folders
            for folder in user.folders:
                print("   Folder:  "+str(folder['title']))
                for item in user.items(folder=folder['title'],max_items=5000):
                    loopJSON(item)
        except Exception as e:
            # Report and move on to the next user rather than stopping the whole run.
            print ("     Error reading content for user "+userName+":  "+Back.RED+str(e)+Style.RESET_ALL)


def _downloadForItemIDs():
    """Export every item whose ID is listed in the first column of the inputCSVitemIDs file."""
    with open(str(inputCSVitemIDs), newline='') as inputCSV:
        csvData = list(csv.reader(inputCSV))

    for inputItemID in csvData:
        # Skip blank rows (e.g. a trailing empty line) instead of failing on row[0].
        if not inputItemID or not inputItemID[0].strip():
            continue
        itemID = inputItemID[0].strip()
        try:
            item = portal.content.get(itemID)
            if item is None:
                print(Fore.RED+"Error: Item ID not found in Geoplatform:"+Style.RESET_ALL+itemID)
                continue
            print ("      Getting JSON: "+Fore.BLUE+item.title+Style.RESET_ALL+" | "+item.type+" | "+item.id)
            jsonData = get_item_json(item) #use get_item_json function to get data
            dictDesc = dict(item) #get description
            writeJSON(item,dictDesc,jsonData) #use writeJSON function to write file depending on item type
        except Exception as e:
            # Show the real cause; not every failure means the ID is missing.
            print(Fore.RED+"Error: Could not export Item ID "+Style.RESET_ALL+itemID+": "+str(e))
            errorLog.write("Failed to export item ID: "+itemID+","+str(e)+'\n')


def searchGP_UsersKeywords_OR_itemID():
    """Download JSONs using the method selected in methodToGetItems.

    'KEYWORDS_AND_USERS' walks the content of every user in userNames and
    exports items matching tagKeywordList; 'ITEM_ID' exports the items listed
    in the inputCSVitemIDs file. Any other value prints an error message and
    does nothing.
    """
    if methodToGetItems == 'KEYWORDS_AND_USERS':
        print(Back.BLUE+"Downloading JSONs using the KEYWORDS_AND_USERS option."+Style.RESET_ALL)
        _downloadForUsers()
    elif methodToGetItems == 'ITEM_ID':
        print(Back.BLUE+"Downloading JSONs using the ITEM_ID option, using input CSV file containing Item ID's."+Style.RESET_ALL)
        _downloadForItemIDs()
    else:
        print(Fore.RED+"Error: Invalid entry for variable methodToGetItems, use ITEM_ID or KEYWORDS_AND_USERS."+Style.RESET_ALL)

# Run the download, and always close the error log so its contents are flushed
# to disk even if the run stops with an exception.
# inputCSVitemIDs is a file-path string, not a file object, so there is nothing to
# close for it; the CSV file is opened and closed inside the search function.
try:
    searchGP_UsersKeywords_OR_itemID()
finally:
    errorLog.close()
print(Fore.GREEN+"END"+Style.RESET_ALL)