# To do list:
- limit the data that is retrieved by using a date
- try to get the token from the URL
- clean up the functions that create `gis`, as this will be done only once at the beginning of the workflow

In [None]:
import json
import requests
import re
from pprint import pprint
import pandas as pd

# Functions

In [None]:
def executeRequest(url, headers, payload):
    """Send a GET request and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Endpoint to query.
    headers : dict
        HTTP headers sent with the request.
    payload : dict
        Passed to ``requests.get`` as ``data``.

    Returns
    -------
    dict or None
        The parsed JSON when the status code is 200, an empty dict for any
        other status code (or a 200 whose body is not JSON), and ``None``
        when the request itself could not be performed.
    """
    try:
        response = requests.get(url, headers=headers, data=payload)
    except requests.exceptions.RequestException as err:
        print("There was an problem in the request :(")
        print(err)
        return None

    ## always nice to print the url as a sanity check
    print(response.url)

    if response.status_code == 200:
        try:
            return response.json()
        except ValueError:
            print("The response body is not valid JSON")
            return {}

    # Error bodies are not guaranteed to be JSON: fall back to the raw text
    # so that reporting the failure cannot raise a second error.
    try:
        details = response.json()
    except ValueError:
        details = response.text
    print(f'Failed to get data {response.status_code}, {details}')
    return {}

In [None]:
def create_strict_reg_exp(to_search):
    """Return a regular expression that matches exactly ``to_search``.

    The text is escaped before being anchored with ``^`` and ``$`` so that
    names containing regex metacharacters (parentheses, ``+``, ``.`` ...)
    are matched literally instead of being interpreted as a pattern.

    Parameters
    ----------
    to_search : str
        Literal text that has to be matched in full.

    Returns
    -------
    str
        The anchored pattern, ready to be used with ``re.search``.
    """
    return f"^{re.escape(str(to_search))}$"

In [None]:
def searchContactListsID(r_json, reg_exp):
    """Map the names of the contact lists matching ``reg_exp`` to their ids.

    Parameters
    ----------
    r_json : dict
        Response holding the contact lists under the ``"lists"`` key; each
        entry is read through its ``"name"`` and ``"list_id"`` keys.
    reg_exp : str
        Pattern applied to every list name with ``re.search``.

    Returns
    -------
    dict
        ``{name: list_id}`` for every matching list (``''`` when an entry
        has no ``list_id``). An empty dict is returned, after printing a
        message, when the response does not have the expected structure.
    """
    problem = "There was a problem with the structure of the json"

    contact_lists = r_json.get("lists", []) if isinstance(r_json, dict) else None
    if not isinstance(contact_lists, list) or not contact_lists:
        print(problem)
        return {}

    try:
        return {
            entry['name']: entry.get('list_id', '')
            for entry in contact_lists
            if re.search(reg_exp, entry['name']) is not None
        }
    except (AttributeError, KeyError, TypeError, re.error):
        print(problem)
        return {}

In [None]:
def requestURLbyListID(id_contact_list, limit=500):
    """Build the URL that requests the contacts of one contact list.

    Parameters
    ----------
    id_contact_list : str
        Identifier placed in the ``lists`` query parameter.
    limit : int, optional
        Value of the ``limit`` query parameter (500 by default).

    Returns
    -------
    str
        The request URL, which also asks for the street addresses.
    """
    return (
        "https://api.cc.email/v3/contacts"
        f"?lists={id_contact_list}"
        "&include=street_addresses"
        f"&limit={limit}"
        "&include_count=false"
    )

In [None]:
def getContactsLocation(r_json):
    """Extract the contacts that have both a postal code and a country.

    Only the first entry of ``street_addresses`` is inspected for each
    contact; contacts missing the postal code or the country are left out.

    Parameters
    ----------
    r_json : dict
        Response holding the contacts under the ``"contacts"`` key.

    Returns
    -------
    pandas.DataFrame
        Columns ``contact_id``, ``postal_code`` and ``country``. The columns
        are always present, also when no contact could be located or when
        the response does not have the expected structure (a message is
        printed in that case and the frame is empty).
    """
    columns = ['contact_id', 'postal_code', 'country']
    located = []
    try:
        for contact in r_json.get("contacts", []):
            addresses = contact.get('street_addresses', [{}])
            if not addresses:
                continue
            first_address = addresses[0]
            postal_code = first_address.get("postal_code", None)
            country = first_address.get("country", None)
            if postal_code and country:
                located.append({
                    'contact_id': contact.get('contact_id', ''),
                    'postal_code': postal_code,
                    'country': country,
                })
    except (AttributeError, IndexError, KeyError, TypeError):
        print("There was a problem with the structure of the json")
        # Do not hand back a partially processed response.
        located = []
    return pd.DataFrame(located, columns=columns)

In [None]:
def missLocation(r_json, df):
    """Return the ids of the contacts that are absent from ``df``.

    Parameters
    ----------
    r_json : dict
        Response holding the contacts under the ``"contacts"`` key.
    df : pandas.DataFrame
        Located contacts; their ids are read from the ``contact_id`` column.
        A frame without that column is treated as "nobody located".

    Returns
    -------
    set
        Contact ids present in ``r_json`` but not in ``df``. An empty set is
        returned, after printing a message, when the inputs do not have the
        expected structure.
    """
    try:
        every_id = {contact.get('contact_id', '') for contact in r_json.get("contacts", [])}
        located_ids = set(df['contact_id']) if 'contact_id' in df.columns else set()
    except (AttributeError, TypeError):
        print("There was a problem with the structure of the json")
        return set()
    return every_id - located_ids

In [None]:
def writeLocationCsv(df, csvName):
    """Write ``df`` to ``./<csvName>.csv`` without the index column.

    A message is printed on success; on failure the reason is printed too
    instead of being silently discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to write.
    csvName : str
        File name without the ``.csv`` extension.
    """
    csv_file = f'./{csvName}.csv'
    try:
        df.to_csv(csv_file, index=False)
    except (OSError, AttributeError) as err:
        print("The csv hasn't been written")
        print(err)
    else:
        print(f"{csv_file} written")

# Environmental variables

In [None]:
# Path of the file from which the credentials are read.
env_path = ".env"

In [None]:
# Parse the KEY=VALUE lines of the env file into the `env` dictionary.
env = {}
with open(env_path) as f:
    for line in f:
        line = line.strip()
        # Blank lines, comments and lines without "=" carry no setting.
        if not line or line.startswith("#") or "=" not in line:
            continue
        # Split on the first "=" only: a value may itself contain "=".
        env_key, _, env_value = line.partition("=")
        env[env_key.strip()] = env_value.strip()

In [None]:
# Constant Contact credentials taken from the parsed env file.
api_key = env['cc_api_key']
token = env['cc_token']

To get the token, open this URL in a web browser (replacing `{api_key}` with the API key): https://api.cc.email/v3/idfed?client_id={api_key}&redirect_uri=https://localhost&response_type=token&scope=contact_data

In [None]:
# Build the token request URL for the current api_key (the bare expression is only evaluated, not stored).
f"https://api.cc.email/v3/idfed?client_id={api_key}&redirect_uri=https://localhost&response_type=token&scope=contact_data"

and then update the `.env` file with the new token. Is there a way to retrieve programmatically the URL that this GET call redirects to?

In [None]:
# Names of the contact lists to process.
contact_lists_of_interest = ["Educator Ambassadors"]

In [None]:
# First list name, kept in a variable for the cells that work on a single list.
list_element = contact_lists_of_interest[0]

# Getting the data from Constant Contact

In [None]:
# Headers (bearer token) and empty payload passed to executeRequest.
headers = {
  'Authorization': f'Bearer {token}'
}
payload = {}

In [None]:
# URL used to request the contact lists.
url = "https://api.cc.email/v3/contact_lists?include_count=false"

In [None]:
# Response with the contact lists (None when the request could not be performed).
r_contact_lists = executeRequest(url, headers, payload)

In [None]:
# Maps every processed list name to the ids of its contacts without location.
log_dict = {}
# Defined up front so that it exists even when every list is skipped.
contacts_without_location = set()
for list_element in contact_lists_of_interest:
    cl_to_search = create_strict_reg_exp(list_element)
    id_dict = searchContactListsID(r_json = r_contact_lists, reg_exp = cl_to_search)
    list_id = id_dict.get(list_element)
    if not list_id:
        print(f"The contact list {list_element} has not been found, skipping it")
        continue
    url_contacts = requestURLbyListID(list_id)
    r_contacts = executeRequest(url_contacts, headers, payload)
    if not r_contacts:
        # None (request failed) or {} (non-200 answer): nothing to process.
        print(f"No contacts could be retrieved for {list_element}, skipping it")
        continue
    contacts_location_df = getContactsLocation(r_contacts)
    contacts_without_location = missLocation(r_contacts, contacts_location_df)
    log_dict[list_element] = contacts_without_location
    writeLocationCsv(contacts_location_df, list_element)

In [None]:
# Strict pattern for the list name currently held in list_element.
cl_to_search = create_strict_reg_exp(list_element)

In [None]:
# Inspect the contact ids without location computed in the loop above.
contacts_without_location

# Once the csv is ready it can be published in arcgis online via the arcgis api
Here there can be different cases:
- publish a new service, if the list is a new one
- fully overwrite a service
- append data to a service

In [None]:
import arcgis
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection
from copy import deepcopy
from arcgis.geocoding import geocode
from arcgis import geometry
import re
from pprint import pprint
import pandas as pd

Documentation on setting the content_status [here](https://developers.arcgis.com/python/api-reference/arcgis.gis.toc.html#arcgis.gis.Item.content_status)

In [None]:
def publishCSVasFS(csvName, sharing = "everyone", aol_folder_name = "constant_contact", aol_username = env['aol_username'], aol_password = env['aol_key']):
    """Publish ``./<csvName>.csv`` as a feature service in ArcGIS Online.

    The csv is added as an item, published using the ``country`` and
    ``postal_code`` columns as address fields, moved (item and service) to
    ``aol_folder_name``, optionally shared with everyone and finally
    protected against deletion.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension), also used as the service name.
    sharing : str, optional
        The service is shared with everyone only when this is "everyone".
    aol_folder_name : str, optional
        Destination folder of the csv item and of the service.
    aol_username, aol_password : str, optional
        Credentials used to log in.

    Returns
    -------
    str or None
        The id of the published service, or ``None`` when the service name
        is not available or something failed.
    """
    try:
        gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password)
        if not gis.content.is_service_name_available(csvName, "featureService"):
            print("The service name is not available, try overwritting, appending the data or a different service name")
            return None

        print(f"Service name {csvName} is available")
        csv_file = f'./{csvName}.csv'
        csv_item = gis.content.add({}, csv_file)
        csv_lyr = csv_item.publish(None, {'CountryCode': 'country',
                                          'Postal': 'postal_code'})

        # Report how many entries ended up in the service.
        flayer_collection = FeatureLayerCollection.fromitem(csv_lyr)
        searched_flayer = flayer_collection.layers[0]
        nber_features = searched_flayer.query(return_count_only=True)
        print(f"The service {csvName} has been published. The service has {nber_features} entries")

        print(f"Moving service {csvName} to {aol_folder_name} in ArcGIS Online...")
        csv_item.move(aol_folder_name)
        csv_lyr.move(aol_folder_name)
        print(f"Service {csvName} has been moved to {aol_folder_name} in ArcGIS Online")

        # sharing
        if sharing == "everyone":
            csv_lyr.share(everyone=True, org=False, groups=None, allow_members_to_edit=False)
        sharing_prop = csv_lyr.shared_with
        if sharing_prop['everyone']:
            print(f"shared with everyone")
        else:
            print(f"not public layer, for this layer to be used it has to be public or the urls have to be whitelisted")

        # Delete protection. The outcome is read from the answer of
        # protect(); the service is never deleted to "test" the protection,
        # because that would destroy it whenever the protection failed.
        protect_result = csv_lyr.protect()
        if isinstance(protect_result, dict):
            is_protected = bool(protect_result.get('success', False))
        else:
            is_protected = bool(protect_result)
        if is_protected:
            print("The detele protection is activated.")
        else:
            print("The delete protection could not be activated.")

        # it is possible to check the status with csv_item.content_status
        return csv_lyr.id

    except Exception as err:
        print("The csv hasn't been published")
        print(err)
        return None

In [None]:
# Manual test of the publishing step.
# NOTE(review): testing_val is not defined in the visible cells - confirm where it comes from.
publishCSVasFS(csvName = testing_val)

In [None]:
def findItemGetID(csvName, aol_username = env['aol_username'], aol_password = env['aol_key']):
    """Return the id of the feature layer whose title is exactly ``csvName``.

    The content search can return several loosely matching items, so the
    results are filtered by exact title before deciding.

    Parameters
    ----------
    csvName : str
        Exact title of the service to look for.
    aol_username, aol_password : str, optional
        Credentials used to log in.

    Returns
    -------
    str or None
        The item id when exactly one item carries that title; ``None``
        (after printing the reason) otherwise.
    """
    try:
        gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password)
        searched_item = gis.content.search(csvName, item_type = "Feature Layer")
        exact_matches = [item for item in searched_item if item.title == csvName]
    except Exception as err:
        print("There was a problem finding the item")
        print(err)
        return None

    if len(exact_matches) == 1:
        return exact_matches[0].id
    if not exact_matches and len(searched_item) == 1:
        print(f"The service {csvName} does not exist with that exact name. ")
    else:
        print(f"The csvName provided returns {len(searched_item)} results.")
    return None

To overwrite follow [this notebook from ESRI](https://developers.arcgis.com/python/sample-notebooks/overwriting-feature-layers/). 

In [None]:
def overwriteFSwithCSV(csvName, item_id, aol_username = env['aol_username'], aol_password = env['aol_key']):
    """Overwrite the feature service ``item_id`` with ``./<csvName>.csv``.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension) that replaces the service data.
    item_id : str
        Id of the service to overwrite; ``None`` is reported and ignored.
    aol_username, aol_password : str, optional
        Credentials used to log in.

    Returns
    -------
    None
        The outcome is only printed.
    """
    if item_id is None:
        print("The service hasn't been overwritten")
        return

    csv_file = f'./{csvName}.csv'
    try:
        gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password)
        searched_item = gis.content.get(item_id)
    except Exception as err:
        print("The service hasn't been overwritten")
        print(err)
        return
    if searched_item is None:
        print("The service hasn't been overwritten")
        return

    try:
        flayer_collection = FeatureLayerCollection.fromitem(searched_item)
        overwrite_message = flayer_collection.manager.overwrite(csv_file)
        if overwrite_message['success'] == True:
            searched_flayer = flayer_collection.layers[0]
            nber_features = searched_flayer.query(return_count_only=True)
            print(f"The service {csvName} has been overwritten. The service has {nber_features} entries")
        else:
            # An unsuccessful answer used to go unnoticed.
            print("There was a problem overwriting the service")
    except Exception as err:
        print("There was a problem overwriting the service")
        print(err)

In [None]:
# Manual test of the overwrite step: look the service up, then overwrite it.
# NOTE(review): testing_val is not defined in the visible cells - confirm where it comes from.
testing_id = findItemGetID(csvName = testing_val)
overwriteFSwithCSV(csvName = testing_val, item_id = testing_id)

To append follow [this notebook from ESRI](https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/).

In [None]:
def appendCSVtoFS(csvName, item_id, aol_username = env['aol_username'], aol_password = env['aol_key']): 
    """Collect the data needed to append ``./<csvName>.csv`` to a service.

    Unfinished draft: the csv and the features of the first layer of
    ``item_id`` are loaded and joined on ``contact_id``, but nothing is
    written to the service and nothing is returned. The module-level
    ``gis`` connection is used; the credential parameters are not read.
    """
    csv_table = pd.read_csv(f'./{csvName}.csv')
    service_item = gis.content.get(item_id)
    first_layer = service_item.layers[0]
    feature_set = first_layer.query()
    # rows whose contact_id exists both in the service and in the csv
    shared_rows = pd.merge(left = feature_set.sdf, right = csv_table, how='inner', on = 'contact_id')
    # placeholders for the update step that is not written yet
    pending_updates = []
    existing_features = feature_set.features

In [None]:
def getFeatureSet(item_id):
    """Return the result of querying the first layer of the item ``item_id``.

    The item is fetched through the module-level ``gis`` connection.
    """
    first_layer = gis.content.get(item_id).layers[0]
    return first_layer.query()

In [None]:
def checkOverlap(csvName, fset):
    """Find the rows shared by the service and ``./<csvName>.csv``.

    The two tables are inner-joined on ``contact_id``.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension).
    fset : object
        Feature set of the service; its ``sdf`` attribute is the table that
        gets joined.

    Returns
    -------
    pandas.DataFrame, False or None
        The joined rows when there is at least one, ``False`` when there is
        none and ``None`` (after printing a message) when the check failed.
        Callers must not rely on the truthiness of the result, because a
        DataFrame cannot be evaluated as a boolean.
    """
    try:
        csv_file = f'./{csvName}.csv'
        df = pd.read_csv(csv_file)
        overlap_rows = pd.merge(left = fset.sdf, right = df, how='inner', on = 'contact_id')
    except (OSError, AttributeError, KeyError, TypeError, ValueError) as err:
        print("There has been a problem checking row overlap")
        print(err)
        return None
    # `if overlap_rows:` raises for a DataFrame; emptiness is the real test.
    return False if overlap_rows.empty else overlap_rows

In [None]:
def updateFeaturesInService(overlap_rows, fset):
    """Update the features of the service that also appear in the csv.

    For every ``contact_id`` of ``overlap_rows`` the matching feature is
    copied, re-geocoded from the country and postal code of the csv row and
    sent back to the service as an update.

    Parameters
    ----------
    overlap_rows : pandas.DataFrame
        Rows present in both the service and the csv (``contact_id`` column).
    fset : object
        Feature set of the service (``features`` and ``spatial_reference``
        are read).

    Returns
    -------
    None
        The outcome is only printed.

    Notes
    -----
    NOTE(review): ``df`` (the csv table), ``flayer`` (the layer to edit) and
    ``gis`` are not parameters; they are looked up as module-level names -
    confirm they are defined before this function is called.
    """
    try:
        # Index the features once, keeping the first one for each contact_id.
        features_by_id = {}
        for feature in fset.features:
            features_by_id.setdefault(feature.attributes['contact_id'], feature)

        features_for_update = [] #list containing corrected features
        for contact_id in overlap_rows['contact_id']:
            # get the feature to be updated
            original_feature = features_by_id[contact_id]
            feature_to_be_updated = deepcopy(original_feature)

            print(str(original_feature))

            # get the matching row from csv
            matching_row = df.where(df.contact_id == contact_id).dropna()

            # Positional access: the row keeps its original index label, so
            # a label-based [0] only works for the first row of the csv.
            country = matching_row['country'].values[0]
            postal_code = matching_row['postal_code'].values[0]
            try:
                postal = int(postal_code)
            except (TypeError, ValueError):
                # non-numeric postal codes are sent as they are
                postal = postal_code

            # from the csv geocode the country and postcode
            address = {"CountryCode": country, "Postal": postal}
            add_loc = geocode(address)
            if not add_loc:
                print(f"No location found for contact {contact_id}, it has not been updated")
                continue

            #get geometries in the destination coordinate system
            input_geometry = add_loc[0]['location']
            output_geometry = geometry.project(geometries = [input_geometry],
                                               in_sr = 4326,
                                               out_sr = fset.spatial_reference['latestWkid'],
                                               gis = gis)

            # assign the updated values
            feature_to_be_updated.geometry = output_geometry[0]
            feature_to_be_updated.attributes['contact_id'] = matching_row['contact_id'].values[0]
            feature_to_be_updated.attributes['postal_code'] = postal_code
            feature_to_be_updated.attributes['country'] = country

            #add this to the list of features to be updated
            features_for_update.append(feature_to_be_updated)

        if features_for_update:
            message = flayer.edit_features(updates= features_for_update)
            print(message)
        else:
            print("no features were updated")
    except Exception as err:
        print("There was a problem updating the features")
        print(err)

In [None]:
def checkNewRows(csvName, fset, overlap_rows):
    """Return the rows of ``./<csvName>.csv`` that are not in ``overlap_rows``.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension).
    fset : object
        Not used; kept so that the signature stays unchanged.
    overlap_rows : pandas.DataFrame
        Rows already present in the service (``contact_id`` column).

    Returns
    -------
    pandas.DataFrame, False or None
        The new rows when there is at least one, ``False`` when there is
        none and ``None`` (after printing a message) when the check failed.
        Callers must not rely on the truthiness of the result, because a
        DataFrame cannot be evaluated as a boolean.
    """
    try:
        csv_file = f'./{csvName}.csv'
        df = pd.read_csv(csv_file)
        new_rows = df[~df['contact_id'].isin(overlap_rows['contact_id'])]
    except (OSError, KeyError, TypeError, ValueError) as err:
        print("There has been a problem checking for new rows")
        print(err)
        return None
    # `if new_rows:` raises for a DataFrame; emptiness is the real test.
    return False if new_rows.empty else new_rows

In [None]:
def addNewFeatures(new_rows, fset):
    """Add the rows of ``new_rows`` to the service as new features.

    Each row is geocoded from its country and postal code; rows for which
    no location is found are skipped. The new features are copies of the
    first feature of ``fset`` with the geometry and the ``contact_id``,
    ``postal_code`` and ``country`` attributes replaced.

    Parameters
    ----------
    new_rows : pandas.DataFrame
        Rows to add (``contact_id``, ``postal_code`` and ``country`` columns).
    fset : object
        Feature set of the service (``features`` and ``spatial_reference``
        are read).

    Returns
    -------
    None
        The outcome is only printed.

    Notes
    -----
    NOTE(review): ``flayer`` (the layer to edit) and ``gis`` are not
    parameters; they are looked up as module-level names - confirm they are
    defined before this function is called.
    """
    try:
        features_to_be_added = []
        template_feature = deepcopy(fset.features[0])
        # iterrows() yields (index, row) pairs: unpack to get the row itself
        for _, row in new_rows.iterrows():
            address = {"CountryCode": row['country'], "Postal": row['postal_code']}
            add_loc = geocode(address, out_fields="City,Country")
            if not add_loc:
                continue

            new_feature = deepcopy(template_feature)
            #get geometries in the destination coordinate system
            input_geometry = add_loc[0]['location']
            output_geometry = geometry.project(geometries = [input_geometry],
                                               in_sr = 4326,
                                               out_sr = fset.spatial_reference['latestWkid'],
                                               gis = gis)
            # assign the values of the csv row; same attributes as the ones
            # written by updateFeaturesInService
            new_feature.geometry = output_geometry[0]
            new_feature.attributes['contact_id'] = row['contact_id']
            new_feature.attributes['postal_code'] = row['postal_code']
            new_feature.attributes['country'] = row['country']

            #add this to the list of features to be added
            features_to_be_added.append(new_feature)

        if features_to_be_added:
            flayer.edit_features(adds = features_to_be_added)
        else:
            print("no features were added")
    except Exception as err:
        print("There has been a problem adding new features")
        print(err)

In [None]:
def locationNotMapped(csvName, item_id):
    """Placeholder: nothing is checked yet and ``None`` is returned.

    The function had no body, which made the whole file a syntax error.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension). Not used yet.
    item_id : str
        Id of the service. Not used yet.
    """
    # TODO: implement the comparison between the csv and the service.
    return None

In [None]:
def csvToArcgis(csvName, action, gis, aol_folder_name = "constant_contact", aol_username = env['aol_username'], aol_password = env['aol_key']):
    """Send ``./<csvName>.csv`` to ArcGIS Online.

    Parameters
    ----------
    csvName : str
        Name of the csv (without extension) and of the service.
    action : str
        "publish" creates a new service, "overwrite" replaces the data of an
        existing one and "append" updates the rows already in the service
        and adds the missing ones. Any other value is reported and ignored.
    gis : object
        Not used by this function; kept so that the signature stays
        unchanged.
    aol_folder_name : str, optional
        Destination folder, forwarded to the publishing step.
    aol_username, aol_password : str, optional
        Credentials forwarded to the steps that log in.
    """
    def _has_rows(result):
        # The check functions return a DataFrame, False or None; a DataFrame
        # cannot be used directly in an `if`.
        return result is not None and result is not False and not result.empty

    if action == "publish":
        published_id = publishCSVasFS(csvName, aol_folder_name = aol_folder_name,
                                      aol_username = aol_username, aol_password = aol_password)
        locationNotMapped(csvName, published_id)
    elif action == "overwrite":
        item_id = findItemGetID(csvName, aol_username = aol_username, aol_password = aol_password)
        overwriteFSwithCSV(csvName, item_id, aol_username = aol_username, aol_password = aol_password)
        locationNotMapped(csvName, item_id)
    elif action == "append":
        item_id = findItemGetID(csvName, aol_username = aol_username, aol_password = aol_password)
        if item_id is None:
            print(f"The service {csvName} has not been found, nothing has been appended")
            return
        fset = getFeatureSet(item_id)
        overlapRows = checkOverlap(csvName, fset)
        if overlapRows is None:
            # The check itself failed: adding rows now could duplicate
            # contacts that are already in the service.
            print("The overlap could not be checked, nothing has been appended")
        else:
            if _has_rows(overlapRows):
                updateFeaturesInService(overlapRows, fset)
                known_rows = overlapRows
            else:
                # No overlap: every row of the csv is a new one.
                known_rows = pd.DataFrame({'contact_id': []})
            newRows = checkNewRows(csvName, fset, known_rows)
            if _has_rows(newRows):
                addNewFeatures(newRows, fset)
        locationNotMapped(csvName, item_id)
    else:
        print(f"Unknown action {action!r}, use 'publish', 'overwrite' or 'append'")

In [None]:
# Log in to ArcGIS Online once and run the chosen action for the csv.
# NOTE(review): csvName and action are not defined in the visible cells - confirm where they are set.
aol_password = env['aol_key']
aol_username = env['aol_username']
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password)
csvToArcgis(csvName, action, gis)