# Quick and Dirty Removal of "Drive By" users. 
A "drive by" user is one who logged in once but never did anything with AGOL.  They have no content, are not members of any groups, have not logged in over the last year, and use no storage.  In other words, they can be safely deleted to reclaim named users in an auto-provisioning organization because when they come back, it will be like they never left.  In order to get rid of these users, we need to:
1. Identify them based on the criteria above
2. Remove all their licenses, releasing them to the pool
3. Delete them
### Author: Seth Peery, Virginia Polytechnic Institute and State University
### Last Modified July 3, 2019

In [None]:
from arcgis.gis import GIS
import requests
import time
import csv
import json
import pandas
from time import strftime
from urllib.parse import urlparse
import getpass

In [None]:
# Make the connection to your ArcGIS Online Organization
orgURL = "https://virginiatech.maps.arcgis.com" # <==update for your org
orgUser = "sspeery"                             # <==update for your org
orgPwd = getpass.getpass("Enter your AGOL Administrator Password")
gis = GIS(orgURL,username=orgUser,password=orgPwd)
# Everything in the hostname before ".maps.arcgis.com" (e.g. "virginiatech");
# later cells use it as the "_<org>" suffix of enterprise usernames.
orgShortName = urlparse(orgURL).hostname[:-len('.maps.arcgis.com')]

# verify that it works
try:
    org = gis.properties.name
    print ("Connected to " + org)
except Exception as ex:
    # Best-effort check: report the failure (and its cause) instead of crashing the cell.
    print ("Error retrieving AGOL org properties.")
    print (ex)

In [None]:
# Query the portal for every user exactly once and cache the results in a pandas
# dataframe, so that all later filtering happens offline against that dataframe.
# max_users must be >= the number of users in the org, otherwise some are missed.
# This cell is slow for large organizations, but it only has to run once.

userList = []
users = gis.users.search(max_users=9999)

# Enterprise usernames carry this "_<org short name>" suffix
enterpriseSuffix = "_" + orgShortName

for user in users:

    # lastLogin is divided by 1000 before conversion to a local time struct;
    # see https://developers.arcgis.com/python/guide/accessing-and-managing-users/
    loginTime = time.localtime(user.lastLogin / 1000)

    # One record per user.  Key order is significant: it becomes the dataframe column order.
    userList.append({
        # username with the org suffix split off (the VT PID)
        "pid": user.username.rsplit(enterpriseSuffix)[0],
        # values taken straight from the user object
        "esriUsername": user.username,
        "fullName": user.fullName,
        "email": user.email,
        "storage": user.storageUsage / 1024,
        "role": user.role,
        # year/month/day, not zero padded
        "lastAccess": "{}/{}/{}".format(*loginTime[:3]),
        # count of groups this user is a member of
        "groups": len(user.groups),
        # number of content items (<=100) is the length of the returned items list
        "items": len(user.items()),
    })

# iteration done.
# now let's make a dataframe.  We'll use this later.
df = pandas.DataFrame(userList)

# Keep only the Enterprise Logins, i.e. usernames that end in "_<org short name>".
# Legacy ESRI global accounts and service accounts do not carry that suffix,
# so they are exempted from everything that follows.
# The suffix and its length are derived from orgShortName.
isEnterpriseLogin = df['esriUsername'].str[-len(enterpriseSuffix):] == enterpriseSuffix
vtEnterpriseLogins = df.loc[isEnterpriseLogin]
vtEnterpriseLogins



## "Drive by" user identification
####  I feel reasonably confident we can get rid of users who 
* own no content items
* are members of no groups
* use no storage
* have not logged in for a year  (or, precisely, have not logged in during 2018 or 2019 - you could set this to be more aggressive if needed)

... since if that user logs back in, it will be like they never left.

In [None]:
# Narrow the VT Enterprise Logins dataframe down to the "drive by" accounts:
# one boolean mask per criterion, then keep only the rows where all of them hold.
usesNoStorage = vtEnterpriseLogins['storage'] == 0
ownsNoItems = vtEnterpriseLogins['items'] == 0
inNoGroups = vtEnterpriseLogins['groups'] == 0
# the first four characters of lastAccess are compared as text against '2018'
lastSeenBefore2018 = vtEnterpriseLogins['lastAccess'].str[:4] < '2018'

deleteList = vtEnterpriseLogins.loc[usesNoStorage & ownsNoItems & inNoGroups & lastSeenBefore2018]
deleteList


In [None]:

# Optional: dump the deletion candidates to a timestamped CSV file in the working
# directory so a human can review them in Excel before anything is deleted.
csvTimestamp = strftime("%Y%m%d-%H%M%S", time.localtime())
csvFileName = 'driveby-' + csvTimestamp + ".csv"
deleteList.to_csv(csvFileName)


## Deprovision all user licenses and entitlements
Before we can actually delete a user we have to get rid of all their entitlements, releasing all add-on licenses. 
We can either do this by attempting to revoke every entitlement in the org or by inspecting the specific user via the REST API to get their actual entitlements and just revoking what they have.

See https://github.com/sspeery/educ2019/blob/master/GetOrganizationEntitlements.ipynb

In [None]:
# Retrieve all the entitlements in the org
lics = gis.admin.license.all()
licensedProducts = []   # array of the specific Products for which there may be entitlements
licenseArr = []         # array of the actual licenses for the Products
for lic in lics:
    try:
        licensedProducts.append(json.loads(str(lic.properties))['listing']['title'])
    except KeyError:
        # license entries without a listing title cannot be looked up by name; skip them
        pass

# Collect one report dataframe per product and combine them in a single step.
# (DataFrame.append was removed in pandas 2.0; pandas.concat works on old and new versions.)
productReports = []
for product in licensedProducts:
    try:
        theLicense = gis.admin.license.get(product)
        licenseArr.append(theLicense)
        thisProductReportDf = theLicense.report #gets the total/assigned/remaining licenses for each product
        thisProductReportDf['Product'] = product
        productReports.append(thisProductReportDf)
    except KeyError:
        pass

# entDf stores your organization's entitlements total/assigned/remaining for each product
if productReports:
    entDf = pandas.concat(productReports)
else:
    # No reports at all: keep the two columns used below so the rest of the cell still runs
    entDf = pandas.DataFrame(columns=['Entitlement', 'Product'])

# Create a dictionary of the form used by the UMich license assignment script
# ({product title: [entitlement, ...]}, products in order of first appearance)
# See also: https://gist.github.com/sspeery/190d6dc45bc8f4c4b3ba0e2705850463
entitlements = {}
for productName, entitlementName in zip(entDf['Product'], entDf['Entitlement']):
    entitlements.setdefault(productName, []).append(entitlementName)
entitlements

In [None]:
# Now for each user in the deletion list we need to try and revoke each entitlement for everything they might have
# Select the username column by name rather than by position, so a change in column order cannot break this.
justTheDriveByUserNames = deleteList['esriUsername']

# Retrieve all the entitlements in the org
lics = gis.admin.license.all()
licensedProducts = []   # array of the specific Products for which there may be entitlements
licenseArr = []         # array of the actual licenses for the Products
for lic in lics:
    try:
        licensedProducts.append(json.loads(str(lic.properties))['listing']['title'])
    except KeyError:
        # license entries without a listing title cannot be looked up by name; skip them
        pass

# Look each product's license up once, instead of once per user per product.
productLicenses = []    # (product title, license) pairs that were retrieved successfully
for product in licensedProducts:
    try:
        theLicense = gis.admin.license.get(product)
    except Exception as ex:
        print ("Could not retrieve the license for " + product + ": " + str(ex))
        continue
    licenseArr.append(theLicense)
    productLicenses.append((product, theLicense))

for sUserToDelete in justTheDriveByUserNames:
    print ("Removing Entitlements for user " + sUserToDelete+"...")
    for product, theLicense in productLicenses:
        print ("-"+ product)
        try:
            theLicense.revoke(username=sUserToDelete, entitlements='*')
        except Exception as ex:
            # Deliberately best-effort: one failed revoke must not stop the
            # remaining products or users, but the reason is reported.
            print ("I'm just not worrying about this right now (" + str(ex) + ")")
    

In [None]:
# To actually delete the user we need a user object.  gis.users.get() is an exact
# username lookup; the first hit of gis.users.search() is not guaranteed to be the
# requested account, which is too risky for an irreversible delete.
for sUserToDelete in justTheDriveByUserNames:
    print ("Deleting " + sUserToDelete +"...")
    # right now we just make a roundtrip to the server for this reference
    userObjectToDelete = gis.users.get(sUserToDelete)
    if userObjectToDelete is None:
        # the account is already gone or could not be found; nothing to delete
        print ("    Skipped " + sUserToDelete + " (user not found)")
        continue
    # delete() reports success as a boolean; only claim success when it is True
    if userObjectToDelete.delete():
        print ("    Deprovisioned "+ userObjectToDelete.username)
    else:
        print ("    Could not delete " + userObjectToDelete.username)