In [1]:
# Copyright 2020 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
import json
import collections
import warnings

from googleapiclient import discovery

In [3]:
# Because I'm devving locally and using app default creds, I'm suppressing
# explicitly to make my output prettier.
warnings.filterwarnings("ignore", "Your application has authenticated using end user credentials")

In [4]:
# Fetch most up to date role list and role descriptions from API

# Build Cloud IAM API client
iam = discovery.build('iam', 'v1')

raw_role_data = {}

# Build initial request
request = iam.roles().list()

print('Fetching list of all roles.')

while True:
    # Results are paginated, so iterate over the results until exhausted
    response = request.execute()

    for role in response.get('roles', []):
        name = role['name']
        if name not in raw_role_data:
            # print(f'Getting details for {name}.')
            
            # Get role details which contains included permissions
            raw_role_data[name] = iam.roles().get(name=name).execute()

    print('Still working...')
    # Get the next page
    request = iam.roles().list_next(previous_request=request,
                                    previous_response=response)
    
    if request is None:
        print('Fetched all roles.')
        break

Fetching list of all roles.
Still working...
Still working...
Fetched all roles.


In [5]:
# Dump the data to JSON as a snapshot of state of roles/permissions
# Useful for further processing or obviate the need for more API calls
print('Writing role permissions.')

with open('roles.json', 'w') as f:
    json.dump(raw_role_data, f, indent=2)

print('Done.')

Writing role permissions.
Done.


In [6]:
# Iter over API data; map roles to perms, perms to roles, counts, and unique perms
roles_to_perms = collections.defaultdict(set)
perms_to_roles = collections.defaultdict(set)
perms_counts = collections.defaultdict(int)
unique_perms = set()

for role_name, role_data in raw_role_data.items():
    for perm in role_data.get('includedPermissions', []):
        roles_to_perms[role_name].add(perm)
        perms_to_roles[perm].add(role_name)
        perms_counts[role_name] += 1
        unique_perms.add(perm)

In [7]:
print(f'Unique roles: {len(perms_counts)}')
print(f'Unique permissions: {len(unique_perms)}')

Unique roles: 485
Unique permissions: 2658


In [8]:
# Create a list of roles sorted by number of permissions in each

roles_sorted_by_perms_asc = sorted(perms_counts.items(), key=lambda x: x[1])

In [9]:
# Find any roles with permissions which are subsets of another role.
# This reduces the number of choices we have, ensuring we're selecting
# a more comprehensive (greater number of permissiones) role later

subset_roles = set()
for this_role, _ in roles_sorted_by_perms_asc:
    for other_role, other_perms in roles_to_perms.items():
        if this_role == other_role:
            continue
        
        this_perms = roles_to_perms[this_role]
        
        if this_perms.issubset(other_perms):
            subset_roles.add(this_role)
            
count = len(subset_roles)
print(f'Found {count} subset roles:')
print("\n".join(sorted(subset_roles)[:10]))
print(f'+ {count - 10} others.')

Found 446 subset roles:
roles/accessapproval.approver
roles/accessapproval.configEditor
roles/accessapproval.viewer
roles/accesscontextmanager.policyEditor
roles/accesscontextmanager.policyReader
roles/actions.Admin
roles/actions.Viewer
roles/androidmanagement.user
roles/apigee.admin
roles/apigee.analyticsAgent
+ 436 others.


In [10]:
# Select the role with the most permissions

role_with_most_perms = roles_sorted_by_perms_asc[-1][0]

In [11]:
# What role did we select?  Probably "roles/owner"–it's huge!.

role_with_most_perms

'roles/owner'

In [12]:
# The number of permissions this role includes

perms_counts[role_with_most_perms]

2578

In [13]:
# Start selection with the role with the most perms (currently roles/owner)

selected_roles = {role_with_most_perms,}

In [14]:
# Determine what permissions we still need to satisfy

remaining = unique_perms - roles_to_perms[role_with_most_perms]

In [15]:
# So how many permissions are left to solve?

len(remaining)

80

In [16]:
# While there are still permissions left to satisfy, find the next role with the
# largest number of permissions and add it to the list, and update what permissions
# are remaining.  Repeat until there are no more permissions remaining.
while remaining:
    print(f'Selected roles: {", ".join(selected_roles)}')
    print(f'Number of permissions remaining to satisfy: {len(remaining)}')
    
    # Sort remaining as sets are intrinsically unordered
    # Not strictly necessary, but the benefit here is consistent results
    # at the slight cost of performance due to the sort
    for perm in sorted(remaining):
        print(f'Looing for permission "{perm}".')
        
        # get set of roles which contain permission, removing those which
        # are subsets of another
        satisfy = perms_to_roles[perm] - subset_roles
        print(f'Satisfying roles: {", ".join(satisfy)}')

        # sort roles by the number of permissions they contain, select role
        # with the most
        sorted_roles = sorted((role, perms_counts[role])
                              for role in satisfy)
        selected = sorted_roles[-1][0]
        print(f'Role with most perms: {selected}')

        # add this role to selected roles and recalculate remaining perms
        selected_roles.add(selected)
        remaining = remaining - roles_to_perms[selected]

        # We're recalculating remaining in-place, so we have to break
        break
    
    print()

print('Done.')

Selected roles: roles/owner
Number of permissions remaining to satisfy: 80
Looing for permission "axt.labels.get".
Satisfying roles: roles/axt.admin
Role with most perms: roles/axt.admin

Selected roles: roles/owner, roles/axt.admin
Number of permissions remaining to satisfy: 77
Looing for permission "bigquery.tables.create".
Satisfying roles: roles/bigquery.admin, roles/datafusion.serviceAgent, roles/cloudasset.serviceAgent
Role with most perms: roles/datafusion.serviceAgent

Selected roles: roles/owner, roles/datafusion.serviceAgent, roles/axt.admin
Number of permissions remaining to satisfy: 56
Looing for permission "billing.accounts.close".
Satisfying roles: roles/billing.admin
Role with most perms: roles/billing.admin

Selected roles: roles/owner, roles/datafusion.serviceAgent, roles/axt.admin, roles/billing.admin
Number of permissions remaining to satisfy: 28
Looing for permission "billing.accounts.create".
Satisfying roles: roles/billing.creator
Role with most perms: roles/billi

In [17]:
# We have roles!

print("\n".join(sorted(selected_roles)))

roles/axt.admin
roles/billing.admin
roles/billing.creator
roles/compute.xpnAdmin
roles/container.hostServiceAgentUser
roles/datacatalog.categoryFineGrainedReader
roles/datafusion.serviceAgent
roles/iam.serviceAccountTokenCreator
roles/iap.httpsResourceAccessor
roles/orgpolicy.policyAdmin
roles/owner
roles/remotebuildexecution.actionCacheWriter
roles/resourcemanager.folderAdmin
roles/resourcemanager.organizationAdmin
roles/resourcemanager.projectCreator
roles/serverless.serviceAgent


In [18]:
# How many did we pick?

len(selected_roles)

16

In [19]:
# Does our selected set of roles give us every unique permission?

{perm for role in selected_roles for perm in roles_to_perms[role]} == unique_perms

True