# UNSUPERVISED ROLE MODEL (clustering based on user permissions)

## Load the data

In [1]:
import json
import pandas as pd

# Load the raw inputs: `permissions` supplies the matrix column labels and
# `user_permissions` is a mapping keyed by user.
# The encoding is given explicitly because JSON text is UTF-8, while open()
# otherwise falls back to the platform default, which is not UTF-8 everywhere
# and can mis-decode non-ASCII names.
with open("data/raw/permissions.json", encoding="utf-8") as f:
    permissions = json.load(f)
with open("data/raw/user_permissions.json", encoding="utf-8") as f:
    user_permissions = json.load(f)

## Build User-Permission Matrix

In [2]:
import numpy as np

# Row labels are the users; column labels are the known permissions.
user_list = list(user_permissions)
perm_list = permissions

# Column position of every permission, for direct lookup while filling rows.
perm_index = {name: col for col, name in enumerate(perm_list)}

# Start from an all-zero users x permissions grid ...
matrix = np.zeros((len(user_list), len(perm_list)), dtype=int)

# ... then flag every permission a user holds with a 1, one row at a time.
for user_idx, user in enumerate(user_list):
    held_columns = [perm_index[perm] for perm in user_permissions[user]]
    matrix[user_idx, held_columns] = 1

# Labelled view of the same grid: index = users, columns = permissions.
df = pd.DataFrame(data=matrix, index=user_list, columns=perm_list)


## Apply a clustering algorithm

In [3]:
from sklearn.cluster import KMeans

# Try different numbers of roles
k = 10

# Cluster on the permission columns only. This cell writes "suggested_role"
# into `df` below, so on a re-run (e.g. after changing k) that column already
# exists; dropping it here keeps the previous labels from being fed to KMeans
# as if they were a permission. On the first run the column is absent and
# errors="ignore" makes the drop a no-op.
features = df.drop(columns=["suggested_role"], errors="ignore")

# random_state fixes the centroid initialisation so repeated runs agree.
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases -- consider pinning it for cross-version stability.
model = KMeans(n_clusters=k, random_state=42)
labels = model.fit_predict(features)

# Assign cluster labels (suggested roles)
df["suggested_role"] = labels


## Inspect suggested roles

In [4]:
# Suggested roles = clusters of similar permission sets.
# Each role is summarised by the permissions held by more than half of the
# users assigned to it.
for i in range(k):
    cluster_members = df[df["suggested_role"] == i]
    # Column means of the 0/1 matrix = share of members holding each permission.
    cluster_perms = cluster_members.drop(columns=["suggested_role"]).mean()
    # Threshold on the unrounded share: rounding to 2 decimals first would
    # turn a narrow majority such as 0.504 into 0.50 and wrongly drop it.
    majority_perms = cluster_perms[cluster_perms > 0.5].index.tolist()
    print(f"\nSuggested Role {i+1} Permissions:\n", majority_perms)


Suggested Role 1 Permissions:
 ['perm_3', 'perm_9', 'perm_13', 'perm_15', 'perm_21', 'perm_22']

Suggested Role 2 Permissions:
 ['perm_2', 'perm_13', 'perm_19']

Suggested Role 3 Permissions:
 ['perm_16', 'perm_21']

Suggested Role 4 Permissions:
 ['perm_5', 'perm_6', 'perm_15', 'perm_30']

Suggested Role 5 Permissions:
 ['perm_14']

Suggested Role 6 Permissions:
 ['perm_19']

Suggested Role 7 Permissions:
 ['perm_2', 'perm_14', 'perm_19']

Suggested Role 8 Permissions:
 ['perm_3', 'perm_5', 'perm_6', 'perm_9', 'perm_13', 'perm_15', 'perm_21', 'perm_22', 'perm_30']

Suggested Role 9 Permissions:
 ['perm_7']

Suggested Role 10 Permissions:
 ['perm_2', 'perm_16', 'perm_21', 'perm_24']
