Given a context

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import seaborn as sns
import pandas as pd

import matplotlib.pyplot as plt


In [None]:

# Initialize Faker to generate fake data
fake = Faker()

# Define the size of the DataFrame
num_rows = 8
num_cols = 5

# --- Generate Row Labels (Random Names) ---
# Use a set to ensure uniqueness easily
unique_names = set()
while len(unique_names) < num_rows:
    unique_names.add(fake.name())
# Convert the set back to a list for the DataFrame index
row_labels = list(unique_names)

# --- Generate Column Headers (Random Security Permissions) ---
# Create a list of plausible permission-sounding words/phrases
possible_permissions = [
    "Read", "Write", "Execute", "Delete", "Admin Access",
    "View Audit Log", "Manage Users", "Create Reports", "Modify Settings",
    "Approve Requests", "Upload Files", "Download Files", "Share Content",
    "System Config", "Network Access", "Database Write", "Database Read",
    "API Access", "Grant Permission", "Revoke Permission"
]

# Ensure we have enough unique permissions to choose from
if len(possible_permissions) < num_cols:
    # If not, add generic ones
    needed = num_cols - len(possible_permissions)
    for i in range(needed):
        possible_permissions.append(f"GenericPerm{i+1}")

# Randomly sample unique permissions for the headers
column_labels = random.sample(possible_permissions, num_cols)

# --- Generate Random Data ---
# Create a 10x10 matrix of random boolean values (True/False)
# representing whether a user has a specific permission.
# You could also use random numbers (e.g., np.random.rand) if preferred.
data = np.random.choice([True, False], size=(num_rows, num_cols))

# --- Create the DataFrame ---
df = pd.DataFrame(data, index=row_labels, columns=column_labels)

# Sort the DataFrame based on each column from left to right
df = df.sort_values(by=df.columns.tolist(), ascending=True)


In [10]:
# Specify the path to the CSV file
FILE_NAME_LIST = "fake_names_list.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(FILE_NAME_LIST)
# Set the 'Name' column as the row label and remove it from the DataFrame

df.set_index('Name', inplace=True)
# Display the DataFrame

print(df)

                 Research Scientist  Data Analyst  Lab Technician  \
Name                                                                
Alex Jackson                   True         False           False   
Alex Johnson                  False         False           False   
Alex Smith                    False         False           False   
Alex Taylor                   False         False           False   
Alex Thomas                   False         False           False   
Alex White                    False         False           False   
Casey Anderson                False         False           False   
Casey Brown                   False         False           False   
Casey Harris                  False         False           False   
Casey Jackson                  True         False           False   
Casey Smith                   False         False           False   
Casey Taylor                  False         False           False   
Casey White                   Fals

In [None]:

# Count the number of rows and columns in the DataFrame
num_rows, num_cols = df.shape
# --- Display the DataFrame ---
print(f"A {num_rows}x{num_cols} DataFrame:\n")
# Replace True with 1 and False with an empty string
df_display = df.replace({True: 1, False: ''})

# Display the modified DataFrame
# print(df_display)

# Display the DataFrame as a heatmap chart
# plt.figure(figsize=(num_cols, num_rows))
# plt.title("Attributes Heatmap")
# sns.heatmap(df.replace({True: 1, False: 0}), annot=df_display, fmt='', cmap='coolwarm', cbar=False)
# plt.title("Permissions Heatmap")
# plt.show()

# You can also inspect parts of it:
# print("\nDataFrame Info:")
# df.info()
print("\nFirst 5 rows:")
print(df.head())


A 57x70 DataFrame:


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
Index: 57 entries, Alex Jackson to Taylor Johnson
Data columns (total 70 columns):
 #   Column                     Non-Null Count  Dtype
---  ------                     --------------  -----
 0   Research Scientist         57 non-null     bool 
 1   Data Analyst               57 non-null     bool 
 2   Lab Technician             57 non-null     bool 
 3   Principal Investigator     57 non-null     bool 
 4   Postdoctoral Fellow        57 non-null     bool 
 5   Research Assistant         57 non-null     bool 
 6   Project Manager            57 non-null     bool 
 7   Technical Writer           57 non-null     bool 
 8   Software Engineer          57 non-null     bool 
 9   Bioinformatician           57 non-null     bool 
 10  Statistician               57 non-null     bool 
 11  Clinical Researcher        57 non-null     bool 
 12  Grant Coordinator          57 non-null     bool 
 13  Regulatory Specialist      

find permisisons that we don't want to include in roles; permissions that only have a few members

In [2]:
# Count the number of assignments (occurrences of '1') for each permission
permission_counts = df_display.applymap(lambda x: 1 if x == 1 else 0).sum()

# Convert the result to a DataFrame for better readability
permission_summary = permission_counts.reset_index()
permission_summary.columns = ['Permission', 'Number of Assignments']
permission_summary = permission_summary.sort_values(by='Number of Assignments', ascending=True)
# Display the summary
print(permission_summary)

       Permission  Number of Assignments
2      API Access                      2
4    Admin Access                      3
1   Database Read                      4
0          Delete                      5
3  Create Reports                      6


  permission_counts = df_display.applymap(lambda x: 1 if x == 1 else 0).sum()


Now remove low permisison counts from the matrix

In [3]:
filter_under = 2
# Filter permissions with x or more assignments

permissions_to_keep = permission_counts[permission_counts >= filter_under].index

# Update the df dataframe to only include these permissions
df = df[permissions_to_keep]

# Display the updated dataframe
print(df)

                    Delete  Database Read  API Access  Create Reports  \
Jill Atkins          False          False       False           False   
Sarah Cooper         False           True       False            True   
Christopher Oliver   False           True        True            True   
Charles Vasquez       True          False       False            True   
Meghan Bolton         True          False       False            True   
Brian Tran            True          False        True            True   
Samantha Bowen        True           True       False           False   
Valerie Hamilton      True           True       False            True   

                    Admin Access  
Jill Atkins                False  
Sarah Cooper               False  
Christopher Oliver         False  
Charles Vasquez            False  
Meghan Bolton              False  
Brian Tran                  True  
Samantha Bowen              True  
Valerie Hamilton            True  


craft a table to generate a lattice

In [4]:
from tabulate import tabulate
import re

# Convert the dataframe to an ASCII table
ascii_table_no_horizontal = tabulate(df, headers='keys', tablefmt='pipe')
# Use regex to remove the header row with mostly hyphens
ascii_table_no_horizontal = re.sub(r'\|:.*\n', '', ascii_table_no_horizontal)

# Use regex to remove the very first pipe character in the ASCII table
ascii_table_no_horizontal = re.sub(r'^\|', '', ascii_table_no_horizontal, flags=re.MULTILINE)
# Replace '1' with 'X' and '0' with a blank space in the ASCII table
ascii_table_no_horizontal = ascii_table_no_horizontal.replace('1', 'X').replace('0', ' ')
print(ascii_table_no_horizontal)

                    |   Delete |   Database Read |   API Access |   Create Reports |   Admin Access |
 Jill Atkins        |          |                 |              |                  |                |
 Sarah Cooper       |          |               X |              |                X |                |
 Christopher Oliver |          |               X |            X |                X |                |
 Charles Vasquez    |        X |                 |              |                X |                |
 Meghan Bolton      |        X |                 |              |                X |                |
 Brian Tran         |        X |                 |            X |                X |              X |
 Samantha Bowen     |        X |               X |              |                  |              X |
 Valerie Hamilton   |        X |               X |              |                X |              X |


In [5]:
from concepts import Context
from concepts.visualize import lattice

# https://concepts.readthedocs.io/en/stable/manual.html#formal-contexts

# Convert the DataFrame to a formal context
context = Context.fromstring(ascii_table_no_horizontal)

# Eval the closest matching concept for two users
#context['Casey Taylor', ]
#context['Read','Database Write']



In [6]:
l = context.lattice
# Show the concept lattice
for extent, intent in l:
    print('%r %r' % (extent, intent))
# more general at the top, more specific at the bottom
# superconcepts, and subconcepts

() ('Delete', 'Database Read', 'API Access', 'Create Reports', 'Admin Access')
('Christopher Oliver',) ('Database Read', 'API Access', 'Create Reports')
('Brian Tran',) ('Delete', 'API Access', 'Create Reports', 'Admin Access')
('Valerie Hamilton',) ('Delete', 'Database Read', 'Create Reports', 'Admin Access')
('Christopher Oliver', 'Brian Tran') ('API Access', 'Create Reports')
('Brian Tran', 'Valerie Hamilton') ('Delete', 'Create Reports', 'Admin Access')
('Samantha Bowen', 'Valerie Hamilton') ('Delete', 'Database Read', 'Admin Access')
('Sarah Cooper', 'Christopher Oliver', 'Valerie Hamilton') ('Database Read', 'Create Reports')
('Brian Tran', 'Samantha Bowen', 'Valerie Hamilton') ('Delete', 'Admin Access')
('Sarah Cooper', 'Christopher Oliver', 'Samantha Bowen', 'Valerie Hamilton') ('Database Read',)
('Charles Vasquez', 'Meghan Bolton', 'Brian Tran', 'Valerie Hamilton') ('Delete', 'Create Reports')
('Charles Vasquez', 'Meghan Bolton', 'Brian Tran', 'Samantha Bowen', 'Valerie Hamilt

In [7]:

l.supremum # top
l.infimum # bottom
#l[1]
#l['Execute',]
l.infimum.upper_neighbors # all the concepts that are more general than this one
l.supremum.lower_neighbors # all concepts that are more specific than this one

(<Concept {Sarah Cooper, Christopher Oliver, Charles Vasquez, Meghan Bolton, Brian Tran, Valerie Hamilton} <-> [Create Reports] <=> Create Reports>,
 <Concept {Charles Vasquez, Meghan Bolton, Brian Tran, Samantha Bowen, Valerie Hamilton} <-> [Delete] <=> Delete>,
 <Concept {Sarah Cooper, Christopher Oliver, Samantha Bowen, Valerie Hamilton} <-> [Database Read] <=> Database Read>)

In [8]:
from IPython.display import Image

dot = l.graphviz()
#print(dot.source)
# Render the lattice graph using Graphviz
dot.render('lattice', format='png', cleanup=True, view=True)
#Image(filename='lattice.png')

'lattice.png'