<h1 style="color:#181818; font-weight:900; font-size: 1.2em">DEPENDENCY INSTALLATION</h1>

In [1]:
# Upgrade pip itself before installing the notebook's dependencies.
%pip install --upgrade pip
# GitLab API client (imported below as `gitlab`).
%pip install python-gitlab
# Loads variables from a .env file (imported below as `dotenv`).
%pip install python-dotenv
# Network graph visualisation (provides `pyvis.network.Network`).
%pip install pyvis

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GET NUMBER OF DEPENDENCIES INSTALLED <em style="color:#181800; font-weight:900; font-size: 0.6em">NOT REQUIRED</em></h1>

In [2]:
# Count installed packages: `pip freeze` prints one line per package, `wc -l` counts those lines.
# NOTE(review): `!pip` runs whichever pip is first on PATH, which may not be the kernel's environment — confirm.
!pip freeze | wc -l

      45


<h1 style="color:#181818; font-weight:900; font-size: 1.2em">IMPORT DEPENDENCIES</h1>

In [3]:
import os
import re
import json
import copy
import dotenv
import gitlab
from typing import Union
from pyvis.network import Network

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">LOAD ENVIRONMENT AND GET ENVIRONMENT VARIABLES</h1>

In [4]:
# Populate the process environment from a local .env file (when one is found).
dotenv.load_dotenv()

# Base URL of the GitLab instance this notebook talks to.
HOST = "https://gitlab.turntabl.net"

# API token read from the environment; None when the variable is not set.
READ_AUTH_KEY = os.environ.get("READ_AUTH_KEY")

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">UTILITY METHODS</h1>

In [5]:
def remove_duplicates_from_list_of_dict(dict_list: list, key:str) -> list:
    '''
        Collapse dictionaries that share the same value under `key`.
        When several dictionaries share a value, the LAST one seen is kept,
        placed at the position where that value first appeared.
        Raises KeyError if any dictionary does not contain `key`.'''
    unique_by_key = {}
    for entry in dict_list:
        # Re-assigning an existing key replaces the value but keeps its slot.
        unique_by_key[entry[key]] = entry
    return [*unique_by_key.values()]
    
def remove_duplicates_from_list_of_list(list_lists:list):
    '''
        Remove duplicate rows from a list of lists/tuples, keeping the first
        occurrence of each row and preserving the original order.
        Rows are compared as tuples, so a list and a tuple with equal
        elements count as duplicates.
        Rows containing unhashable elements (e.g. a dict of edge properties)
        are supported through an equality-based fallback.'''
    seen_hashable = set()
    seen_unhashable = []
    unique_rows = []

    for row in list_lists:
        row_key = tuple(row)
        try:
            # Fast path: set membership works when every element is hashable.
            is_new = row_key not in seen_hashable
            if is_new:
                seen_hashable.add(row_key)
        except TypeError:
            # Hashing failed (row holds a dict/list): compare by equality instead.
            is_new = row_key not in seen_unhashable
            if is_new:
                seen_unhashable.append(row_key)

        if is_new:
            unique_rows.append(row)

    return unique_rows

def extract_keys_and_values(dict_list: list):
    '''
        Turn a list of dictionaries into a dictionary of lists (one list per key).
        The keys are taken from the FIRST dictionary; every other dictionary
        must contain those keys, otherwise KeyError is raised.
        An empty input yields an empty dictionary.'''
    if not dict_list:
        # Nothing to read the keys from.
        return {}

    keys = dict_list[0].keys()
    return { key: [item[key] for item in dict_list] for key in keys }

def generate_id(_Type, _Id):
    '''
        Build a graph identifier of the form "<type>.<id>",
        e.g. generate_id('nodes.User', 187) -> 'nodes.User.187'.'''
    parts = (str(_Type), str(_Id))
    return '.'.join(parts)

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">JSON FILE I/O OPERATIONS </h1>

In [15]:
def read_json_file_to_obj(file_path:str) -> dict:
    '''
        Read a JSON file and return the decoded object.
        The file is decoded as UTF-8 regardless of the platform default encoding.
        On any failure (missing file, invalid JSON, ...) the error is printed
        and None is returned, so callers must handle a None result.'''
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort read: report the problem and signal failure with None.
        print(f'{e.__class__.__name__}: {e}')
        return None

def write_obj_to_json_file(file_path:str, data:dict):
    '''
        Serialise `data` as JSON and write it to `file_path`, replacing the file.
        The object is serialised BEFORE the file is opened, so a value that
        cannot be serialised leaves the existing file untouched instead of
        truncated.
        Any failure is printed rather than raised.'''
    try:
        # Serialise first: opening with "w" truncates the file immediately.
        payload = json.dumps(data)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        print(f'{e.__class__.__name__}: {e}')

def update_json_graph_data(file_path:str, nodes:list=None, relationships:list=None):
    '''
        Merge the given nodes and relationships with the graph data stored in
        the JSON file at file_path and return the merged result.
        The file is only read here; nothing is written.

        nodes and relationships are optional lists. They are copied, so neither
        the caller's lists nor any shared default is modified.

        returns {"nodes": [...], "relationships": [...]} with the given items
        first, followed by the items read from the file. When the file cannot
        be read or is empty, only the given items are returned.
        When file data was merged, nodes are de-duplicated on their "id" key;
        for nodes sharing an "id" the entry that came from the file is kept.'''

    # Work on copies so extending below never leaks into the caller's lists.
    data = {
        "nodes": list(nodes or []),
        "relationships": list(relationships or [])
    }

    f_data = read_json_file_to_obj(file_path)

    if f_data:
        data["nodes"].extend(f_data.get("nodes", []))
        data["relationships"].extend(f_data.get("relationships", []))

        # remove duplicates
        # NOTE(review): nodes are keyed on "id" here while the notebook attaches "node_id" — confirm which key is intended.
        data["nodes"] = remove_duplicates_from_list_of_dict(data["nodes"], "id")
        # Relationships are not de-duplicated, so repeated runs accumulate duplicates.

    return data

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">OBJECT TO JSON MAPPING OPERATIONS</h1>

In [7]:
def get_schema_path_properties(schema_path: str, schema_file_path: str = "../databox/graph.schema.json") -> dict:
    '''
        schema_path(Object Type) should follow the format -> parent.n_child.n-1_child...n-n_child 
        .e.g. nodes.[Type], relationships.[Type]
        schema_path value is case sensitive
        ".properties" is appended to schema_path before the lookup.
        returns schema_path_properties: a dict whose keys are the entries found
        at that path and whose values are all None.
        returns an empty dict when the schema file cannot be read, when the
        path does not exist, or when the path resolves to a scalar value.'''

    node = read_json_file_to_obj(schema_file_path) # None when the file could not be read

    for segment in (schema_path + ".properties").split('.'):
        # Only mappings can be descended into; stop at lists, scalars or None.
        if not isinstance(node, dict):
            break
        node = node.get(segment, [])

    if not isinstance(node, (dict, list)):
        return {}

    return { name: None for name in node }

def map_obj_to_json_schema(obj:object, schema_path_properties: dict, **extra_props) -> dict:
    '''
        Build a dict with the keys of schema_path_properties, filled from obj.

        obj is the object to read values from; its attributes are taken from
            obj.__dict__['_attrs'] when that entry exists, otherwise from obj.__dict__.
        schema_path_properties maps each output key to a rule:
            None        -> use the attribute with the same name as the key
            "$name"     -> use the attribute called "name"
            other value -> use that value as is
            Missing attributes become ''.
        extra_props are added last and override keys of the same name.
        schema_path_properties itself is not modified.'''

    attributes = obj.__dict__
    source = attributes['_attrs'] if '_attrs' in attributes else attributes

    def resolve(key, rule):
        # No rule: read the attribute that has the same name as the key.
        if rule is None:
            return source.get(key, '')
        # "$other": read the attribute named after the dollar sign.
        if str(rule).startswith('$'):
            return source.get(rule[1:], '')
        # Anything else is a literal value.
        return rule

    mapped = { key: resolve(key, rule) for key, rule in schema_path_properties.items() }
    mapped.update(extra_props)
    return mapped

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GET DATA FROM REMOTE DATA SOURCE</h1>

In [None]:
# Client for the GitLab instance at HOST, authenticated with the token from the environment.
# NOTE(review): ssl_verify=False turns off TLS certificate verification — confirm this is intended.
gl = gitlab.Gitlab(
    url=HOST,
    private_token=READ_AUTH_KEY,
    api_version=4,
    ssl_verify=False,
)

# All groups and all users (get_all=True requests every item rather than a single page).
groups = gl.groups.list(get_all=True)
users = gl.users.list(get_all=True)

# One record per group: the group's node id, the node ids of its members, and the group object itself.
groups_users = []
for group in groups:
    group_record = {
        'from_id': generate_id('nodes.Group', group.id),
        'to_ids': [generate_id('nodes.User', member.id) for member in group.members.list(get_all=True)],
        'rel_obj': group,
    }
    groups_users.append(group_record)

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GRAPH PLOT OPERATIONS</h1>

In [9]:
def create_graph_nodes(network, ids:list, labels:list, titles:list,  color, size, shape='circle', **options):
    '''
        Add one node per entry of ids to network through network.add_nodes.
        labels and titles are passed through as given; color and size are
        repeated so that every node receives the same value.
        Extra keyword options are forwarded to network.add_nodes unchanged.
        Any error is printed instead of raised.
        NOTE(review): shape is accepted but never forwarded to the network — confirm whether it should be.'''
    try:
        node_count = len(ids)
        node_attributes = {
            "label": labels,
            "color": [color] * node_count,
            "size": [size] * node_count,
            "title": titles,
        }
        network.add_nodes(ids, **node_attributes, **options)
    except Exception as e:
        print(f"{e.__class__.__name__}: {e}")

def create_graph_edges(network, edges:list, weight=5.87, color=None):
    '''
        Add every edge in edges to network through network.add_edge.

        edge format -> (from, to, relation_type, extra_props:dict)
        relation_type is used as the edge title; extra_props are forwarded as
        additional edge options. Every edge is added with value=10.
        color, when given, is applied to every edge unless the edge's own
        extra_props already define "color".
        weight is currently unused and kept for backward compatibility.

        Errors are printed instead of raised, and are handled per edge so that
        one malformed edge does not prevent the remaining edges from being added.'''
    shared_options = {} if color is None else {"color": color}

    try:
        for edge in edges:
            try:
                source, target, relation, extra_props = edge[0], edge[1], edge[2], edge[3]
                # Per-edge properties win over the shared colour.
                network.add_edge(source, target, value=10, title=relation, **{**shared_options, **extra_props})
            except Exception as e:
                print(f"{e.__class__.__name__}: {e}")
    except Exception as e:
        # Raised when edges itself cannot be iterated.
        print(f"{e.__class__.__name__}: {e}")

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">VISUALISE GRAPH</h1>

In [10]:
def show_graph(network, name='../graphs/nx2.html'):
    '''
        Render network to the HTML file name through network.show and return
        whatever network.show returns, so a cell ending in show_graph(...)
        can display a returned object.'''
    return network.show(name)

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">QUERY GRAPH DATA</h1>

In [11]:
# TODO: graph query operations are still to be added.

<h1 style="color:#181800; font-weight:900; font-size: 1.2em">SANDBOX</h1>

In [20]:
'''
Node Operations
'''

# Load the property templates for the User and Group node types from the graph schema file.
user_props = get_schema_path_properties('nodes.User')
group_props = get_schema_path_properties('nodes.Group')

# Build one node record per fetched user / group; the graph node ID is attached as `node_id`.
users_data = [ map_obj_to_json_schema(user, user_props, node_id=generate_id('nodes.User', user.id)) for user in users ]
groups_data = [ map_obj_to_json_schema(group, group_props, node_id=generate_id('nodes.Group', group.id)) for group in groups ]

# Merge the new nodes with whatever is already stored in the graph data file.
data_obj = update_json_graph_data("../databox/graph.data.json", [*users_data, *groups_data])

# Persist the merged nodes and relationships back to the graph data file.
write_obj_to_json_file("../databox/graph.data.json", data_obj)

# Create the pyvis network (notebook mode, resources embedded in-line, select and filter menus enabled).
graph_network = Network(height="80vh", width="100%", select_menu=True, filter_menu=True, notebook=True, cdn_resources='in_line')
graph_network.inherit_edge_colors(True)

# Set the physics layout of the network; alternatives: barnes_hut(), repulsion(), hrepulsion().
graph_network.force_atlas_2based()

# Create user nodes.
# Assumes the User schema defines `username` and `name`; a missing key raises KeyError here.
zipped_users_data = extract_keys_and_values(users_data)
create_graph_nodes( network=graph_network, ids=zipped_users_data["node_id"], labels=zipped_users_data["username"], 
    color='#0fb9b1', size=20, titles=zipped_users_data["name"])

# Create group nodes.
# Assumes the Group schema defines `name` and `full_name`; a missing key raises KeyError here.
zipped_groups_data = extract_keys_and_values(groups_data)
create_graph_nodes( network=graph_network, ids=zipped_groups_data["node_id"], labels=zipped_groups_data["name"], 
    color='#eb4d4b', size=20, titles=zipped_groups_data["full_name"])

'''
Edge & Relation Operations
'''

# Load the property template for the OWNS relationship type from the graph schema file.
groups_users_props = get_schema_path_properties('relationships.OWNS')

# print(groups_users_props)

# Build one edge tuple per (group, member) pair: (from_id, to_id, relation_type, properties).
# The properties dict is mapped once per group and shared by all of that group's edges.
groups_users_data = []
for item in groups_users:  
    rel_obj_map = map_obj_to_json_schema(item['rel_obj'], groups_users_props)
    for to_id in item['to_ids']: 
        groups_users_data.append( ( item['from_id'], to_id, 'relationships.OWNS', rel_obj_map ) )

# Merge the new relationships with what is already stored in the graph data file.
data_obj = update_json_graph_data("../databox/graph.data.json", relationships=groups_users_data)

# Persist the merged nodes and relationships back to the graph data file.
write_obj_to_json_file("../databox/graph.data.json", data_obj)

# Create edges (the string below documents the expected edge tuple format).
''' edge format -> (from, to, relation_type, extra_props:dict) .e.g. ('nodes.User.187', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"}) '''
# dummy_edges = [
#     ('nodes.User.187', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"}),
#     ('nodes.User.1', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"})
# ]
create_graph_edges(graph_network, groups_users_data)

# Show the graph; show_graph uses its default output path '../graphs/nx2.html'.
show_graph(graph_network)

{'name': None}
IndexError: list index out of range
../graphs/nx2.html
