<h1 style="color:#181818; font-weight:900; font-size: 1.2em">DEPENDENCY INSTALLATION</h1>

In [None]:
%pip install --upgrade pip
%pip install python-gitlab
%pip install python-dotenv
%pip install pyvis

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GET NUMBER OF DEPENDENCIES INSTALLED <em style="color:#181800; font-weight:900; font-size: 0.6em">NOT REQUIRED</em></h1>

In [None]:
!pip freeze | wc -l

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">IMPORT DEPENDENCIES</h1>

In [None]:
import os
import re
import json
import copy
import dotenv
import gitlab
from typing import Union
from pyvis.network import Network

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">LOAD ENVIRONMENT AND GET ENVIRONMENT VARIABLES</h1>

In [None]:
# Load environment variables through python-dotenv (typically from a local .env file).
dotenv.load_dotenv()
# Token passed as private_token to the GitLab client below; os.getenv yields None when it is not set.
READ_AUTH_KEY = os.getenv("READ_AUTH_KEY")
# Base URL of the GitLab instance queried by the data-fetching cell.
HOST = "https://gitlab.turntabl.net"

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">UTILITY METHODS</h1>

In [None]:
def remove_duplicates_from_list_of_dict(dict_list: list, key:str) -> list:
    '''
        Drop entries of dict_list that share the same value under key.
        When several entries collide the last one is kept, at the position where
        that key value first appeared. Every entry must contain key.'''
    unique_by_key = {}
    for entry in dict_list:
        unique_by_key[entry[key]] = entry
    return list(unique_by_key.values())
    
def remove_duplicates_from_list_of_list(list_lists:list):
    '''
        Return the inner sequences of list_lists without repeats, keeping the first
        occurrence of each one and the original order.
        Inner sequences are compared as tuples, so their items must be hashable.'''
    already_seen = set()
    unique_items = []
    for inner in list_lists:
        fingerprint = tuple(inner)
        if fingerprint in already_seen:
            continue
        already_seen.add(fingerprint)
        unique_items.append(inner)
    return unique_items

def extract_keys_and_values(dict_list: list):
    '''
        Turn a list of dicts into a dict of lists (column-wise view of the records).
        The keys are taken from the first dict; every dict is expected to carry the same keys.
        An empty dict_list yields an empty dict instead of raising IndexError.'''
    if not dict_list:
        return {}
    # Assuming all dictionaries have the same keys
    keys = dict_list[0].keys()
    return { key: [item[key] for item in dict_list] for key in keys }

def generate_id(_Type, _Id):
    '''
        Build an identifier of the form "<type>.<id>",
        e.g. generate_id('nodes.User', 187) -> 'nodes.User.187'.'''
    return '.'.join((str(_Type), str(_Id)))

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">JSON FILE I/O OPERATIONS </h1>

In [None]:
def read_json_file_to_obj(file_path:str) -> Union[dict, list, None]:
    '''
        Read file_path and return the parsed JSON content.
        The file is decoded as UTF-8 instead of the platform default encoding, in line
        with the explicit UTF-8 used when the graph HTML is written.
        Best-effort: any failure (missing file, invalid JSON, ...) is printed and None is returned.'''
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        print(f'{e.__class__.__name__}: {e}')
        return None

def write_obj_to_json_file(file_path:str, data:dict):
    '''
        Serialise data as JSON and write it to file_path, replacing any previous content.
        The JSON text is built before the file is opened, so a serialisation error
        (e.g. a value that is not JSON serialisable) leaves an existing file untouched
        instead of truncating it.
        Best-effort: any failure is printed and the function returns None.'''
    try:
        payload = json.dumps(data)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        print(f'{e.__class__.__name__}: {e}')

def update_json_graph_data(file_path:str, nodes:list=None, relationships:list=None):
    '''
        Merge new graph data with the data already stored in file_path and return the result
        in the format {"nodes": [], "relationships": []}. Nothing is written by this function.

        nodes / relationships are the new entries; both default to "no new entries".
        The arguments are copied, so neither the caller's lists nor a shared default list
        are modified by the merge.
        When file_path holds a non-empty JSON object, its "nodes" and "relationships" are
        appended after the new entries and the nodes are then de-duplicated on their "id" key.
        When the file is missing, unreadable or empty, only the new entries are returned.'''

    # Copies keep the merge from leaking into the arguments (or into a shared default value).
    data = {
        "nodes": list(nodes or []),
        "relationships": list(relationships or [])
    }

    f_data = read_json_file_to_obj(file_path)

    # Only a JSON object can carry "nodes"/"relationships"; any other payload is ignored.
    if isinstance(f_data, dict) and f_data:
        data["nodes"].extend(f_data.get("nodes") or [])
        data["relationships"].extend(f_data.get("relationships") or [])

        # remove duplicates
        # NOTE(review): stored nodes come after the new ones and the last entry per "id" wins,
        # so a stored node replaces a new node with the same "id" - confirm this is intended.
        data["nodes"] = remove_duplicates_from_list_of_dict(data["nodes"], "id")
        # Relationships are not de-duplicated: remove_duplicates_from_list_of_list hashes each
        # entry as a tuple, which fails for the (from, to, type, props-dict) edges built in this notebook.

    return data

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">OBJECT TO JSON MAPPING OPERATIONS</h1>

In [None]:
def get_schema_path_properties(schema_path: str, schema_file_path: str = "../databox/graph.schema.json") -> dict:
    '''
        schema_path (object type) follows the format parent.child.grandchild...,
        e.g. nodes.[Type], relationships.[Type]
        schema_path is case sensitive
        ".properties" is appended to schema_path before it is resolved inside schema_file_path.
        Returns a dict with one key per property name found there, each mapped to None.
        Returns {} when the schema file cannot be read or the path does not resolve
        to a dict or list.'''

    node = read_json_file_to_obj(schema_file_path)

    for segment in f"{schema_path}.properties".split('.'):
        # Stop descending once the current value is no longer a JSON object
        # (a list reached midway, or None when the schema file could not be read).
        if not isinstance(node, dict):
            break
        node = node.get(segment, [])

    # Only containers of property names can be mapped; scalars and None yield no properties.
    if not isinstance(node, (dict, list)):
        return {}

    return { name: None for name in node }

def map_obj_to_json_schema(obj:object, schema_path_properties: dict, **extra_props) -> dict:
    '''
        Build a dict with the keys of schema_path_properties, filled from obj.

        obj is the object to read values from. Its attributes come from obj.__dict__, or from
        the "_attrs" dict stored there when one is present.
        schema_path_properties is a dict with the keys we want to set. For each key the value decides
        how it is filled:
            None          -> the attribute of obj with the same name as the key
            "$other_name" -> the attribute of obj called other_name
            anything else -> used as-is (custom value)
        Attributes missing on obj are filled with an empty string.
        extra_props are merged on top and win over schema keys of the same name.
        schema_path_properties itself is left unmodified.'''

    attrs = obj.__dict__
    obj_dict = attrs['_attrs'] if '_attrs' in attrs else attrs

    # A fresh dict is built so the schema template passed in can be reused for the next object.
    mapped = {}
    for k, v in schema_path_properties.items():
        if v is None:
            mapped[k] = obj_dict.get(k, '')
        elif isinstance(v, str) and v.startswith('$'):
            mapped[k] = obj_dict.get(v[1:], '')
        else:
            mapped[k] = v

    return {**mapped, **extra_props}

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GET DATA FROM REMOTE DATA SOURCE</h1>

In [None]:
# GitLab API (v4) client for HOST, authenticated with the READ_AUTH_KEY token.
# NOTE(review): ssl_verify=False disables TLS certificate verification while the private
# token is sent over the connection - confirm this is required for this host.
gl = gitlab.Gitlab(url=HOST, private_token=READ_AUTH_KEY, api_version=4, ssl_verify=False)

# fetch groups
groups = gl.groups.list(get_all=True)

# fetch users
users = gl.users.list(get_all=True)

# groups users relationship
# One entry per group: the group's node id ('from_id'), the node ids of its members ('to_ids')
# and the group object itself ('rel_obj'). Members are listed with one extra request per group.
groups_users = []
for group in groups:
    groups_users.append({ 'from_id': generate_id('nodes.Group', group.id),'to_ids': [generate_id('nodes.User', user.id) for user in group.members.list(get_all=True)], 'rel_obj': group })

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">GRAPH PLOT OPERATIONS</h1>

In [None]:
def create_graph_nodes(network, ids:list, labels:list, titles:list,  color, size, shape='circle', **options):
    '''
        Add one node per entry of ids to network with a single network.add_nodes call.
        labels and titles are passed through unchanged; color and size are repeated once per id.
        Extra keyword options are forwarded to network.add_nodes as given.
        NOTE(review): shape is accepted but is not forwarded to network.add_nodes.
        Any error raised while adding the nodes is printed instead of propagated.'''
    try:
        node_count = len(ids)
        network.add_nodes(
            ids,
            label=labels,
            color=[color for _ in range(node_count)],
            size=[size for _ in range(node_count)],
            title=titles,
            **options,
        )
    except Exception as err:
        print(f"{err.__class__.__name__}: {err}")

def create_graph_edges(network, edges:list, weight=5.87, color=None):
    '''
        Add every edge of edges to network.
        Edge format -> (from, to, relation_type, extra_props: dict). relation_type becomes the
        edge title and extra_props are forwarded to network.add_edge as keyword options.
        Every edge is added with value=10.
        color, when given, is applied to the edges whose extra_props do not already set "color".
        NOTE(review): weight is accepted but not used.
        Errors are handled per edge: a failing edge (e.g. one that points at a node missing from
        the network) is printed and skipped, and the remaining edges are still added.'''
    for edge in (edges or []):
        try:
            # Copy the props so the caller's dict is not altered by the color default.
            edge_options = dict(edge[3])
            if color is not None:
                edge_options.setdefault("color", color)
            network.add_edge(edge[0], edge[1], value=10, title=edge[2], **edge_options)
        except Exception as e:
            print(f"{e.__class__.__name__}: {e}")

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">VISUALISE GRAPH</h1>

In [None]:
def create_graph(network, name='../graphs/nx2.html'):
    '''
        Write the HTML produced by network.generate_html() to the file name, encoded as UTF-8.
        The directory part of name is created first when it does not exist yet, so a fresh
        checkout without the target folder no longer fails.'''
    html = network.generate_html()
    target_dir = os.path.dirname(name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(name, mode='w', encoding='utf-8') as f:
        f.write(html)

def show_graph(network, name='../graphs/nx2.html'):   
    '''
        Delegate to network.show(name) to render the graph under the file name.
        NOTE(review): unlike create_graph, no encoding is chosen here; the sandbox cell notes
        that this call may fail on systems with specific encoding constraints.'''
    network.show(name)

<h1 style="color:#181818; font-weight:900; font-size: 1.2em">QUERY GRAPH DATA</h1>

In [None]:
# TODO: graph query operations are still to be added.

<h1 style="color:#181800; font-weight:900; font-size: 1.2em">SANDBOX</h1>

In [None]:
'''
Node Operations
'''

# get graph.nodes schemas for specific types
# (each result is a dict of property names mapped to None, read from the default schema file)
user_props = get_schema_path_properties('nodes.User')
group_props = get_schema_path_properties('nodes.Group')

# generate list of graph.nodes data per type [set node ID here]
# node_id is added on top of the schema properties and has the form '<node type>.<object id>'
users_data = [ map_obj_to_json_schema(user, user_props, node_id=generate_id('nodes.User', user.id)) for user in users ]
groups_data = [ map_obj_to_json_schema(group, group_props, node_id=generate_id('nodes.Group', group.id)) for group in groups ]

# get updated list of nodes and relationships 
# (the new user and group nodes merged with what graph.data.json already contains)
data_obj = update_json_graph_data("../databox/graph.data.json", [*users_data, *groups_data])

# write the merged data back to the graph data file
write_obj_to_json_file("../databox/graph.data.json", data_obj)

# create graph
# pyvis network configured for notebook use, with the select and filter menus enabled
graph_network = Network(height="80vh", width="100%", select_menu=True, filter_menu=True, notebook=True, cdn_resources='in_line')
graph_network.inherit_edge_colors(True)

# set the physics layout of the network [barnes_hut(), force_atlas_2based(), repulsion(), hrepulsion]
graph_network.force_atlas_2based()

# create user nodes
# extract_keys_and_values turns the list of node dicts into one list per property.
# assumes the User schema provides "username" and "name" - TODO confirm against graph.schema.json
zipped_users_data = extract_keys_and_values(users_data)
create_graph_nodes( network=graph_network, ids=zipped_users_data["node_id"], labels=zipped_users_data["username"], 
    color='#0fb9b1', size=20, titles=zipped_users_data["name"])

# create group nodes
# assumes the Group schema provides "name" and "full_name" - TODO confirm against graph.schema.json
zipped_groups_data = extract_keys_and_values(groups_data)
create_graph_nodes( network=graph_network, ids=zipped_groups_data["node_id"], labels=zipped_groups_data["name"], 
    color='#eb4d4b', size=20, titles=zipped_groups_data["full_name"])

'''
Edge & Relation Operations
'''

# get graph.relationships schemas for specific types [get props for relation]
groups_users_props = get_schema_path_properties('relationships.OWNS')

# print(groups_users_props)

# generate list of graph.relationships data per type [set node ID here]
# One (from, to, relation_type, props) tuple per group member; the props are mapped once per
# group from the group object, so all edges of a group share the same props dict.
groups_users_data = []
for item in groups_users:  
    rel_obj_map = map_obj_to_json_schema(item['rel_obj'], groups_users_props)
    for to_id in item['to_ids']: 
        groups_users_data.append( ( item['from_id'], to_id, 'relationships.OWNS', rel_obj_map ) )

# get updated list of nodes and relationships 
# (no new nodes are passed here, only the group -> user relationships)
data_obj = update_json_graph_data("../databox/graph.data.json", relationships=groups_users_data)

# write the merged data back to the graph data file
write_obj_to_json_file("../databox/graph.data.json", data_obj)

# create edges
''' edge format -> (from, to, relation_type, extra_props:dict) .e.g. ('nodes.User.187', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"}) '''
# Sample edges in the expected format, kept for manual experiments:
# dummy_edges = [
#     ('nodes.User.187', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"}),
#     ('nodes.User.1', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"})
# ]
create_graph_edges(graph_network, groups_users_data)

# write the graph to its HTML file (default target of create_graph)
create_graph(graph_network)

# show graph [might fail on systems with specific encoding constraints]
# show_graph(graph_network) 

<h1 style="color:#fff; font-weight:900; font-size: 1.4em">Brevan Howard Tasks</h1>

<h1 style="color:#bdc3c7; font-weight:900; font-size: 1.2em">INSTALL PACKAGES</h1>

In [None]:
# %pip uninstall pandas 
%pip install pandas==2.0.3
%pip install "pandas[excel]"

<h1 style="color:#bdc3c7; font-weight:900; font-size: 1.2em">CONVERT EXCEL TO CSV AND READ FROM CSV FILE</h1>

In [None]:
# NOTE(review): this import sits outside the notebook's import cell; this cell has to run
# before any other cell that uses pd.
import pandas as pd

# set excel file path
input_excel_file="./input_excel.xlsx"
target_file_path="./target_file.csv"

# read excel file (skiprows=1 skips the first row of the sheet)
excel_file = pd.read_excel(input_excel_file, skiprows=1)

# converting excel file into CSV file (column headers kept, row index left out)
excel_file.to_csv(target_file_path, index=None, header=True)

# read and convert the output csv file into a dataframe object
data_frame = pd.DataFrame(pd.read_csv(target_file_path))

# group the rows by the 'Instance' column and collect the 'Greoe' values of each group into a list
# NOTE(review): 'Greoe' is presumably the exact column header in the spreadsheet - verify the spelling.
result_dict = data_frame.groupby('Instance')['Greoe'].agg(list).to_dict()

# prints the full data frame followed by the {Instance: [values]} mapping
print(data_frame, result_dict, sep='\n')

<h1 style="color:#bdc3c7; font-weight:900; font-size: 1.2em">GET DATA FROM SOURCES</h1>

In [None]:
# Same URL as HOST defined in the environment cell.
HOST_V1 = "https://gitlab.turntabl.net"

# create gitlab instance
# This rebinds gl and users from the earlier data-fetching cell.
# NOTE(review): ssl_verify=False disables TLS certificate verification while the private
# token is sent over the connection - confirm this is required for this host.
gl = gitlab.Gitlab(url=HOST_V1, private_token=READ_AUTH_KEY, api_version=4, ssl_verify=False)

# fetch users
users = gl.users.list(get_all=True)

# fetch repositories
projects = gl.projects.list(get_all=True)

<h1 style="color:#bdc3c7; font-weight:900; font-size: 1.2em">APPLY FILTER</h1>

In [None]:

# print group path
# (debug output for the first project; raises IndexError when no projects were fetched)
print(projects[0].namespace['full_path'], projects[0].name, sep='\n')

# instance_list will be from result_dict list value [i.e. groups to omit]
# For now a single namespace path is hard-coded.
instance_list = ['turntabl-hackathon/turntabl-website-hackathon']

# FILTER PROJECTS 
# keep the projects whose namespace full_path is not listed in instance_list
filtered_projects = [project for project in projects if project.namespace['full_path'] not in instance_list]

# counts before/after filtering, followed by the complete filtered list
print(len(projects), len(filtered_projects), filtered_projects, sep='\n')

# FILTER USERS -> instance_list will be from result_dict list value
# ids of every member of a group whose full_path is listed in instance_list
user_ids_to_omit = set()

# groups is fetched again here, rebinding the name used by the earlier cells
groups = gl.groups.list(get_all=True)
for group in groups:
    if group.full_path in instance_list:
        members = group.members.list(get_all=True)
        user_ids_to_omit.update(member.id for member in members)

filtered_users = [user for user in users if user.id not in user_ids_to_omit]

# projects users -> [relationship and edges]
# One entry per kept project: the project's node id ('from_id'), the node ids of its members
# that are not omitted ('to_ids') and the project object itself ('rel_obj').
# Members are listed with one extra request per project.
filtered_projects_users = []
for project in filtered_projects:
    filtered_projects_users.append({ 'from_id': generate_id('nodes.Project', project.id), 'to_ids': [generate_id('nodes.User', user.id) for user in project.members.list(get_all=True) if user.id not in user_ids_to_omit], 'rel_obj': project })

<h1 style="color:#bdc3c7; font-weight:900; font-size: 1.2em">CREATE NODES, EDGES AND GRAPH</h1>

In [None]:
# create graph
# separate pyvis network for the filtered project/user view, configured like the sandbox one
graph_network_filtered = Network(height="80vh", width="100%", select_menu=True, filter_menu=True, notebook=True, cdn_resources='in_line')
graph_network_filtered.inherit_edge_colors(True)

# set the physics layout of the network [barnes_hut(), force_atlas_2based(), repulsion(), hrepulsion]
graph_network_filtered.force_atlas_2based()

# set graph data and scheme file paths
# (the data file is the same one the sandbox cell reads and writes)
graph_data_file_path = '../databox/graph.data.json'
graph_schema_file_path = '../databox/graph.schema.json'
graph_target_html_file = '../graphs/filtered_repo_user.html'

'''
Node Data Operations
'''

# get graph.nodes schemas for specific types
user_props = get_schema_path_properties('nodes.User', graph_schema_file_path)
project_props = get_schema_path_properties('nodes.Project', graph_schema_file_path)

# generate list of graph.nodes data per type [set node ID here]
# node_id is added on top of the schema properties and has the form '<node type>.<object id>'
filtered_users_data = [ map_obj_to_json_schema(user, user_props, node_id=generate_id('nodes.User', user.id)) for user in filtered_users ]
filtered_projects_data = [ map_obj_to_json_schema(project, project_props, node_id=generate_id('nodes.Project', project.id)) for project in filtered_projects ]

'''
Edge & Relation Data Operations
'''

# get graph.relationships schemas for specific types [get props for relation]
# The relationship type is named once so the schema lookup and the edge label always agree.
# NOTE(review): the edges used to be labelled 'relationships.OWNS' while their props came from the
# BELONGS_TO schema - confirm BELONGS_TO is the intended type for project -> user edges.
projects_users_rel_type = 'relationships.BELONGS_TO'
projects_users_props = get_schema_path_properties(projects_users_rel_type, graph_schema_file_path)

# generate list of graph.relationships data per type [set node ID here]
# One (from, to, relation_type, props) tuple per project member; the props are mapped once per
# project from the project object, so all edges of a project share the same props dict.
filtered_projects_users_data = []
for item in filtered_projects_users:
    rel_obj_map = map_obj_to_json_schema(item['rel_obj'], projects_users_props)
    for to_id in item['to_ids']:
        filtered_projects_users_data.append( ( item['from_id'], to_id, projects_users_rel_type, rel_obj_map ) )

'''
Write data to Schema db
'''
# get updated list of nodes and relationships 
# (filtered user/project nodes and their relationships merged with the stored graph data)
filtered_data_obj = update_json_graph_data(graph_data_file_path, [*filtered_users_data, *filtered_projects_data], filtered_projects_users_data)

# write the merged data back to the graph data file
write_obj_to_json_file(graph_data_file_path, filtered_data_obj)

'''
Create Nodes
'''

# create user nodes
# zipped_users_data from the sandbox cell is rebound here to the filtered users.
# assumes the User schema provides "username" and "name" - TODO confirm against graph.schema.json
zipped_users_data = extract_keys_and_values(filtered_users_data)
create_graph_nodes( network=graph_network_filtered, ids=zipped_users_data["node_id"], labels=zipped_users_data["username"], 
    color='#0c2461', size=20, titles=zipped_users_data["name"])

# create project nodes
# assumes the Project schema provides "name" and "name_with_namespace" - TODO confirm against graph.schema.json
zipped_projects_data = extract_keys_and_values(filtered_projects_data)
create_graph_nodes( network=graph_network_filtered, ids=zipped_projects_data["node_id"], labels=zipped_projects_data["name"], 
    color='#f39c12', size=20, titles=zipped_projects_data["name_with_namespace"])

'''
Create Edge
'''

# create edges
''' edge format -> (from, to, relation_type, extra_props:dict) .e.g. ('nodes.User.187', 'nodes.Group.1637', 'relationships.BELONGS_TO', {"key":"value"}) '''
create_graph_edges(graph_network_filtered, filtered_projects_users_data)

'''
Create Graph
'''

# write the filtered graph to graph_target_html_file
create_graph(graph_network_filtered, graph_target_html_file)