In [46]:
import os
import time
from collections import deque

import pandas as pd
from arango import ArangoClient

In [2]:
# Connection settings come from the environment so that no secret is stored in
# the notebook. ARANGO_PASSWORD is required; the other variables fall back to
# the values this notebook was originally written against.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed to anyone with access to the notebook and should be rotated.
client = ArangoClient(
    hosts=os.environ.get("ARANGO_HOSTS", "https://2ae4f052d710.arangodb.cloud:8529")
)

db = client.db(
    os.environ.get("ARANGO_DB", "machine_learning"),
    username=os.environ.get("ARANGO_USERNAME", "lab_test"),
    password=os.environ["ARANGO_PASSWORD"],  # raises KeyError when the variable is unset
)

## Graph definition

In [4]:
# Load the processed CSV; the next cell groups it by sender/receiver site.
processed_csv_path = '../data/processed.csv'
processed_df = pd.read_csv(processed_csv_path)

In [5]:
# Number of rows in the processed data for each (sender site, receiver site) pair.
site_pair_columns = ['sender_site_id', 'receiver_site_id']
processed_df.groupby(site_pair_columns).size()

sender_site_id                    receiver_site_id                
00324dd016fb46109856afbb202d64ba  66dfa5395cf740d3970d4d0bc660058e     2
00822da93b50457fbd0f92ca112990e2  a4c503f2dd924587a34e8c6adedde6ba     4
008cf3bb6ce6426bbe8379978efabda3  90c753ee53ba463d9b3fd5a270837ede     7
00ccf9b594344c5c9de4adbefa907d51  90c753ee53ba463d9b3fd5a270837ede    10
00d4cd3f9c9e467b906b42e88db3bcbd  3b34b76e83354031ae39ae3c5336c510    18
                                                                      ..
ffa97e34eeaf47e89e1f9898950ea8ab  d007210609e348228faefaceb79aa442     5
ffdc58a45fe540359a6f2f71c4523eaf  65b624d3dbd0449497ada46c99d3d06b     2
ffe42639b8f74ef299f9b81936f1f565  408ac5662a9d4a92a661383be2c0ee92     3
                                  a579f131d69d4f10988a8d8e4a1bdaee    12
fff5817df2494088b74fbe59984732a7  3b34b76e83354031ae39ae3c5336c510     8
Length: 2194, dtype: int64

In [14]:
# Collapse the documents in `transactions` into one record per
# (sender, receiver) pair, with the number of transactions in `count`.
# The records use `_from` / `_to` keys so they can be inserted as edges.
#
# WITH COUNT INTO counts each group on the server directly; the previous
# `INTO group` + LENGTH(group) form had to build every group's member list
# only to measure its length.
cursor = db.aql.execute(
        '''
            FOR transaction in transactions
                LET sender_site = DOCUMENT(transaction._from)._id
                LET receiver_site = DOCUMENT(transaction._to)._id
                COLLECT sender = sender_site, receiver = receiver_site WITH COUNT INTO pair_count
                RETURN {
                    _from: sender,
                    _to: receiver,
                    count: pair_count
                }
        '''
    )
# Drain the cursor into a plain list so the records can be bulk-inserted later.
results = list(cursor)

In [37]:
# Rebuild the aggregated edge collection from scratch so that re-running this
# cell never leaves edges from a previous run behind.
if db.has_collection('graph_site_edges'):
    db.delete_collection('graph_site_edges')

edges = db.create_collection('graph_site_edges', edge=True)

# insert_many reports per-document failures as exception objects inside the
# returned list rather than raising (per the python-arango documentation), so
# the result has to be inspected; otherwise a partially loaded edge set would
# silently skew every centrality computed from it.
insert_outcomes = edges.insert_many(results, silent=False)
insert_failures = [outcome for outcome in insert_outcomes if isinstance(outcome, Exception)]
if insert_failures:
    raise RuntimeError(
        '{} of {} edges failed to insert; first error: {}'.format(
            len(insert_failures), len(results), insert_failures[0]
        )
    )

# Displayed as the cell output: number of edges now stored.
edges.count()

2194

In [9]:
# Handle to the `sites` collection, which a later cell registers as the
# vertex collection of the graph.
sites_collection_name = 'sites'
nodes = db.collection(sites_collection_name)

In [17]:
# (Re)create the named graph: drop any existing `graph_site`, then declare
# `sites` as its vertex collection and `graph_site_edges` as site -> site edges.
graph_name = 'graph_site'
if db.has_graph(graph_name):
    db.delete_graph(graph_name)

graph = db.create_graph(graph_name)
graph.create_vertex_collection('sites')

site_to_site_definition = {
    'edge_collection': 'graph_site_edges',
    'from_vertex_collections': ['sites'],
    'to_vertex_collections': ['sites'],
}
# Last expression of the cell, so the resulting edge collection is displayed.
graph.create_edge_definition(**site_to_site_definition)

<EdgeCollection graph_site_edges>

# Centrality

## Degree

In [32]:
# Degree centrality per site: count the edges in `graph_site_edges` that leave
# (outbound) or enter (inbound) each site, plus their sum.
degree_aql = """
    FOR v IN sites
        LET outbound_degree_centrality = LENGTH(
            FOR e IN graph_site_edges
                FILTER e._from == v._id
                RETURN e
        )
        LET inbound_degree_centrality = LENGTH(
            FOR e IN graph_site_edges
                FILTER e._to == v._id
                RETURN e
        )
        
        LET total_degree_centrality = outbound_degree_centrality + inbound_degree_centrality

        RETURN {
            site: v._key,
            name: DOCUMENT(v.organization_id).name,
            country: v.country_id,
            inbound_degree: inbound_degree_centrality,
            outbound_degree: outbound_degree_centrality,
            total_degree: total_degree_centrality
        }
"""
degree_cursor = db.aql.execute(degree_aql)
degree = list(degree_cursor)


def _top_three_by(field):
    """Return the three records of `degree` with the largest value of `field`."""
    ranked = sorted(degree, key=lambda record: record[field], reverse=True)
    return ranked[:3]


inbound_degree_sites = _top_three_by('inbound_degree')
outbound_degree_sites = _top_three_by('outbound_degree')
total_degree_sites = _top_three_by('total_degree')

In [33]:
# Display the three sites with the highest inbound degree (computed in the degree cell above).
inbound_degree_sites

[{'site': '3b34b76e83354031ae39ae3c5336c510',
  'name': 'Tengizchevroil Llp',
  'country': 'countries/KZ',
  'inbound_degree': 876,
  'outbound_degree': 1,
  'total_degree': 877},
 {'site': '5a2b9e85170549958bbf20794af54538',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'country': 'countries/MX',
  'inbound_degree': 144,
  'outbound_degree': 2,
  'total_degree': 146},
 {'site': '90c753ee53ba463d9b3fd5a270837ede',
  'name': 'Contitech Fluid Mexicana Servicios, S.A. de C.V.',
  'country': 'countries/MX',
  'inbound_degree': 119,
  'outbound_degree': 2,
  'total_degree': 121}]

In [34]:
# Display the three sites with the highest outbound degree (computed in the degree cell above).
outbound_degree_sites

[{'site': '8659f74cc67d4c53b80693eaae86c6f1',
  'name': 'Carboline (India) Private Limited',
  'country': 'countries/Other',
  'inbound_degree': 1,
  'outbound_degree': 5,
  'total_degree': 6},
 {'site': '52b076a7d8c74afaabba4f40af58bf0e',
  'name': 'TREMCO CPG INDIA PRIVATE LIMITED',
  'country': 'countries/IN',
  'inbound_degree': 5,
  'outbound_degree': 5,
  'total_degree': 10},
 {'site': '957c9d5b9d77462abd1c7503be536d6a',
  'name': 'Toxement S A',
  'country': 'countries/FR',
  'inbound_degree': 0,
  'outbound_degree': 5,
  'total_degree': 5}]

In [25]:
# Display the three sites with the highest total (inbound + outbound) degree.
total_degree_sites

[{'site': '3b34b76e83354031ae39ae3c5336c510',
  'name': 'Tengizchevroil Llp',
  'country': 'countries/KZ',
  'inbound_degree': 876,
  'outbound_degree': 1,
  'total_degree': 877},
 {'site': '5a2b9e85170549958bbf20794af54538',
  'name': 'Avery Dennison Converted Products De Mexico Sa De Cv',
  'country': 'countries/MX',
  'inbound_degree': 144,
  'outbound_degree': 2,
  'total_degree': 146},
 {'site': '90c753ee53ba463d9b3fd5a270837ede',
  'name': 'Contitech Fluid Mexicana Servicios, S.A. de C.V.',
  'country': 'countries/MX',
  'inbound_degree': 119,
  'outbound_degree': 2,
  'total_degree': 121}]

## Closeness

In [44]:
# Submit the Pregel effective-closeness job on `graph_site`. With store=True the
# server writes each site's score into its `effective_closeness` attribute.
closeness_job_id = db.pregel.create_job(
    graph='graph_site',
    algorithm='effectivecloseness',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='effective_closeness'
)

# create_job only submits the job and returns its id; the computation and the
# write-back continue on the server. Poll until the job reaches a terminal
# state so the query below does not read missing or stale scores.
closeness_job_state = db.pregel.job(closeness_job_id)['state']
while closeness_job_state in ('loading', 'running', 'storing', 'recovering'):
    time.sleep(0.5)
    closeness_job_state = db.pregel.job(closeness_job_id)['state']
if closeness_job_state != 'done':
    raise RuntimeError(
        'Pregel job {} ended in state {!r}'.format(closeness_job_id, closeness_job_state)
    )

# Read the stored scores back, rounded to two decimals, with site metadata.
closeness_aql = """
FOR site IN sites
    RETURN {"_key": site._key, "name": DOCUMENT(site.organization_id).name, "country": site.country_id, "effective_closeness": ROUND(site.effective_closeness*100)/100}
"""
closeness = list(db.aql.execute(closeness_aql))

# Cell output: the three sites with the highest effective closeness.
sorted(closeness, key=lambda x: x['effective_closeness'], reverse=True)[:3]

[{'_key': 'e0befcb8ac4547b1931dde4835e8d067',
  'name': 'Ad Fire Protection Systems Corp',
  'country': 'countries/US',
  'effective_closeness': 5.8},
 {'_key': '9f2ef6675ac94678831705aa3d99c9a9',
  'name': 'GST970206G13 GRUPO STONCOR SA DE CV',
  'country': 'countries/MX',
  'effective_closeness': 4.85},
 {'_key': '5535b447437d4118a7a1eca99563bfb9',
  'name': 'Carboline Company',
  'country': 'countries/US',
  'effective_closeness': 3.88}]

## Betweenness (approximated with LineRank)

In [45]:
# Submit the Pregel LineRank job on `graph_site`; this notebook uses LineRank as
# its betweenness measure. With store=True the server writes each site's score
# into its `line_rank` attribute.
betweeness_job_id = db.pregel.create_job(
    graph='graph_site',
    algorithm='linerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='line_rank'
)

# create_job only submits the job and returns its id; the computation and the
# write-back continue on the server. Poll until the job reaches a terminal
# state so the query below does not read missing or stale scores.
betweeness_job_state = db.pregel.job(betweeness_job_id)['state']
while betweeness_job_state in ('loading', 'running', 'storing', 'recovering'):
    time.sleep(0.5)
    betweeness_job_state = db.pregel.job(betweeness_job_id)['state']
if betweeness_job_state != 'done':
    raise RuntimeError(
        'Pregel job {} ended in state {!r}'.format(betweeness_job_id, betweeness_job_state)
    )

# Read the stored scores back, rounded to two decimals, with site metadata.
betweeness_aql = """
FOR site IN sites
    RETURN {"_key": site._key, "name": DOCUMENT(site.organization_id).name, "country": site.country_id, "line_rank": ROUND(site.line_rank*100)/100}
"""
betweeness = list(db.aql.execute(betweeness_aql))

# Cell output: the three sites with the highest LineRank score.
sorted(betweeness, key=lambda x: x['line_rank'], reverse=True)[:3]

[{'_key': '3b34b76e83354031ae39ae3c5336c510',
  'name': 'Tengizchevroil Llp',
  'country': 'countries/KZ',
  'line_rank': 0.11},
 {'_key': 'f0136a3c7b4a4fd5bf89b57776f62d1a',
  'name': 'Stoncor Benelux B.V.',
  'country': 'countries/NL',
  'line_rank': 0.05},
 {'_key': '90c753ee53ba463d9b3fd5a270837ede',
  'name': 'Contitech Fluid Mexicana Servicios, S.A. de C.V.',
  'country': 'countries/MX',
  'line_rank': 0.02}]

## PageRank

In [41]:
# Submit the Pregel PageRank job on `graph_site`. With store=True the server
# writes each site's score into its `page_rank` attribute.
pagerank_job_id = db.pregel.create_job(
    graph='graph_site',
    algorithm='pagerank',
    store=True,
    max_gss=None,
    thread_count=1,
    async_mode=False,
    result_field='page_rank'
)

# create_job only submits the job and returns its id; the computation and the
# write-back continue on the server. Poll until the job reaches a terminal
# state so the query below does not read missing or stale scores.
pagerank_job_state = db.pregel.job(pagerank_job_id)['state']
while pagerank_job_state in ('loading', 'running', 'storing', 'recovering'):
    time.sleep(0.5)
    pagerank_job_state = db.pregel.job(pagerank_job_id)['state']
if pagerank_job_state != 'done':
    raise RuntimeError(
        'Pregel job {} ended in state {!r}'.format(pagerank_job_id, pagerank_job_state)
    )

# Read the stored scores back, rounded to three decimals, with site metadata.
pagerank_aql = """
FOR site IN sites
    RETURN {"_key": site._key, "name": DOCUMENT(site.organization_id).name, "country": site.country_id, "page_rank": ROUND(site.page_rank*1000)/1000}
"""
pagerank = list(db.aql.execute(pagerank_aql))

# Cell output: the three sites with the highest PageRank score.
sorted(pagerank, key=lambda x: x['page_rank'], reverse=True)[:3]

[{'_key': '3b34b76e83354031ae39ae3c5336c510', 'page_rank': 0.052},
 {'_key': 'f0136a3c7b4a4fd5bf89b57776f62d1a', 'page_rank': 0.044},
 {'_key': '5a2b9e85170549958bbf20794af54538', 'page_rank': 0.009}]