In [1]:
import networkx as nx
import json as js
import pandas as pd

In [3]:
user_context_info_df = pd.read_csv("prod_user_workspace_channel_contexts.csv")
workspace_customer_info_df = pd.read_csv("prod_workspace_id_customers.csv")
markers_instance_info_df = pd.read_csv('prod_markers_instance_id.csv')

In [103]:
user_context_info_df['user_attributes'][5]

'{"id": "b6806dc10a184d829c2568f0c497825d", "name": "Karthik Muralidharan", "email": "karthik@etherlabs.io", "source": "slack", "status": "active", "teamId": "T7ECNFTBK", "avatars": {"image192": "https://avatars.slack-edge.com/2018-03-23/335953803862_c31bdfdc62b7a0825e72_192.png", "imageOriginal": "https://avatars.slack-edge.com/2018-03-23/335953803862_c31bdfdc62b7a0825e72_original.png"}, "deleted": false, "sourceId": "U7FA0ALGL", "createdAt": "2017-10-11T14:04:59.620079695Z", "deletedAt": null, "updatedAt": "2019-08-07T03:53:02.203563224Z", "accessToken": "xoxp-252430537393-253340360564-320311617968-b320c6a53883f92a2d9da5282508f311", "mentionName": "karthik", "workspaceId": "b3614ac518ad4fcc8406263f5f1d2c6a"}'

In [89]:
uattr  = js.loads(user_context_info_df['user_attributes'][0])

In [92]:
uattr['avatars'].get('image192')

'https://avatars.slack-edge.com/2019-06-18/656937253218_4f33486dc068997d264f_192.jpg'

In [6]:
user_context_info_df['workspace_attributes'][0]

'{"id": "8ed6ea6d11dc4cd099c32ab36a02e959", "bot": {"handle": "ether"}, "url": "heartfulness", "name": "Heartfulness", "source": "slack", "disabled": false, "sourceId": "T06QWU410", "createdAt": "2018-09-29T14:25:11.190796445Z", "deletedAt": null, "providers": null, "updatedAt": "2019-07-27T18:47:45.289225263Z", "accessToken": "xoxp-6846956034-446876960887-446008830197-08f482a43eaf973dcc5a77a49d8a381f"}'

In [7]:
user_context_info_df['channel_attributes'][0]

'{"id": "a1c582a6-f73f-4a88-9ac5-043947316384", "name": "venkat-only", "roomId": "GJ9CMDDJR", "scopes": null, "source": "slack", "status": "active", "teamId": "T06QWU410", "deleted": false, "members": ["35bcfa72bf81442c8a1a8bbda7733cde", "4e4e933eb77c4e5eada9e1780c51c662"], "contextId": "01DBB3SN6VJAP5WED6NZX7A7TE", "createdAt": "2019-04-29T16:06:36.270103167Z", "deletedAt": null, "updatedAt": "2019-04-29T16:06:36.364683104Z", "visibility": "private", "accessToken": "", "workspaceId": "8ed6ea6d11dc4cd099c32ab36a02e959", "meetingProvider": ""}'

In [144]:
user_attribute_namespace = ['name', 'email', 'source', 'status', 'deleted', \
                       'sourceId', 'createdAt', 'deletedAt', 'updatedAt', 'accessToken', 'mentionName', 'teamId']

user_node_namespace = ['workspaceId']

workspace_attribute_namespace = ['url', 'name', 'source', 'disabled', 'sourceId', "createdAt", "deletedAt", 
                                 "providers", "updatedAt",  "accessToken"]

channel_attribute_namespace = ['name', 'roomId', 'scopes', "source", "status", "teamId", "deleted", "createdAt",
                              "deletedAt", "updatedAt", "visibility", "accessToken", "meetingProvider"]
channel_node_namespace = ["contextId", "workspaceId"]

In [145]:
user_label = {"attribute": "userId"}

workspace_label = {"attribute": "workspaceId"}
channel_label = {"attribute": "channelId"}
context_label = {"attribute": "contextId"}
mind_label = {"attribute": "mindId", "name": ""}
customer_label = {"attribute": "customerId"}
mind_dict = {"01DAAPWR6W051Q9WWQY99JSGFY" : {"name": "generic", "type": "domain"},
             "01DAAQY88QZB19JQZ5PRJFR76Y" : {"name": "Software Engineering", "type": "domain"},
             "01DAAQYN9GBEBC92AYWNXEDP0C" : {"name": "HR", "type": "domain"},
             "01DAATANXNRQA35E6004HB7MBN" : {"name": "Marketing", "type": "domain"},
             "01DAATBC3AK1QWC5NYC5AHV2XZ" : {"name": "Product", "type": "domain"},
             "01DADP74WFV607KNPCB6VVXGTG" : {"name": "AI", "type": "domain"},
             "01DAAYHEKY5F4E02QVRJPTFTXV" : {"name": "Ether Engineering", "type": "custom"}
            }

user_context_rel = {"relation": "belongsTo"}
context_channel_rel = {"relation": "belongsTo"}
channel_workspace_rel = {"relation": "belongsTo"}
workspace_customer_rel = {"relation": "belongsTo"}
user_workspace_rel = {"relation": "belongsTo"}
user_channel_rel = {"relation": "memberOf"}
context_mind_rel = {"relation": "associatedMind"}

In [147]:
def prepare_user_nodes(user_info):
    user_node_list = []
    for i in range(len(user_info)):
        uinfo_attr = js.loads(user_info['user_attributes'][i])
        u_attr = {x: uinfo_attr.pop(x, None) for x in user_attribute_namespace}
        u_attr.update(user_label)
        try:
            u_attr.update({'avatarImage192': uinfo_attr['avatars'].get('image192')})
            u_attr.update({'avatarImageOriginal': uinfo_attr['avatars'].get('imageOriginal')})
        except:
            u_attr.update({'avatarImage192': "None"})
            u_attr.update({'avatarImageOriginal': "None"})
        
        user_id = user_info['user_id'][i]
        user_node_list.append((user_id, u_attr))
        
    return user_node_list

def prepare_workspace_nodes(user_info):
    workspace_node_list = []
    for i in range(len(user_info)):
        winfo_attr = js.loads(user_info['workspace_attributes'][i])
        
        w_attr = {x: winfo_attr.pop(x, None) for x in workspace_attribute_namespace}
        w_attr.update(workspace_label)
        w_attr.update({'bot': winfo_attr['bot'].get('handle')})
        workspace_id = user_info['workspace_id'][i]
        workspace_node_list.append((workspace_id, w_attr))
        
    return workspace_node_list

def prepare_channel_nodes(user_info):
    channel_node_list = []
    for i in range(len(user_info)):
        cinfo_attr = js.loads(user_info['channel_attributes'][i])
        
        c_attr = {x: cinfo_attr.pop(x, None) for x in channel_attribute_namespace}
        c_attr.update(channel_label)
        channel_id = user_info['channel_id'][i]
        channel_node_list.append((channel_id, c_attr))
        
    return channel_node_list

def prepare_context_nodes(user_info):
    context_node_list = []
    for i in range(len(user_info)):        
        context_id = user_info['context_id'][i]
        context_node_list.append((context_id, context_label))
        
    return context_node_list

def prepare_user_edges(user_info):
    user_workspace_edge_list = []
    user_context_edge_list = []
    user_channel_edge_list = []
    context_channel_edge_list = []
    channel_workspace_edge_list = []
    for i in range(len(user_info)):
        
        user_id = user_info["user_id"][i]
        workspace_id = user_info['workspace_id'][i]
        context_id = user_info['context_id'][i]
        channel_id = user_info['channel_id'][i]
        
        user_workspace_edge_list.append((user_id, workspace_id, user_workspace_rel))
        user_context_edge_list.append((user_id, context_id, user_context_rel))
        user_channel_edge_list.append((user_id, channel_id, user_channel_rel))
        context_channel_edge_list.append((context_id, channel_id, context_channel_rel))
        channel_workspace_edge_list.append((channel_id, workspace_id, channel_workspace_rel))
    
    return user_workspace_edge_list, user_context_edge_list, user_channel_edge_list, context_channel_edge_list, channel_workspace_edge_list

In [148]:
def workspace_customer_nodes(workspace_info):
    workspace_customer_node_list = []
    for i in range(len(workspace_info)):
        customer_id = workspace_info['customer_id'][i]
        
        workspace_customer_node_list.append((customer_id, customer_label))
    
    return workspace_customer_node_list

def workspace_customer_edges(workspace_info):
    workspace_customer_edge_list = []
    for i in range(len(workspace_info)):
        
        workspace_id = workspace_info['workspace_id'][i]
        customer_id = workspace_info['customer_id'][i]
        
        workspace_customer_edge_list.append((workspace_id, customer_id, workspace_customer_rel))
    
    return workspace_customer_edge_list

In [149]:
def prepare_marker_nodes(marker_info):
    marker_nodes_list = []
    for i in range(len(marker_info)):
        marker_id = marker_info['marker_id'][i]
        
        marker_attr = {
            "attribute": "markerId",
            "automaticMarker": marker_info['automatic'][i],
            "type": marker_info['marker_type'][i],
            "description": marker_info['marker_description'][i],
            "createdAt": marker_info['created_at'][i]
        }
        
        marker_nodes_list.append((marker_id))
    
    return marker_nodes_list

def marker_user_nodes(marker_info):
    marker_user_node_list = []
    for i in range(len(marker_info)):
        user_id = marker_info['user_id'][i]
        
        marker_user_node_list.append((user_id, user_label))
    
    return marker_user_node_list

def marker_instance_nodes(marker_info):
    marker_instance_node_list = []
    instance_label = {"attribute": "instanceId"}
    for i in range(len(marker_info)):
        instance_id = marker_info['instance_id'][i]
        
        marker_instance_node_list.append((instance_id, instance_label))
    
    return marker_instance_node_list

def marker_user_edges(marker_info):
    marker_user_edge_list = []
    marker_user_rel = {"relation": "createdBy"}
    for i in range(len(marker_info)):
        user_id = marker_info['user_id'][i]
        marker_id = marker_info['marker_id'][i]
        marker_user_edge_list.append((marker_id, user_id, marker_user_rel))
    
    return marker_user_edge_list

def marker_instance_edges(marker_info):
    marker_instance_edge_list = []
    marker_instance_rel = {"relation": "createdIn"}
    for i in range(len(marker_info)):
        instance_id = marker_info['instance_id'][i]
        marker_id = marker_info['marker_id'][i]
        marker_instance_edge_list.append((marker_id, instance_id, marker_instance_rel))
    
    return marker_instance_edge_list

## Add Mind info

In [150]:
mind_info = pd.read_csv('ws-ch-relations-staging2.csv')

from copy import deepcopy
def prepare_mind_nodes(mind_info):
    
    mind_node_list = []
    for i in range(len(mind_info)):
        m_label = deepcopy(mind_label)
        
        mind_id = mind_info['mind_id'][i]
        current_mind_info = mind_dict[mind_id]
        m_label.update(current_mind_info) 
        mind_node_list.append((mind_id, m_label))
    
    return mind_node_list

## Build the MetaGraph

In [151]:
meta_graph.clear()
meta_graph = nx.DiGraph(type="meta")

### Add user, workspace, context, channel and customer nodes

In [152]:
user_nodes = prepare_user_nodes(user_context_info_df)
workspace_nodes = prepare_workspace_nodes(user_context_info_df)
channel_nodes = prepare_channel_nodes(user_context_info_df)
context_nodes = prepare_context_nodes(user_context_info_df)

user_workspace_edge_list, user_context_edge_list, user_channel_edge_list, context_channel_edge_list, channel_workspace_edge_list = prepare_user_edges(user_context_info_df)

In [153]:
meta_graph.add_nodes_from(user_nodes)
meta_graph.add_nodes_from(workspace_nodes)
meta_graph.add_nodes_from(channel_nodes)
meta_graph.add_nodes_from(context_nodes)

meta_graph.add_edges_from(user_workspace_edge_list)
meta_graph.add_edges_from(user_context_edge_list)
meta_graph.add_edges_from(user_channel_edge_list)
meta_graph.add_edges_from(context_channel_edge_list)
meta_graph.add_edges_from(channel_workspace_edge_list)

In [154]:
print(meta_graph.number_of_nodes(), meta_graph.number_of_edges())

1874 7155


### Add workspace-> customer edges

In [155]:
customer_node_list = workspace_customer_nodes(workspace_customer_info_df)
workspace_customer_edge_list = workspace_customer_edges(workspace_customer_info_df)

In [156]:
meta_graph.add_nodes_from(customer_node_list)
meta_graph.add_edges_from(workspace_customer_edge_list)

In [157]:
print(meta_graph.number_of_nodes(), meta_graph.number_of_edges())

1991 7267


### Add Marker information

In [158]:
marker_node_list = prepare_marker_nodes(markers_instance_info_df)
marker_user_node_list = marker_user_nodes(markers_instance_info_df)
marker_instance_node_list = marker_instance_nodes(markers_instance_info_df)

marker_user_edge_list = marker_user_edges(markers_instance_info_df)
marker_instance_edge_list = marker_instance_edges(markers_instance_info_df)

In [159]:
meta_graph.add_nodes_from(marker_node_list)
meta_graph.add_nodes_from(marker_user_node_list)
meta_graph.add_nodes_from(marker_instance_node_list)

meta_graph.add_edges_from(marker_user_edge_list)
meta_graph.add_edges_from(marker_instance_edge_list)

In [160]:
print(meta_graph.number_of_nodes(), meta_graph.number_of_edges())

32313 59755


In [161]:
meta_graph["8d6db5f7-d9b7-4c54-ba38-fe710ffcaf3f"]

AtlasView({'f62d19b0-e9a8-41f7-97d9-3233b2d2390a': {'relation': 'belongsTo'}, '01DBB3SNV7ZWSB8FNW2448RF49': {'relation': 'belongsTo'}, '01DBB3SN8CSNZ3Y25PAFQM5059': {'relation': 'belongsTo'}, '01DBB3SNN9BS4JZ1G4TER7YXNZ': {'relation': 'belongsTo'}, '01DBB3SPGT34NG93ZHVS08J2Q9': {'relation': 'belongsTo'}, '01DBB3SNVBQXVQ7MKE9JH796BV': {'relation': 'belongsTo'}, '01DBB3SPH6XKEPFSHS9K8EVXPV': {'relation': 'belongsTo'}, '01DBB3SNEZGZBPQB17GG6SW9PX': {'relation': 'belongsTo'}, '01DBB3SNSKT8DQXNCS8M6PBJJJ': {'relation': 'belongsTo'}, '01DBB3SNYC0Q8P238G69HMSWMM': {'relation': 'belongsTo'}, '01DBB3SP5N6PH88VWBHJCXNG3B': {'relation': 'belongsTo'}, '01DBB3SNM3VC7KJ960V2F29FH9': {'relation': 'belongsTo'}, '01DBB3SQ6PR502SX3D4S4G2518': {'relation': 'belongsTo'}, '01DBB3SP385R67YAWJWXV6YHQW': {'relation': 'belongsTo'}, '01DBB3SNVF5MTFMMZR1F3W13KE': {'relation': 'belongsTo'}, '01DBB3SN99AVJ8ZWJDQ57X9TGX': {'relation': 'belongsTo'}, '01DBB3SPPDNJX40G2QCA74D3KQ': {'relation': 'belongsTo'}, '01DBB3SNQ

### Add Mind info and attr to the graph

In [162]:
mind_node_list = prepare_mind_nodes(mind_info)

In [163]:
meta_graph.add_nodes_from(mind_node_list)

In [164]:
print(meta_graph.number_of_nodes(), meta_graph.number_of_edges())

32319 59755


## Save Graph to pickle

In [175]:
nx.write_gpickle(meta_graph, "meta_graph_prod.pickle")

In [174]:
nx.write_graphml(meta_graph, "meta_graph_prod.graphml", infer_numeric_types=True)

In [172]:
for n, attr in meta_graph.nodes.data():
    for k,v in attr.items():
        if v == "":
            print(type(v))
            print(n, k, v)
            meta_graph.nodes[n][k] = "None"

<class 'str'>
35bcfa72-bf81-442c-8a1a-8bbda7733cde email 
<class 'str'>
622258ac-a854-43f5-a772-324bf6c2ebeb email 
<class 'str'>
933183fb-4652-4292-9647-84b26b6f1403 avatarImageOriginal 
<class 'str'>
838d0ce8-d294-493d-8439-5ce008ffd162 email 
<class 'str'>
838d0ce8-d294-493d-8439-5ce008ffd162 avatarImageOriginal 
<class 'str'>
677eebaa-85bf-4e6c-8bdc-9992572e6b0d avatarImageOriginal 
<class 'str'>
c2c32799-0ba8-4240-b122-3a80dd071a7e avatarImageOriginal 
<class 'str'>
5c06fd33-432d-44e0-8cdf-71ba1a8eef3d avatarImageOriginal 
<class 'str'>
4b7109ed-cad9-4ada-abd3-90b7b7e6c0b9 avatarImageOriginal 
<class 'str'>
02e3666b-0c47-4b48-b5e0-fc8c892bcb0b avatarImageOriginal 
<class 'str'>
b494fa7f-dace-4bb9-beab-32e1724442b3 name 
<class 'str'>
b494fa7f-dace-4bb9-beab-32e1724442b3 avatarImageOriginal 
<class 'str'>
e036d3a4-7068-4e9a-a9ff-2b37ed858a5e avatarImageOriginal 
<class 'str'>
1d5aec30-d4c3-499f-8424-edccb0927284 name 
<class 'str'>
1d5aec30-d4c3-499f-8424-edccb0927284 avatarImageOr

In [177]:
for n, attr in meta_graph.nodes.data():
    if attr['attribute'] == "userId":
        print(meta_graph[n])

{'8ed6ea6d-11dc-4cd0-99c3-2ab36a02e959': {'relation': 'belongsTo'}, '01DBB3SN6VJAP5WED6NZX7A7TE': {'relation': 'belongsTo'}, '01DBB3SP2KWP78QRSB3SDBZ0MJ': {'relation': 'belongsTo'}, '01DBB3SNJ8PZ091C71ES8NQP1F': {'relation': 'belongsTo'}, '01DBB3SQCEA1Z0KA6NHRH3Z5QW': {'relation': 'belongsTo'}, '01DBB3SN2ZA8GY2X7QGS1PF4K4': {'relation': 'belongsTo'}, '01DBB3SN6AJC8VE0C0GV7SF9GB': {'relation': 'belongsTo'}, '01DBB3SN23WZMNK9TZXRKPGW61': {'relation': 'belongsTo'}, '01DBB3SN7BDZ43A5X7DHWJV2EC': {'relation': 'belongsTo'}, '01DFTM2QMK9TC897FNXGJEJS0C': {'relation': 'belongsTo'}, '01DGKV3K34QRSD3V1GKDT05P3H': {'relation': 'belongsTo'}, '01DGP5R740PHT717XSXDEN8Z9X': {'relation': 'belongsTo'}, '01DME6G0HXQKK9WNGZZZ3JF5JB': {'relation': 'belongsTo'}, 'a1c582a6-f73f-4a88-9ac5-043947316384': {'relation': 'memberOf'}, 'ac6699b6-51ef-46c2-b5d7-43e262a0702e': {'relation': 'memberOf'}, 'c7fa43e9-395e-49e0-8bc2-28e29736f5b6': {'relation': 'memberOf'}, '4e6976d7-135c-440b-a6a9-e1fcf4429a3a': {'relation

KeyError: 'attribute'

In [179]:
for n1, n2, attr in meta_graph.edges.data():
    if attr['relation'] == "createdBy":
        print(meta_graph[n1])

{'b782dae5-06de-4b7a-bc7d-789a1ad005e5': {'relation': 'createdBy'}, 'fffc929e-0844-4a6b-8fbf-e15b04b59ebc': {'relation': 'createdIn'}}
{'716067a6-0a1a-4034-abc4-9a12ecafb39b': {'relation': 'createdBy'}, 'ee944a72-60c3-41f3-9525-885f68fd0338': {'relation': 'createdIn'}}
{'75bdf310-110b-4b8f-ab88-b16fafce920e': {'relation': 'createdBy'}, 'a68bc584-69a8-441e-8192-176d3a42d1a0': {'relation': 'createdIn'}}
{'e8653826-bf7f-4257-9cd1-43f5f89e1804': {'relation': 'createdBy'}, 'cdc9a444-3a49-4a85-92e6-e9fcd6aad0a6': {'relation': 'createdIn'}}
{'b1c1ae7b-d346-40e5-9d4f-b6e03e36fe80': {'relation': 'createdBy'}, '3d233395-dc96-4b94-beb0-50ad6a599b5e': {'relation': 'createdIn'}}
{'5318e532-354b-4362-93bf-b737bff54fcf': {'relation': 'createdBy'}, '3fa39cd0-7e8b-4ca7-bf0d-8017410c26bd': {'relation': 'createdIn'}}
{'6240db37-4857-4fba-b13f-b0978ea7c8c2': {'relation': 'createdBy'}, '2a59f45b-0ab0-47ff-8a09-dc2d412e1f46': {'relation': 'createdIn'}}
{'8d6db5f7-d9b7-4c54-ba38-fe710ffcaf3f': {'relation': '

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [181]:
meta_graph.nodes['b782dae5-06de-4b7a-bc7d-789a1ad005e5']

{'name': 'Cullen',
 'email': 'cullen@etherlabs.io',
 'source': 'slack',
 'status': 'active',
 'deleted': False,
 'sourceId': 'U7ETWDU6N',
 'createdAt': '2017-10-11T14:08:27.854963073Z',
 'deletedAt': 'None',
 'updatedAt': '2019-09-09T13:34:30.126088275Z',
 'accessToken': 'xoxp-252430537393-252948470226-287109598679-b09d3ee1e50596a674b7d7ec3ced6fe4',
 'mentionName': 'cullen',
 'teamId': 'T7ECNFTBK',
 'attribute': 'userId',
 'avatarImage192': 'https://secure.gravatar.com/avatar/128c4b93aeee478a3fbc00b2f3753ea9.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2F80588%2Fimg%2Favatars%2Fava_0021-192.png',
 'avatarImageOriginal': 'https://secure.gravatar.com/avatar/128c4b93aeee478a3fbc00b2f3753ea9.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2F7fa9%2Fimg%2Favatars%2Fava_0021-512.png'}