In [113]:
import pandas as pd
import numpy as np
import networkx as nx
import pickle

import matplotlib
matplotlib.use('Agg')
%matplotlib inline

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/Users/jeremyd/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/jeremyd/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/jeremyd/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/jeremyd/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/jeremyd/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/jeremyd/anaconda3/lib/python3

<h1> MultiGraph </h1>
<br>
This Workplace Contact graph is designed to include all interactions. Therefore, there can exist multiple edges between any two nodes if two people have contacted each other more than once during the study.

This graph can be useful for looking at network characteristics if all interactions must be considered but if outside analysis is to be performed, it's often easier to look at and manipulate the contacts DF in order to find out more about workplace interactions.


In [82]:
# Read in Contacts Data
contacts = pd.read_csv("../data/contacts.csv", header = None)

# Read in Department Data
department = pd.read_csv("../data/department.csv", header = None)

<b>Contact Data</b> -> Edge List (id1 - id2) w/ time stamp <br>
<b>Department Data</b> -> Map from id to Department


In [104]:
# Map from ID : Department
map_id_dep = {}
for i in range(department.shape[0]):
    map_id_dep[department.iloc[i, 0]] = department.iloc[i, 1]

In [105]:
# Map from ID : Floor
map_id_floor = {}

for id in map_id_dep.keys():
    dept = map_id_dep[id]
    if (dept == "DSE" or dept == "SRH"): 
        map_id_floor[id] = 0
    else:
        map_id_floor[id] = 1

In [84]:
contacts = contacts.rename(index = str, columns = {0 : "time", 1 : "p1", 2 : "p2"})
contacts.head()

Unnamed: 0,time,p1,p2
0,28820,492,938
1,28860,267,272
2,29300,181,826
3,29780,79,181
4,30000,150,196


In [85]:
contacts_multigraph = nx.from_pandas_dataframe(contacts, "p1", "p2", edge_attr = "time", create_using = nx.MultiGraph())

In [86]:
list(contacts_multigraph.nodes())[0]

492

<h3> MultiGraph Characteristics </h3> <br>
networkx graph: <b>contacts_multigraph</b> <br>
* type - MultiGraph (multiple edges between nodes)
* nodes - labeled by id
<br>

<h4>Edge Attributes </h4>
* time - each edge stores the time of interaction between the 2 nodes it connects
* nx.get_edge_attributes(graph, "attribute")
<br>

<h4>Node Attributes </h4>
* department - identifies what department a node is in 
* floor - identifies what floor a node is on
* nx.get_node_attributes(graph, "attribute")




In [108]:
# adds department attribute to nodes
for i in contacts_multigraph.nodes():
    contacts_multigraph.node[i]["department"] = map_id_dep[i]
    
# adds floor attribute to nodes
for j in contacts_multigraph.nodes():
    contacts_multigraph.node[j]["floor"] = map_id_floor[j]

In [112]:
contacts_multigraph.node[50]["department"]

'DMCT'

In [111]:
contacts_multigraph.node[50]["floor"]

1

In [89]:
nx.get_edge_attributes(contacts_multigraph, "time")

{(492, 938, 0): 28820,
 (492, 601, 0): 119200,
 (492, 601, 1): 119220,
 (492, 601, 2): 207220,
 (492, 601, 3): 207240,
 (492, 601, 4): 207260,
 (492, 601, 5): 207320,
 (492, 601, 6): 207340,
 (492, 601, 7): 230100,
 (492, 601, 8): 230140,
 (492, 601, 9): 230160,
 (492, 601, 10): 230180,
 (492, 601, 11): 230200,
 (492, 601, 12): 230220,
 (492, 601, 13): 230240,
 (492, 601, 14): 230260,
 (492, 601, 15): 230280,
 (492, 601, 16): 230300,
 (492, 601, 17): 230320,
 (492, 601, 18): 230360,
 (492, 601, 19): 230380,
 (492, 601, 20): 230400,
 (492, 601, 21): 230460,
 (492, 601, 22): 230480,
 (492, 601, 23): 230500,
 (492, 601, 24): 230520,
 (492, 601, 25): 230540,
 (492, 601, 26): 230560,
 (492, 601, 27): 230580,
 (492, 601, 28): 230600,
 (492, 601, 29): 230620,
 (492, 601, 30): 230680,
 (492, 601, 31): 230700,
 (492, 601, 32): 393180,
 (492, 601, 33): 393280,
 (492, 601, 34): 393300,
 (492, 601, 35): 393320,
 (492, 601, 36): 393340,
 (492, 601, 37): 393360,
 (492, 601, 38): 393380,
 (492, 601, 

In [95]:
contacts_multigraph.number_of_nodes()

92

In [96]:
department.shape[0]

92

In [97]:
contacts_multigraph.number_of_edges()

9827

In [98]:
contacts.shape[0]

9827

In [99]:
contacts_graph.nodes()

NodeView((492, 938, 267, 272, 181, 826, 79, 150, 196, 21, 205, 448, 765, 39, 253, 80, 123, 845, 118, 311, 95, 771, 153, 164, 179, 481, 122, 120, 819, 431, 778, 271, 601, 15, 56, 194, 786, 603, 496, 275, 222, 50, 172, 29, 223, 116, 285, 762, 63, 939, 335, 105, 113, 513, 119, 154, 494, 48, 209, 17, 66, 102, 134, 240, 242, 804, 511, 875, 184, 499, 268, 185, 533, 987, 210, 35, 131, 211, 751, 265, 784, 273, 662, 709, 101, 779, 132, 255, 213, 791, 743, 87))

In [37]:
nx.get_edge_attributes(contacts_graph, "time") # use get_edge_attributes to get time 
# time is labeled on the edges because there can be multiple edges between nodes

{(492, 938, 0): 28820,
 (492, 601, 0): 119200,
 (492, 601, 1): 119220,
 (492, 601, 2): 207220,
 (492, 601, 3): 207240,
 (492, 601, 4): 207260,
 (492, 601, 5): 207320,
 (492, 601, 6): 207340,
 (492, 601, 7): 230100,
 (492, 601, 8): 230140,
 (492, 601, 9): 230160,
 (492, 601, 10): 230180,
 (492, 601, 11): 230200,
 (492, 601, 12): 230220,
 (492, 601, 13): 230240,
 (492, 601, 14): 230260,
 (492, 601, 15): 230280,
 (492, 601, 16): 230300,
 (492, 601, 17): 230320,
 (492, 601, 18): 230360,
 (492, 601, 19): 230380,
 (492, 601, 20): 230400,
 (492, 601, 21): 230460,
 (492, 601, 22): 230480,
 (492, 601, 23): 230500,
 (492, 601, 24): 230520,
 (492, 601, 25): 230540,
 (492, 601, 26): 230560,
 (492, 601, 27): 230580,
 (492, 601, 28): 230600,
 (492, 601, 29): 230620,
 (492, 601, 30): 230680,
 (492, 601, 31): 230700,
 (492, 601, 32): 393180,
 (492, 601, 33): 393280,
 (492, 601, 34): 393300,
 (492, 601, 35): 393320,
 (492, 601, 36): 393340,
 (492, 601, 37): 393360,
 (492, 601, 38): 393380,
 (492, 601, 

In [102]:
avg_degree = 2 * contacts_multigraph.number_of_edges() / contacts_multigraph.number_of_nodes()

In [103]:
avg_degree

213.6304347826087

In [114]:
filehandler = open("contacts_multigraph.pickle", "wb")
pickle.dump(contacts_multigraph, filehandler)