In [1]:
import json
from pprint import pprint
from xml.sax.saxutils import quoteattr

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(1234)

In [2]:
# Parse sp500Dirs.csv into {ticker: set of names}. Each row is
# "ticker,name,name,..."; rows with an empty first column are skipped, empty
# name columns are dropped, and double quotes inside names become single quotes.
# (Presumably these are board directors, going by the file name.)
with open("sp500Dirs.csv", "r") as f:
    sp500Dirs = {}
    for raw_row in f:
        ticker, *names = raw_row.rstrip("\n").split(",")
        if ticker == "":
            continue
        sp500Dirs[ticker] = {name.replace('"', "'") for name in names if name != ""}

In [3]:
# Spot-check the parsed mapping: display the set of names stored for ticker "MMM".
sp500Dirs["MMM"]

{"David B Dillon 'Dave'",
 "Edward M Liddy 'Ed'",
 "Gregory R Page 'Greg'",
 "Herbert L Henkel 'Herb'",
 'Inge G Thulin',
 "Michael L Eskew 'Mike'",
 'Muhtar Kent',
 'Patricia Ann Woertz',
 'Robert J Ulrich',
 'Sondra L Barbour',
 "Thomas K Brown 'Tony'",
 'Vance D Coffman'}

In [4]:
# Parse sp500Execs.csv into {ticker: set of names}. The first column is the
# ticker and the remaining non-empty columns are names, with any double quotes
# rewritten as single quotes. Rows whose first column is empty are ignored.
# (Presumably these are company executives, going by the file name.)
with open("sp500Execs.csv", "r") as f:
    sp500Execs = {}
    for raw_row in f:
        fields = raw_row.rstrip("\n").split(",")
        ticker = fields[0]
        if ticker == "":
            continue
        cleaned = (field.replace('"', "'") for field in fields[1:] if field != "")
        sp500Execs[ticker] = set(cleaned)

In [5]:
# Parse tick2comp.csv into {ticker: company name}. A company name may itself
# contain commas, so every non-empty column after the ticker is glued back
# together with "," (double quotes are rewritten as single quotes first).
with open("tick2comp.csv", "r") as f:
    tick2comp = {}
    for raw_row in f:
        ticker, *name_parts = raw_row.rstrip("\n").split(",")
        if ticker != "":
            kept_parts = [part.replace('"', "'") for part in name_parts if part != ""]
            tick2comp[ticker] = ",".join(kept_parts)

In [6]:
# Spot-check: a company name that contains a comma should come back re-joined intact.
tick2comp["ESS"]

'Essex Property Trust, Inc.'

In [7]:
# Second spot-check of the ticker -> company-name lookup.
tick2comp["FB"]

'Facebook, Inc.'

In [8]:
# Build the three weighted edge maps used by the export cells:
#   company_company["A_B"]       -> number of people shared by tickers A and B
#                                   (A and B sorted alphabetically)
#   company_people["A_person"]   -> 1 for every (ticker, person) membership
#   people_people["p1_p2"]       -> number of company pairs that share both
#                                   p1 and p2 (p1 < p2 alphabetically)
# allPeople collects every person seen across all tickers.
#
# Keys are "_"-joined strings because the export cells recover the two
# endpoints with key.split("_"); this assumes neither tickers nor names
# contain an underscore.
company_company = {}
company_people  = {}
people_people   = {}
allPeople = set([])
comps = list(tick2comp.keys())

# Each ticker's combined roster is computed once here instead of being rebuilt
# for every company pair inside the nested loop. A ticker missing from either
# source mapping still raises KeyError, as before.
rosters = [sp500Dirs[tick].union(sp500Execs[tick]) for tick in comps]

for i, (tick, people) in enumerate(zip(comps, rosters)):
    allPeople.update(people)
    for person in people:
        company_people[tick + "_" + person] = 1

    # Only look at later tickers so each unordered company pair is visited once.
    for tick2, people2 in zip(comps[i+1:], rosters[i+1:]):
        inter = sorted(people.intersection(people2))
        if not inter:
            continue
        company_company["_".join(sorted([tick, tick2]))] = len(inter)

        # Every pair of people shared by this company pair gets one more
        # co-occurrence; with a single shared person the loops do nothing.
        for j, person1 in enumerate(inter):
            for person2 in inter[j+1:]:
                key = person1 + "_" + person2
                people_people[key] = people_people.get(key, 0) + 1

In [9]:
# Export the combined company/person graph as GraphML-style XML.
#
# Nodes: one per ticker (type "company") and one per person (type "person").
# Edges: every entry of company_company, company_people and people_people, in
# that order, numbered consecutively; the edge weight is the dict value.
#
# The file is opened as UTF-8 so its bytes match the encoding named in the XML
# declaration, and attribute values go through quoteattr() so characters such
# as & < " in an id cannot corrupt the markup. For ids without such characters
# the output is unchanged.
#
# NOTE(review): the markup is GraphML although the file is named "graph.gml",
# and no <key> elements are declared here. Both are left as-is in case a
# downstream consumer relies on them - confirm.
with open('graph.gml', 'w', encoding='utf-8') as f:
    f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    f.write('<graphml>\n')
    f.write('<graph id="G" edgedefault="undirected">\n')

    for node_type, node_ids in (("company", tick2comp.keys()),
                                ("person", allPeople)):
        for node_id in node_ids:
            f.write('<node id=' + quoteattr(node_id) + '>\n')
            f.write('<data key="type">' + node_type + '</data>\n')
            f.write('</node>\n')

    edgeIDCounter = 0
    for edge_weights in (company_company, company_people, people_people):
        for key, weight in edge_weights.items():
            # Keys are "<source>_<target>"; a stray underscore in either part
            # makes this unpacking raise ValueError rather than guess.
            source, target = key.split("_")
            f.write('<edge id="' + str(edgeIDCounter) + '"'
                    + ' source=' + quoteattr(source)
                    + ' target=' + quoteattr(target) + '>\n')
            f.write('<data key="weight">' + str(weight) + '</data>\n')
            f.write('</edge>\n')
            edgeIDCounter += 1

    f.write('</graph>')
    f.write('</graphml>')

In [9]:
# Export the combined graph as a {"nodes": [...], "links": [...]} JSON file.
#
# Nodes: tickers (group 1) followed by people (group 2).
# Links: company_company, company_people, people_people entries in that order,
# each tagged with its origin in "type" and weighted by the dict value.
#
# Entries are collected first and joined with a separator, so the output stays
# valid JSON when any of the inputs is empty (no dangling leading comma).
# String values go through json.dumps() so quotes, backslashes and control
# characters are escaped; ensure_ascii=False keeps non-ASCII names readable,
# and the file is written as UTF-8. The layout for ordinary data is unchanged.
node_entries = [
    '{"id": ' + json.dumps(node_id, ensure_ascii=False) + ', "group": ' + group + '}'
    for group, node_ids in (("1", tick2comp.keys()), ("2", allPeople))
    for node_id in node_ids
]

link_entries = []
for link_type, edge_weights in (("company_company", company_company),
                                ("company_people", company_people),
                                ("people_people", people_people)):
    for key, weight in edge_weights.items():
        # Keys are "<source>_<target>"; a stray underscore in either part
        # makes this unpacking raise ValueError rather than guess.
        source, target = key.split("_")
        link_entries.append(
            '{"source": ' + json.dumps(source, ensure_ascii=False)
            + ', "target": ' + json.dumps(target, ensure_ascii=False)
            + ', "value": ' + str(weight)
            + ', "type": "' + link_type + '"}'
        )

entry_separator = ",\n\t\t"
with open('graph.json', 'w', encoding='utf-8') as f:
    f.write('{\n\t"nodes": [')
    if node_entries:
        f.write("\n\t\t" + entry_separator.join(node_entries))
    f.write("\n\t],")
    f.write('\n\t"links": [')
    if link_entries:
        f.write("\n\t\t" + entry_separator.join(link_entries))
    f.write("\n\t]\n}")

In [16]:
# Export the company-only graph as a {"nodes": [...], "links": [...]} JSON file.
#
# Nodes: one per ticker (group 1).
# Links: one per company_company entry, weighted by the number of shared people.
#
# String values go through json.dumps() so any quote, backslash or control
# character is escaped; ensure_ascii=False keeps non-ASCII text readable, and
# the file is written as UTF-8. The layout for ordinary data is unchanged.
node_entries = [
    '{"id": ' + json.dumps(co, ensure_ascii=False) + ', "group": 1}'
    for co in tick2comp.keys()
]

link_entries = []
for key, weight in company_company.items():
    # Keys are "<tickerA>_<tickerB>"; a ticker containing an underscore makes
    # this unpacking raise ValueError rather than guess.
    source, target = key.split("_")
    link_entries.append(
        '{"source": ' + json.dumps(source, ensure_ascii=False)
        + ', "target": ' + json.dumps(target, ensure_ascii=False)
        + ', "value": ' + str(weight) + '}'
    )

entry_separator = ",\n\t\t"
with open('company_graph.json', 'w', encoding='utf-8') as f:
    f.write('{\n\t"nodes": [')
    if node_entries:
        f.write("\n\t\t" + entry_separator.join(node_entries))
    f.write("\n\t],")
    f.write('\n\t"links": [')
    if link_entries:
        f.write("\n\t\t" + entry_separator.join(link_entries))
    f.write("\n\t]\n}")

In [10]:
# Export the combined company/person graph as GraphML, declaring the "weight"
# (edge, int) and "type" (node, string) data keys up front.
#
# Nodes: one per ticker (type "company") and one per person (type "person").
# Edges: every entry of company_company, company_people and people_people, in
# that order, numbered consecutively; the edge weight is the dict value.
#
# The file is opened as UTF-8 so its bytes match the encoding named in the XML
# declaration, and attribute values go through quoteattr() so characters such
# as & < " in an id cannot corrupt the markup. For ids without such characters
# the output is unchanged.
with open('graph.graphml', 'w', encoding='utf-8') as f:
    f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    f.write('<graphml>\n')
    # The two <key> declarations are written without trailing newlines, as before.
    f.write("<key id='weight' for='edge' attr.name='weight' attr.type='int'/>")
    f.write("<key id='type' for='node' attr.name='type' attr.type='string'/>")
    f.write('<graph id="G" edgedefault="undirected">\n')

    for node_type, node_ids in (("company", tick2comp.keys()),
                                ("person", allPeople)):
        for node_id in node_ids:
            f.write('<node id=' + quoteattr(node_id) + '>\n')
            f.write('<data key="type">' + node_type + '</data>\n')
            f.write('</node>\n')

    edgeIDCounter = 0
    for edge_weights in (company_company, company_people, people_people):
        for key, weight in edge_weights.items():
            # Keys are "<source>_<target>"; a stray underscore in either part
            # makes this unpacking raise ValueError rather than guess.
            source, target = key.split("_")
            f.write('<edge id="' + str(edgeIDCounter) + '"'
                    + ' source=' + quoteattr(source)
                    + ' target=' + quoteattr(target) + '>\n')
            f.write('<data key="weight">' + str(weight) + '</data>\n')
            f.write('</edge>\n')
            edgeIDCounter += 1

    f.write('</graph>')
    f.write('</graphml>')