In [None]:
import json
import sys
import time

import msgpack
import logbook
import network

In [None]:
# Run configuration.
# exp_name is handed to logbook.Experiment to identify this run.
exp_name = "19_find_degree"

# Input: project list, read line by line with one JSON document per line.
project_file = "data/projects-2016-10-14-dedup.json"

# Input: per-project msgpack edge file; "%d" is filled with the project id.
edges_file = (
    "output/17_create_coeditor/"
    "2017-04-26 14:53:15 1b4e2e0/"
    "%d-coeditor.mp"
)

# Output: per-project CSV name pattern; "%d" is filled with the project id and
# the result is resolved to a path through exp.get_filename(...).
out_file = "%d-degree.csv"

In [None]:
# Create the experiment handle for this run. Later cells use it to obtain a
# logger and to resolve output file names via exp.get_filename(...).
# NOTE(review): logbook.Experiment is not defined in this notebook; it is
# presumably a project-local helper rather than the PyPI "logbook" package --
# confirm which module is on the path.
exp = logbook.Experiment(exp_name)
# Logger used for all progress and error messages in the cells below.
log = exp.get_logger()
log.info("Beginning")

In [None]:
# Collect the "project_id" field from every line of the project file; each
# line is parsed as a standalone JSON document.
log.info("Loading projects")
project_ids = []
with open(project_file, "rb") as projects_fh:
    project_ids.extend(json.loads(line)["project_id"] for line in projects_fh)

In [None]:
# For every project, read its msgpack edge file, count in-degree and
# out-degree per node, and write one CSV (project_id, contributor_id,
# in_degree, out_degree) per project.
#
# Each unpacked record `o` is read as: o[0][0] = source node, o[1] = the
# collection of nodes it has edges to.
# NOTE(review): the exact record schema comes from the step that produced
# edges_file -- confirm against that notebook.

# Pause briefly once this many seconds have passed since the previous pause.
PAUSE_INTERVAL_SECONDS = 60
PAUSE_SECONDS = 0.1

last = time.time()
try:
    for project_id in project_ids:
        # NOTE(review): writing str to a file opened "wb" only works on
        # Python 2; on Python 3 this needs text mode ("w") -- confirm the
        # kernel version before changing it.
        with open(exp.get_filename(out_file % project_id), "wb") as out:
            out.write(",".join(["project_id", "contributor_id", "in_degree", "out_degree"]) + "\n")
            if time.time() - last > PAUSE_INTERVAL_SECONDS:
                time.sleep(PAUSE_SECONDS)
                # Restart the timer; without this every project after the
                # first minute would sleep, not just one per interval.
                last = time.time()
            log.info("Calculating degree for project %d" % project_id)
            all_nodes = set()
            in_degree = {}
            out_degree = {}
            with open(edges_file % project_id, "rb") as f:
                unpacker = msgpack.Unpacker(f)
                for o in unpacker:
                    node = o[0][0]
                    edges_from = o[1]
                    all_nodes.add(node)
                    all_nodes |= set(edges_from)
                    # Out-degree is the number of listed targets for this
                    # record (a later record for the same node overwrites it).
                    out_degree[node] = len(edges_from)
                    for target in edges_from:
                        in_degree[target] = in_degree.get(target, 0) + 1
            # Nodes that only ever appear as a source or only as a target
            # get 0 for the missing direction.
            for node in all_nodes:
                out.write(",".join([
                    str(project_id),
                    str(node),
                    str(in_degree.get(node, 0)),
                    str(out_degree.get(node, 0))]) + "\n")
            log.info("  Done counting degrees")
    log.info("Done with all projects")
except BaseException:
    # Log whatever stopped the run (including interrupts), then re-raise so
    # the notebook still shows the original traceback. Requires `sys`, which
    # must be imported in the notebook's import cell.
    log.error(str(sys.exc_info()))
    raise