In [1]:
%matplotlib inline
import cPickle as pkl
import pandas as pd
import networkx as nx
import numpy as np
import os,sys
import matplotlib.pyplot as plt
import seaborn as sns

from collections import defaultdict

In [2]:
# NOTE: unpickling can execute arbitrary code -- only load this file if it
# comes from a trusted source.
# The pickle holds a 5-tuple; `data`, `users` and `repos` are the parts the
# cells below rely on.
with open('data_structuring_150.pkl', 'rb') as pkl_file:
    rdata, udata, data, users, repos = pkl.load(pkl_file)

In [16]:
# Peek at five of the records stored in `data` to see which fields they carry.
data.values()[:5]

[{'actions': [u'IssueCommentEvent'],
  'commits': 0,
  'time_buckets': [9],
  'times': [datetime.datetime(2015, 1, 26, 19, 5, 43)],
  'user_is_owner': False},
 {},
 {'actions': [u'CommitCommentEvent', u'CommitCommentEvent'],
  'commits': 0,
  'time_buckets': [1, 1],
  'times': [datetime.datetime(2015, 1, 1, 13, 17, 12),
   datetime.datetime(2015, 1, 1, 13, 18, 4)],
  'user_is_owner': False},
 {'actions': [u'PullRequestEvent'],
  'commits': 0,
  'time_buckets': [6],
  'times': [datetime.datetime(2015, 1, 17, 4, 17, 44)],
  'user_is_owner': False},
 {'actions': [u'IssuesEvent'],
  'commits': 0,
  'time_buckets': [6],
  'times': [datetime.datetime(2015, 1, 16, 21, 5, 43)],
  'user_is_owner': False}]

In [3]:
# Lookup table from GitHub event type to the interaction category used when
# building the network.  Event types filed under "none" are dropped by
# get_all_interactions_for_timebucket.
label_encodings = {
    event_name: category
    for category, event_names in (
        ("content", ("PullRequestEvent", "PushEvent", "DeleteEvent", "CreateEvent")),
        ("design", ("PublicEvent", "PullRequestReviewCommentEvent", "GollumEvent",
                    "IssueCommentEvent", "CommitCommentEvent", "IssuesEvent")),
        ("consume", ("ForkEvent", "WatchEvent")),
        ("none", ("MemberEvent", "ReleaseEvent")),
    )
    for event_name in event_names
}

In [40]:
class MIPnet(object):
    """Multigraph over users and repositories, built from interaction triples.

    Every node carries an ``ntype`` attribute ("user" or "repo").
    ``update_edges_for_time`` maintains three kinds of edges:

    * user-repo: one parallel edge per interaction type (stored as
      ``edittype``); its ``weight`` counts the interactions of that type.
    * repo-repo: an edge whose ``weight`` grows when one user touches both
      repos inside the same batch.
    * user-user: an edge whose ``weight`` grows when two users touch the same
      repo inside the same batch.
    """

    def __init__(self, P, R, decay = 0.9):
        """Create the graph with one node per entry of ``P`` and of ``R``.

        P     -- iterable of user identifiers (added with ntype="user")
        R     -- iterable of repo identifiers (added with ntype="repo")
        decay -- stored on the instance; nothing in this class reads it yet
        """
        self.P = P # partner
        self.R = R # repos
        self.mip = nx.MultiGraph()
        self.mip.add_nodes_from(self.P, ntype="user")
        self.mip.add_nodes_from(self.R, ntype="repo")

        self.decay = decay

    def _bump_plain_edge(self, a, b):
        # Add 1 to the weight of the a-b co-occurrence edge (key 0), creating
        # the edge with weight 1 when the two nodes are not connected yet.
        if self.mip.has_edge(a, b):
            self.mip[a][b][0]['weight'] += 1
        else:
            self.mip.add_edge(a, b, weight=1)

    def update_edges_for_time(self, ints):
        """Fold one batch of interactions into the graph.

        ints -- iterable of ``(user, repo, interaction_type)`` triples that
                belong to the same time bucket.

        Within one batch a repo-repo (user-user) weight grows by 1 the first
        time the current user (repo) links the pair, not once per repeated
        interaction; it could also be 1 per common edit.
        """
        thisBucketUserRepos = defaultdict(set)
        thisBucketRepoUsers = defaultdict(set)
        for cuser, crepo, ctype in ints:
            # repo-repo: the first time this user touches crepo in the batch,
            # connect crepo to every repo the user touched earlier.
            repos_of_user = thisBucketUserRepos[cuser]
            if crepo not in repos_of_user:
                for connectRepo in repos_of_user:
                    self._bump_plain_edge(crepo, connectRepo)
                repos_of_user.add(crepo)

            # user-user, similar to repo-repo
            users_of_repo = thisBucketRepoUsers[crepo]
            if cuser not in users_of_repo:
                for connectUser in users_of_repo:
                    self._bump_plain_edge(cuser, connectUser)
                users_of_repo.add(cuser)

            # user-repo: bump the parallel edge of this interaction type, or
            # create it.  has_edge is the direct existence test; the previous
            # membership test against edges(cuser, crepo) passed crepo as the
            # `data` argument and scanned every edge of cuser.
            edge_exists = False
            if self.mip.has_edge(cuser, crepo):
                for attrs in self.mip[cuser][crepo].values():
                    # edges without an edittype are never treated as a match
                    if 'edittype' in attrs and attrs['edittype'] == ctype:
                        edge_exists = True
                        attrs['weight'] += 1
            if not edge_exists:
                self.mip.add_edge(cuser, crepo, weight=1, edittype=ctype)
        

In [41]:
def get_all_interactions_for_timebucket(t, records=None, encodings=None):
    """Collect the interactions that fall into time bucket ``t``.

    t         -- time bucket to select
    records   -- mapping whose keys are indexable pairs (the first two items
                 become the user and repo slots of each result) and whose
                 values are dicts with parallel 'time_buckets' and 'actions'
                 lists; defaults to the notebook-level ``data``
    encodings -- mapping from action name to category; defaults to the
                 notebook-level ``label_encodings``

    Returns a list of ``(user, repo, category)`` tuples, one per action in
    bucket ``t`` whose category is not "none".  A record that raises KeyError
    (missing fields, or an action absent from ``encodings``) prints "error"
    and the rest of that record is skipped.
    """
    if records is None:
        records = data
    if encodings is None:
        encodings = label_encodings

    interactions = [] #user, repo, type
    for pair in records:
        record = records[pair]
        try: # there is one empty set in the data...
            for bucket, action in zip(record['time_buckets'], record['actions']):
                if bucket != t:
                    continue
                category = encodings[action]
                if category != "none":
                    interactions.append((pair[0], pair[1], category))
        except KeyError:
            # Only lookup failures are tolerated; any other exception is a
            # real bug and is allowed to propagate.
            print("error")
    print("found %d interactions" % len(interactions))
    return interactions


error
found 16194 interactions


In [42]:
# Build a graph with a node for every user and repo, then fold in the
# interactions that fall into time bucket 1.
mip = MIPnet(users, repos)
interactions_bucket1 = get_all_interactions_for_timebucket(1)
mip.update_edges_for_time(interactions_bucket1)

In [43]:
# Inspect the first five edges; parallel edges show up as repeated node pairs.
mip.mip.edges()[:5]

[(u'Angelfirenze', u'codeschool-kiddo'),
 (u'Angelfirenze', u'deadlyvipers/dojo_rules'),
 (u'Angelfirenze', u'deadlyvipers/dojo_rules'),
 (u'Angelfirenze', u'deadlyvipers/dojo_rules'),
 (u'mozilla/sugardough', u'peterbe')]

In [38]:
# Print the attribute dict of every parallel edge between this user and repo
# (one edge per interaction type, each with its own weight).
for ednum,val in mip.mip["Angelfirenze"]["deadlyvipers/dojo_rules"].iteritems():
    print val

{'edittype': 'content', 'weight': 3}
{'edittype': 'design', 'weight': 1}
{'edittype': 'consume', 'weight': 1}


In [21]:
# Look up the record stored under the first key of `data`.
data[data.keys()[0]]

{'commits': 0,
 'times': [u'2015-01-13T02:28:21Z'],
 'types': [u'WatchEvent'],
 'user_is_owner': False}

In [71]:
# List every node (users and repos) currently in the graph.
print mip.mip.nodes()

