In [1]:
import os
from datetime import datetime

import pandas as pd
import pathpy as pp
import requests
from dateutil.parser import parse
from perceval.backends.core.git import Git
from perceval.backends.core.github import GitHub

In [None]:

# GitHub credentials: prefer the GITHUB_API_TOKEN environment variable so the secret is not
# saved inside the notebook; the original placeholder is kept as a fallback.
API_TOKEN = os.environ.get("GITHUB_API_TOKEN", "<git api token>")

repo = GitHub(owner="torvalds", repository='linux', api_token=[API_TOKEN], sleep_for_rate=True)

# Row accumulators (re-initialised on every run so the cell is idempotent):
#   nodeList          -> [user id, login]            (may contain duplicates)
#   nodeIssueList     -> [issue item_id, issue number]
#   edgeList          -> [user id, issue item_id, 'directed', event type, timestamp, weight]
#   temporal_edgeList -> [user id, issue item_id, timestamp]
nodeList = []
nodeIssueList = []
edgeList = []
temporal_edgeList = []

# A single session carries the Authorization header on every request, including the
# paginated "next" pages, which were previously fetched without authentication.
with requests.Session() as session:
    session.headers.update({'Authorization': 'token ' + API_TOKEN})

    # fetching issues and users from torvalds/linux repo between the interval 9/9/2020 to 10/20/2020
    issues = repo.fetch(
        category='issue',
        from_date=datetime(2020, 9, 9, 0, 0),
        to_date=datetime(2020, 10, 20, 0, 0),
    )
    for item in issues:
        issue_id = item['search_fields']['item_id']
        nodeIssueList.append([issue_id, item['data']['number']])

        # querying timeline URL to fetch various events associated with an issue, to understand
        # user activities. user can comment, subscribe, react, push commits etc to an event
        response = session.get(item['data']['timeline_url'], params={'per_page': '100'})
        response.raise_for_status()  # fail loudly instead of iterating over an error payload
        events = response.json()
        # follow the Link header until the last page has been collected
        while 'next' in response.links:
            response = session.get(response.links['next']['url'])
            response.raise_for_status()
            events.extend(response.json())

        for event in events:
            # Resolve the event time per event. 'submitted_at' takes precedence over
            # 'created_at' when both are present (same precedence as before). The result is
            # stored in `timestamp`, not `datetime`, so the imported class is never shadowed.
            timestamp = None
            for field in ('created_at', 'submitted_at'):
                if event.get(field):
                    timestamp = parse(event[field], ignoretz=True).isoformat()
            if timestamp is None:
                # No usable time on this event: it cannot be placed on the temporal graph,
                # and reusing the previous event's time would record a wrong edge.
                continue

            # creating 3 types of csv - issues.csv, users.csv, edgeList.csv --> to generate dynamic graph using gephi
            # creating csv - temporal_edgelist.csv to generate temporal graph using pathpy
            # An event may carry a 'user', an 'actor', or both; each one present yields an edge.
            for role in ('user', 'actor'):
                person = event.get(role)
                if person is None:
                    continue
                nodeList.append([person['id'], person['login']])
                edgeList.append([person['id'], issue_id, 'directed', event.get('event'), timestamp, 1])
                temporal_edgeList.append([person['id'], issue_id, timestamp])

# remove duplicate users, keeping the first occurrence and the original order.
# dict keys de-duplicate in a single pass (O(n)) instead of re-scanning a copied list
# prefix for every entry; rows are converted back to lists so the result type is unchanged.
nodeList1 = [list(user) for user in dict.fromkeys(tuple(node) for node in nodeList)]
nodeDF = pd.DataFrame(nodeList1, columns=['id', 'label'])
nodeDF.to_csv('../data/users.csv', sep=',', encoding='utf-8', index=False)

# issue nodes: one row per fetched issue (id = item_id, label = issue number)
nodeIssueListDF = pd.DataFrame(nodeIssueList, columns=['id', 'label'])
nodeIssueListDF.to_csv('../data/issues.csv', sep=',', encoding='utf-8', index=False)

# user -> issue edges with event type and timestamp, for the dynamic graph
edgeDF = pd.DataFrame(edgeList, columns=['source', 'target', 'type', 'label', 'timeset', 'weight'])
edgeDF.to_csv('../data/edgeList.csv', sep=',', encoding='utf-8', index=False)

# minimal (source, target, time) edge list, read back by pathpy for the temporal graph
temporal_edgeDF = pd.DataFrame(temporal_edgeList, columns=['source', 'target', 'time'])
temporal_edgeDF.to_csv('../data/temporal_edgeList.csv', sep=',', encoding='utf-8', index=False)

In [None]:
# Build a directed pathpy temporal network from the exported (source, target, time) CSV.
# The timestamp format corresponds to the isoformat() strings stored in the 'time' column.
edge_file = '../data/temporal_edgeList.csv'
t = pp.TemporalNetwork.read_file(
    edge_file,
    separator=',',
    timestamp_format='%Y-%m-%dT%H:%M:%S',
    directed=True,
    time_rescale=2,
)
# print() gives the plain-text summary of the network
print(t)

In [None]:
# Visualisation settings passed to pathpy as keyword arguments, grouped by what they
# affect and merged in the same key order as before.
canvas_opts = {'width': 1200, 'height': 1000}
animation_opts = {'ts_per_frame': 50, 'ms_per_frame': 50}
edge_opts = {'inactive_edge_width': 4.0, 'active_edge_width': 6.0}
node_opts = {'label_offset': [0, -16], 'node_size': 10, 'label_size': '14px'}

style = {**canvas_opts, **animation_opts, **edge_opts, **node_opts}

# render the temporal network inline with these settings
pp.visualisation.plot(t, **style)

In [None]:
# Save the same visualisation (same network, same style settings) as a standalone HTML file.
html_target = '../images/users_to_issue_temporal_network.html'
pp.visualisation.export_html(t, html_target, **style)