In [None]:
import requests
import json
import os
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict
from IPython.display import Markdown, display

# Base GraphQL Request

In [None]:
# Authorization header sent with every GraphQL request; the token is read from
# the GH_API_TOKEN environment variable so it never has to be written in the notebook.
headers = {'Authorization': 'Bearer ' + os.environ['GH_API_TOKEN']}

In [None]:
def run_query(query):
    """Send a GraphQL query to the GitHub API and return the decoded JSON body.

    Args:
        query: GraphQL query document as a string.

    Returns:
        The parsed JSON response body.

    Raises:
        Exception: if the HTTP status is not 200, or if the body carries
            'errors' without any 'data'.
    """
    request = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers)
    if request.status_code != 200:
        raise Exception('Query failed to run by returning code of {}. {}'.format(request.status_code, query))
    payload = request.json()
    # GraphQL reports query-level failures inside a 200 response. Without this
    # check callers fail later with an opaque KeyError/TypeError on payload['data'].
    # Responses that still carry data (partial results) are returned untouched.
    if isinstance(payload, dict) and payload.get('errors') and payload.get('data') is None:
        raise Exception('Query failed with GraphQL errors: {}. {}'.format(payload['errors'], query))
    return payload

# User

In [None]:
def fetch_user(username):
    """Fetch profile metadata for a GitHub user.

    Args:
        username: GitHub login to look up.

    Returns:
        A dict mirroring the fields requested in the query (email, bio,
        company, createdAt, name, organizations). 'organizations' is
        flattened from the GraphQL edges/node wrapper into a plain list of
        node dicts. Only the first 100 organizations are requested.

    Raises:
        ValueError: if the API response contains no user for this login.
    """
    # fetch
    query_user_meta = '''
    {{
      user(login: "{user}") {{
        email
        bio
        company
        createdAt
        name
        organizations(first: {limit}) {{
          edges {{
            node {{
              login
              name
            }}
          }}
          pageInfo {{
            endCursor
            hasNextPage
          }}
        }}
      }}
    }}
    '''
    user = run_query(query_user_meta.format(user=username, limit=100))['data']['user']
    if user is None:
        # An unknown login yields a null user; fail with a clear message
        # instead of an AttributeError on None.items().
        raise ValueError('GitHub user "{}" was not found'.format(username))
    # flatten the organizations connection, pass every other field through
    return {k: [el['node'] for el in v['edges']] if k == 'organizations' else v for k, v in user.items()}

# Pull Requests

In [None]:
def fetch_prs(username, ax, start=None, end=None):
    """Fetch pull requests involving a user, chart them by role and state, and return them.

    Args:
        username: GitHub login used in the `is:pr involves:` search.
        ax: matplotlib Axes that receives the title and pie chart.
        start: optional datetime; PRs created before it are dropped.
        end: optional datetime; PRs created after it are dropped.

    Returns:
        The list of search edges (dicts with a 'node' key) that passed the
        date filter. Pages are prepended as they arrive, so the last page
        fetched comes first.
    """
    # fetch
    query_repo_meta = '''
    {{
      search({cursor}query: "is:pr involves:{user}", type: ISSUE, first: {limit}) {{
        edges {{
          node {{
            ... on PullRequest {{
              title
              url
              author {{
                login
              }}
              state
              createdAt
              baseRepository {{
                isPrivate
              }}
            }}
          }}
        }}
        pageInfo {{
          endCursor
          hasNextPage
        }}
      }}
    }}
    '''
    prs = []
    curr_cursor = ''
    # Always request the first page, then follow the cursor while more pages exist.
    while True:
        search = run_query(query_repo_meta.format(user=username, limit=100, cursor=curr_cursor))['data']['search']
        prs[0:0] = search['edges']
        if not search['pageInfo']['hasNextPage']:
            break
        curr_cursor = 'after: "{}", '.format(search['pageInfo']['endCursor'])
    # filter (createdAt is parsed once per PR and reused for the title's period)
    dated = [(p, datetime.strptime(p['node']['createdAt'], '%Y-%m-%dT%H:%M:%SZ')) for p in prs]
    dated = [(p, created) for p, created in dated
             if (start is None or created >= start) and (end is None or created <= end)]
    prs = [p for p, _ in dated]
    dates = [created for _, created in dated]
    # classify
    tmp = defaultdict(list)
    for p in prs:
        node = p['node']
        # The API can return a null author (e.g. the account was deleted);
        # such PRs are counted as contributions rather than crashing.
        author = node.get('author')
        role = 'author' if author is not None and author['login'] == username else 'contributor'
        state = node['state']
        if state in ('CLOSED', 'MERGED', 'OPEN'):
            tmp['{}.{}'.format(role, state.lower())].append(p)
        else:
            tmp['unclassified'].append(p)
    gist_category = {k: len(v) for k, v in sorted(tmp.items())}
    # plot
    total = len(prs)
    ax.set_title('Pull Requests: GitHub User: {user}, Total: {total},\nPeriod: {start} to {end}'.format(
        user=username, total=total, start=min(dates) if dates else 'N/A', end=max(dates) if dates else 'N/A'))
    if gist_category:
        sizes = list(gist_category.values())
        explode = [0.0] * len(sizes)  # 0.1 and above to explode out
        ax.pie(sizes, explode=explode, shadow=True, startangle=90)
        ax.legend(['{}: {:.2f}% ({:,.0f})'.format(k, v/total*100, v) for k, v in gist_category.items()], loc="best")
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    return prs

# Issues

In [None]:
def fetch_issues(username, ax, start=None, end=None):
    """Fetch issues involving a user, chart them by role and state, and return them.

    Args:
        username: GitHub login used in the `is:issue involves:` search.
        ax: matplotlib Axes that receives the title and pie chart.
        start: optional datetime; issues created before it are dropped.
        end: optional datetime; issues created after it are dropped.

    Returns:
        The list of search edges (dicts with a 'node' key) that passed the
        date filter. Pages are prepended as they arrive, so the last page
        fetched comes first.
    """
    # fetch
    query_repo_meta = '''
    {{
      search({cursor}query: "is:issue involves:{user}", type: ISSUE, first: {limit}) {{
        edges {{
          node {{
            ... on Issue {{
              author {{
                login
              }}
              createdAt
              state
              url
              title
              assignees(first: 100) {{
                edges {{
                  node {{
                    login
                  }}
                }}
              }}
              repository {{
                isPrivate
              }}
            }}
          }}
        }}
        pageInfo {{
          endCursor
          hasNextPage
        }}
      }}
    }}
    '''
    issues = []
    curr_cursor = ''
    # Always request the first page, then follow the cursor while more pages exist.
    while True:
        search = run_query(query_repo_meta.format(user=username, limit=100, cursor=curr_cursor))['data']['search']
        issues[0:0] = search['edges']
        if not search['pageInfo']['hasNextPage']:
            break
        curr_cursor = 'after: "{}", '.format(search['pageInfo']['endCursor'])
    # filter (createdAt is parsed once per issue and reused for the title's period)
    dated = [(i, datetime.strptime(i['node']['createdAt'], '%Y-%m-%dT%H:%M:%SZ')) for i in issues]
    dated = [(i, created) for i, created in dated
             if (start is None or created >= start) and (end is None or created <= end)]
    issues = [i for i, _ in dated]
    dates = [created for _, created in dated]
    # classify: authorship wins over assignment, which wins over plain involvement
    tmp = defaultdict(list)
    for i in issues:
        node = i['node']
        state = node['state']
        if state not in ('OPEN', 'CLOSED'):
            tmp['unclassified'].append(i)
            continue
        # The API can return a null author (e.g. the account was deleted);
        # such issues fall through to the assignee/contributor checks.
        author = node.get('author')
        if author is not None and author['login'] == username:
            role = 'author'
        elif username in [a['node']['login'] for a in node['assignees']['edges']]:
            role = 'assigned'
        else:
            role = 'contributor'
        tmp['{}.{}'.format(role, state.lower())].append(i)
    gist_category = {k: len(v) for k, v in sorted(tmp.items())}
    # plot
    total = len(issues)
    ax.set_title('Issues: GitHub User: {user}, Total: {total},\nPeriod: {start} to {end}'.format(
        user=username, total=total, start=min(dates) if dates else 'N/A', end=max(dates) if dates else 'N/A'))
    if gist_category:
        sizes = list(gist_category.values())
        explode = [0.0] * len(sizes)  # 0.1 and above to explode out
        ax.pie(sizes, explode=explode, shadow=True, startangle=90)
        ax.legend(['{}: {:.2f}% ({:,.0f})'.format(k, v/total*100, v) for k, v in gist_category.items()], loc="best")
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    return issues

# Repositories (Public/Private, Owner/Fork)

In [None]:
def fetch_repos(username, ax, start=None, end=None):
    """Fetch a user's repositories, chart them by visibility and fork status, and return them.

    Args:
        username: GitHub login whose repositories are listed.
        ax: matplotlib Axes that receives the title and pie chart.
        start: optional datetime; repositories created before it are dropped.
        end: optional datetime; repositories created after it are dropped.

    Returns:
        The list of repository edges (dicts with a 'node' key) that passed
        the date filter. Pages are prepended as they arrive, so the last page
        fetched comes first.
    """
    # fetch
    query_repo_meta = '''
    {{
      user(login: "{user}") {{
        repositories({cursor}first: {limit}) {{
          totalCount
          edges {{
            node {{
              name
              url
              isPrivate
              createdAt
              owner {{
                login
              }}
              languages(first: 100) {{
                edges {{
                  node {{
                    name
                  }}
                }}
              }}
              isArchived
              isFork
            }}
          }}
          pageInfo {{
            endCursor
            hasNextPage
          }}
        }}
      }}
    }}
    '''
    repos = []
    curr_cursor = ''
    # Always request the first page, then follow the cursor while more pages exist.
    while True:
        page = run_query(query_repo_meta.format(
            user=username, limit=100, cursor=curr_cursor))['data']['user']['repositories']
        repos[0:0] = page['edges']
        if not page['pageInfo']['hasNextPage']:
            break
        curr_cursor = 'after: "{}", '.format(page['pageInfo']['endCursor'])
    # filter (createdAt is parsed once per repository and reused for the title's period)
    dated = [(r, datetime.strptime(r['node']['createdAt'], '%Y-%m-%dT%H:%M:%SZ')) for r in repos]
    dated = [(r, created) for r, created in dated
             if (start is None or created >= start) and (end is None or created <= end)]
    repos = [r for r, _ in dated]
    dates = [created for _, created in dated]
    # classify: the two flags cover every case, so no 'unclassified' bucket is needed
    tmp = defaultdict(list)
    for r in repos:
        visibility = 'private' if r['node']['isPrivate'] else 'public'
        origin = 'fork' if r['node']['isFork'] else 'owner'
        tmp['{}.{}'.format(visibility, origin)].append(r)
    gist_category = {k: len(v) for k, v in sorted(tmp.items())}
    # plot
    total = len(repos)
    ax.set_title('Repos: GitHub User: {user}, Total: {total},\nPeriod: {start} to {end}'.format(
        user=username, total=total, start=min(dates) if dates else 'N/A', end=max(dates) if dates else 'N/A'))
    if gist_category:
        sizes = list(gist_category.values())
        explode = [0.0] * len(sizes)  # 0.1 and above to explode out
        ax.pie(sizes, explode=explode, shadow=True, startangle=90)
        ax.legend(['{}: {:.2f}% ({:,.0f})'.format(k, v/total*100, v) for k, v in gist_category.items()], loc="best")
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    return repos

# Repositories (Languages)

In [None]:
def fetch_repos_lang(username, ax, start=None, end=None, top_n=10):
    """Fetch a user's repositories, chart the most common languages, and return the repositories.

    Args:
        username: GitHub login whose repositories are listed.
        ax: matplotlib Axes that receives the title and pie chart.
        start: optional datetime; repositories created before it are dropped.
        end: optional datetime; repositories created after it are dropped.
        top_n: number of most frequent languages to chart (default 10).

    Returns:
        The list of repository edges (dicts with a 'node' key) that passed
        the date filter. Pages are prepended as they arrive, so the last page
        fetched comes first.
    """
    # fetch
    query_repo_meta = '''
    {{
      user(login: "{user}") {{
        repositories({cursor}first: {limit}) {{
          totalCount
          edges {{
            node {{
              name
              url
              isPrivate
              createdAt
              owner {{
                login
              }}
              languages(first: 100) {{
                edges {{
                  node {{
                    name
                  }}
                }}
              }}
              isArchived
              isFork
            }}
          }}
          pageInfo {{
            endCursor
            hasNextPage
          }}
        }}
      }}
    }}
    '''
    repos = []
    curr_cursor = ''
    # Always request the first page, then follow the cursor while more pages exist.
    while True:
        page = run_query(query_repo_meta.format(
            user=username, limit=100, cursor=curr_cursor))['data']['user']['repositories']
        repos[0:0] = page['edges']
        if not page['pageInfo']['hasNextPage']:
            break
        curr_cursor = 'after: "{}", '.format(page['pageInfo']['endCursor'])
    # filter (createdAt is parsed once per repository and reused for the title's period)
    dated = [(r, datetime.strptime(r['node']['createdAt'], '%Y-%m-%dT%H:%M:%SZ')) for r in repos]
    dated = [(r, created) for r, created in dated
             if (start is None or created >= start) and (end is None or created <= end)]
    repos = [r for r, _ in dated]
    dates = [created for _, created in dated]
    # classify: for every language, collect the repositories that list it
    tmp = defaultdict(list)
    for r in repos:
        for l in r['node']['languages']['edges']:
            tmp[l['node']['name']].append(r)
    counts = {k: len(v) for k, v in tmp.items()}
    # Keep the top_n most frequent languages (the sort is stable, so ties keep
    # first-seen order), then list the survivors alphabetically.
    top = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:top_n]
    gist_category = dict(sorted(top))
    # plot
    total = len(repos)
    total_confidence = sum(gist_category.values())
    ax.set_title('Languages: GitHub User: {user}, Total: {total},\nPeriod: {start} to {end}'.format(
        user=username, total=total, start=min(dates) if dates else 'N/A', end=max(dates) if dates else 'N/A'))
    if gist_category:
        sizes = list(gist_category.values())
        explode = [0.0] * len(sizes)  # 0.1 and above to explode out
        ax.pie(sizes, explode=explode, shadow=True, startangle=90)
        # Percentage is relative to the repository count; the last figure is the
        # language's share among the charted languages only.
        ax.legend(['{}: {:.2f}% ({:,.0f}) {:.3f}'.format(k, v/total*100, v, v/total_confidence)
                    for k, v in gist_category.items()], loc="lower left")
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    return repos

# Report

In [None]:
def gen_report(**kwargs):
    """Render the four-panel activity report for one GitHub user and return the fetched data.

    Keyword Args:
        username: GitHub login to report on (required).
        start, end: optional datetime bounds, forwarded unchanged to the
            fetch_* functions together with any other keyword given.

    Returns:
        A dict with keys 'user', 'repositories', 'pull_requests' and
        'issues' holding what the corresponding fetch_* call returned, so
        callers collecting results get the data instead of None.
    """
    user = fetch_user(username=kwargs['username'])
    display(Markdown('# **{username} ({name})**'.format(**kwargs, name=user['name'])))
    fig, axs = plt.subplots(2, 2)
    fig.set_size_inches(14, 14)
    # The language panel is drawn for its chart only; fetch_repos runs the same
    # query and filter, so its return value is the one kept.
    fetch_repos_lang(**kwargs, ax=axs[0, 1])
    repositories = fetch_repos(**kwargs, ax=axs[0, 0])
    pull_requests = fetch_prs(**kwargs, ax=axs[1, 0])
    issues = fetch_issues(**kwargs, ax=axs[1, 1])
    plt.show()
    return {
        'user': user,
        'repositories': repositories,
        'pull_requests': pull_requests,
        'issues': issues,
    }

## External Users

In [None]:
# The list of external accounts is empty, so this cell renders no reports.
users = []
results = [gen_report(username=login, start=datetime(2018, 7, 20)) for login in users]

## DJNeuro

In [None]:
# One report per listed account, limited to activity created on or after 2018-07-20.
users = [
    'guzman-raphael',
    'ixcat',
    'mahos',
    'dimitri-yatsenko',
    'eywalker',
    'shenshan',
    'ttngu207',
]
results = [gen_report(username=login, start=datetime(2018, 7, 20)) for login in users]