# Microtask 3
---
Produce a listing of repositories, as a table and as a .csv file, with the number of commits authored, issues opened, and pull requests opened, during the last three months, ordered by total number (commits plus issues plus pull requests).

In [1]:
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

import subprocess
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

Assuming we have a running instance of Elasticsearch locally at http://localhost:9200

In [2]:
# elasticsearch instance
es = Elasticsearch('http://localhost:9200', verify_certs=False)

In [7]:
# the following repositories will be used
repos = [
    {'org': 'chaoss', 
     'repo': 'grimoirelab-perceval', 
     'index_raw': 'perceval_r', 
     'index_enriched': 'perceval', 
     'index_github_raw': 'perceval_g_r', 
     'index_github_enriched': 'perceval_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-kingarthur',
     'index_raw': 'arthur_r',
     'index_enriched': 'arthur',
     'index_github_raw': 'arthur_g_r',
     'index_github_enriched': 'arthur_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-sortinghat',
     'index_raw': 'sortinghat_r',
     'index_enriched': 'sortinghat',
     'index_github_raw': 'sortinghat_g_r',
     'index_github_enriched': 'sortinghat_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-mordred',
     'index_raw': 'mordred_r',
     'index_enriched': 'mordred',
     'index_github_raw': 'mordred_g_r',
     'index_github_enriched': 'mordred_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-manuscripts',
     'index_raw': 'manuscripts_r',
     'index_enriched': 'manuscripts',
     'index_github_raw': 'manuscripts_g_r',
     'index_github_enriched': 'manuscripts_g'}
]
token = ''

Run `p2o.py` to extract data from these repositories

In [8]:
for repo in repos:
    print(f"Fetching git indices for {repo['repo']}")
    subprocess.run(['p2o.py', '--enrich', '--index', repo['index_raw'], '--index-enrich', repo['index_enriched'], '-e', 'http://localhost:9200', '--no_inc', '--debug', 'git', 'https://github.com/' + repo['org'] + '/' + repo['repo'] + '.git'])
    print(f"Fetching GitHub indices for {repo['repo']}")
    subprocess.run(['p2o.py', '--enrich', '--index', repo['index_github_raw'], '--index-enrich', repo['index_github_enriched'], '-e', 'http://localhost:9200', '--no_inc', '--debug', 'github', repo['org'], repo['repo'], '-t', token, '--sleep-for-rate'])
    print('\n')

Fetching git indices for grimoirelab-perceval
Fetching GitHub indices for grimoirelab-perceval


Fetching git indices for grimoirelab-kingarthur
Fetching GitHub indices for grimoirelab-kingarthur


Fetching git indices for grimoirelab-sortinghat
Fetching GitHub indices for grimoirelab-sortinghat


Fetching git indices for grimoirelab-mordred
Fetching GitHub indices for grimoirelab-mordred


Fetching git indices for grimoirelab-manuscripts
Fetching GitHub indices for grimoirelab-manuscripts




In [17]:
s = Search(using=es, index='perceval_g')
s = s.filter('range', created_at={'gte' : 'now-3M'})
s = s.source(['time_to_close_days', 'time_open_days', 'item_type', 'created_at', 'closed_at', 'author_name', 'id_in_repo'])
s = s.sort({'created_at': {'order': 'asc'}})
result = s.execute()

In [18]:
result =  result.to_dict()['hits']['hits']
result

[{'_id': '45a20558b61ebd3cd85e31e0f0f87fa1b446df0a',
  '_index': 'perceval_g',
  '_score': None,
  '_source': {'author_name': 'valerio',
   'closed_at': '2017-12-13T16:06:24Z',
   'created_at': '2017-12-11T17:08:20Z',
   'id_in_repo': '251',
   'item_type': 'pull request',
   'time_open_days': 1.96,
   'time_to_close_days': 1.96},
  '_type': 'items',
  'sort': [1513012100000]},
 {'_id': 'c52c2ea2b4ad34ffd3fa1f2b5281c993e6807c74',
  '_index': 'perceval_g',
  '_score': None,
  '_source': {'author_name': 'valerio',
   'closed_at': '2018-01-29T13:24:40Z',
   'created_at': '2017-12-13T16:18:57Z',
   'id_in_repo': '252',
   'item_type': 'pull request',
   'time_open_days': 46.88,
   'time_to_close_days': 46.88},
  '_type': 'items',
  'sort': [1513181937000]},
 {'_id': 'a41fd70b62704e200580d1c568670afef4a7dccf',
  '_index': 'perceval_g',
  '_score': None,
  '_source': {'author_name': 'valerio',
   'closed_at': '2018-01-16T17:08:10Z',
   'created_at': '2017-12-14T18:00:59Z',
   'id_in_repo': '

In [20]:
github = [ix['_source'] for ix in result]
github = pd.DataFrame(github)
github

Unnamed: 0,author_name,closed_at,created_at,id_in_repo,item_type,time_open_days,time_to_close_days
0,valerio,2017-12-13T16:06:24Z,2017-12-11T17:08:20Z,251,pull request,1.96,1.96
1,valerio,2018-01-29T13:24:40Z,2017-12-13T16:18:57Z,252,pull request,46.88,46.88
2,valerio,2018-01-16T17:08:10Z,2017-12-14T18:00:59Z,253,pull request,32.96,32.96
3,Manrique Lopez,2017-12-18T10:47:29Z,2017-12-15T08:55:13Z,254,issue,3.08,3.08
4,valerio,2018-01-15T18:45:58Z,2017-12-15T09:55:04Z,255,pull request,31.37,31.37
5,valerio,2017-12-18T10:47:29Z,2017-12-15T11:00:28Z,256,pull request,2.99,2.99
6,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T11:39:22Z,257,issue,0.2,0.2
7,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T13:26:25Z,258,pull request,0.12,0.12
8,Alvaro del Castillo,2017-12-21T12:41:40Z,2017-12-21T12:19:36Z,259,pull request,0.02,0.02
9,Alberto Martín,2017-12-26T15:45:10Z,2017-12-21T21:34:00Z,260,issue,4.76,4.76


In [21]:
github.sort_values(by='item_type')

Unnamed: 0,author_name,closed_at,created_at,id_in_repo,item_type,time_open_days,time_to_close_days
3,Manrique Lopez,2017-12-18T10:47:29Z,2017-12-15T08:55:13Z,254,issue,3.08,3.08
6,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T11:39:22Z,257,issue,0.2,0.2
9,Alberto Martín,2017-12-26T15:45:10Z,2017-12-21T21:34:00Z,260,issue,4.76,4.76
0,valerio,2017-12-13T16:06:24Z,2017-12-11T17:08:20Z,251,pull request,1.96,1.96
1,valerio,2018-01-29T13:24:40Z,2017-12-13T16:18:57Z,252,pull request,46.88,46.88
2,valerio,2018-01-16T17:08:10Z,2017-12-14T18:00:59Z,253,pull request,32.96,32.96
4,valerio,2018-01-15T18:45:58Z,2017-12-15T09:55:04Z,255,pull request,31.37,31.37
5,valerio,2017-12-18T10:47:29Z,2017-12-15T11:00:28Z,256,pull request,2.99,2.99
7,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T13:26:25Z,258,pull request,0.12,0.12
8,Alvaro del Castillo,2017-12-21T12:41:40Z,2017-12-21T12:19:36Z,259,pull request,0.02,0.02


In [29]:
issues = github.where(github.item_type == 'issue').dropna()
issues

Unnamed: 0,author_name,closed_at,created_at,id_in_repo,item_type,time_open_days,time_to_close_days
3,Manrique Lopez,2017-12-18T10:47:29Z,2017-12-15T08:55:13Z,254,issue,3.08,3.08
6,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T11:39:22Z,257,issue,0.2,0.2
9,Alberto Martín,2017-12-26T15:45:10Z,2017-12-21T21:34:00Z,260,issue,4.76,4.76
