# Microtask 3
---
Produce a listing of repositories, as a table and as a .csv file, with the number of commits authored, issues opened, and pull requests opened, during the last three months, ordered by total number (commits plus issues plus pull requests).

In [1]:
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

import subprocess
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

Assuming we have a running instance of Elasticsearch locally at http://localhost:9200

In [2]:
# elasticsearch instance
es = Elasticsearch('http://localhost:9200', verify_certs=False)

In [7]:
# the following repositories will be used
repos = [
    {'org': 'chaoss', 
     'repo': 'grimoirelab-perceval', 
     'index_raw': 'perceval_r', 
     'index_enriched': 'perceval', 
     'index_github_raw': 'perceval_g_r', 
     'index_github_enriched': 'perceval_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-kingarthur',
     'index_raw': 'arthur_r',
     'index_enriched': 'arthur',
     'index_github_raw': 'arthur_g_r',
     'index_github_enriched': 'arthur_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-sortinghat',
     'index_raw': 'sortinghat_r',
     'index_enriched': 'sortinghat',
     'index_github_raw': 'sortinghat_g_r',
     'index_github_enriched': 'sortinghat_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-mordred',
     'index_raw': 'mordred_r',
     'index_enriched': 'mordred',
     'index_github_raw': 'mordred_g_r',
     'index_github_enriched': 'mordred_g'},
    {'org': 'chaoss',
     'repo': 'grimoirelab-manuscripts',
     'index_raw': 'manuscripts_r',
     'index_enriched': 'manuscripts',
     'index_github_raw': 'manuscripts_g_r',
     'index_github_enriched': 'manuscripts_g'}
]
token = ''

Run `p2o.py` to extract data from these repositories

In [8]:
for repo in repos:
    print(f"Fetching git indices for {repo['repo']}")
    subprocess.run(['p2o.py', '--enrich', '--index', repo['index_raw'], '--index-enrich', repo['index_enriched'], '-e', 'http://localhost:9200', '--no_inc', '--debug', 'git', 'https://github.com/' + repo['org'] + '/' + repo['repo'] + '.git'])
    print(f"Fetching GitHub indices for {repo['repo']}")
    subprocess.run(['p2o.py', '--enrich', '--index', repo['index_github_raw'], '--index-enrich', repo['index_github_enriched'], '-e', 'http://localhost:9200', '--no_inc', '--debug', 'github', repo['org'], repo['repo'], '-t', token, '--sleep-for-rate'])
    print('\n')

Fetching git indices for grimoirelab-perceval
Fetching GitHub indices for grimoirelab-perceval


Fetching git indices for grimoirelab-kingarthur
Fetching GitHub indices for grimoirelab-kingarthur


Fetching git indices for grimoirelab-sortinghat
Fetching GitHub indices for grimoirelab-sortinghat


Fetching git indices for grimoirelab-mordred
Fetching GitHub indices for grimoirelab-mordred


Fetching git indices for grimoirelab-manuscripts
Fetching GitHub indices for grimoirelab-manuscripts




In [10]:
s = Search(using=es, index='perceval_g')
s = s.filter('range', created_at={'gte' : 'now-3M'})
s = s.sort({'created_at': {'order': 'asc'}})
result = s.execute()

In [11]:
result =  result.to_dict()['hits']['hits']

In [12]:
github = [ix['_source'] for ix in result]
github = pd.DataFrame(github)
github.head(20)

Unnamed: 0,assignee_domain,assignee_email,assignee_geolocation,assignee_location,assignee_login,assignee_name,assignee_org,author_name,closed_at,created_at,...,url,url_id,user_domain,user_email,user_geolocation,user_location,user_login,user_name,user_org,uuid
0,,,,,,,,valerio,2017-12-13T16:06:24Z,2017-12-11T17:08:20Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/251,bitergia.com,valcos@bitergia.com,"{'lat': 40.4167754, 'lon': -3.7037902}","Madrid, Spain",valeriocos,valerio,@Bitergia,45a20558b61ebd3cd85e31e0f0f87fa1b446df0a
1,,,,,,,,valerio,2018-01-29T13:24:40Z,2017-12-13T16:18:57Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/252,bitergia.com,valcos@bitergia.com,"{'lat': 40.4167754, 'lon': -3.7037902}","Madrid, Spain",valeriocos,valerio,@Bitergia,c52c2ea2b4ad34ffd3fa1f2b5281c993e6807c74
2,,,,,,,,valerio,2018-01-16T17:08:10Z,2017-12-14T18:00:59Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/253,bitergia.com,valcos@bitergia.com,"{'lat': 40.4167754, 'lon': -3.7037902}","Madrid, Spain",valeriocos,valerio,@Bitergia,a41fd70b62704e200580d1c568670afef4a7dccf
3,,,,,,,,Manrique Lopez,2017-12-18T10:47:29Z,2017-12-15T08:55:13Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/254,,,"{'lat': 40.2902056, 'lon': -3.8035477}","Fuenlabrada, Spain",jsmanrique,Manrique Lopez,Bitergia,fa0f8ad93251091fbe4c413659cbea22a663526d
4,,,,,,,,valerio,2018-01-15T18:45:58Z,2017-12-15T09:55:04Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/255,bitergia.com,valcos@bitergia.com,"{'lat': 40.4167754, 'lon': -3.7037902}","Madrid, Spain",valeriocos,valerio,@Bitergia,89e24f2bac12056516fc00fbc908d42f0788df7e
5,,,,,,,,valerio,2017-12-18T10:47:29Z,2017-12-15T11:00:28Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/256,bitergia.com,valcos@bitergia.com,"{'lat': 40.4167754, 'lon': -3.7037902}","Madrid, Spain",valeriocos,valerio,@Bitergia,c862b9aa510b1a0e751888a1f0a7533705fe73ea
6,,,,,sduenas,Santiago Dueñas,,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T11:39:22Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/257,,,,,sduenas,Santiago Dueñas,,98c80245cdd2d2c4cd05d5aa132655ebe8b81052
7,,,,,,,,Santiago Dueñas,2017-12-18T16:22:17Z,2017-12-18T13:26:25Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/258,,,,,sduenas,Santiago Dueñas,,ec687ef905a5900c1e26c202dcece61e06e4ec06
8,,,,,,,,Alvaro del Castillo,2017-12-21T12:41:40Z,2017-12-21T12:19:36Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/259,bitergia.com,acs@bitergia.com,"{'lat': 40.3082504, 'lon': -3.7323934}","Getafe, Madrid, Spain",acs,Alvaro del Castillo,Bitergia,18e4bb925d7dbcb9c1efddf71a5416b921ce61d2
9,,,,,sduenas,Santiago Dueñas,,Alberto Martín,2017-12-26T15:45:10Z,2017-12-21T21:34:00Z,...,https://github.com/chaoss/grimoirelab-perceval...,chaoss/grimoirelab-perceval/issues/260,,,"{'lat': 40.4167754, 'lon': -3.7037902}",Madrid,albertinisg,Alberto Martín,@Bitergia,22bba0ac12f35a47cde686093f331d83573cbb6d
