# Static Visualisations

This is an attempt to create static visualisations for the metrics that `manuscripts` currently supports.

In [1]:
import sys

sys.path.insert(0, "..")
# utility and support modules
from pprint import pprint
from datetime import datetime, timezone, timedelta
from dateutil import parser
from dateutil.relativedelta import relativedelta
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
plt.style.use('seaborn')

from manuscripts.manuscripts2.new_functions import Query, Index, calculate_bmi, buckets_to_df, get_timeseries, get_trend, get_aggs
from manuscripts.manuscripts2.derived_classes import Issues, PullRequests
from manuscripts.manuscripts.esquery import get_first_date_of_index

In [2]:
# declare the necessary variables
github_data_source = "perceval_github"
git_data_source = "perceval_git"

github_index = Index(index_name=github_data_source)
git_index = Index(index_name=git_data_source)

start_date = parser.parse(get_first_date_of_index("http://localhost:9200", git_data_source))
end_date = datetime.now()
end_date = end_date.replace(hour=0, minute=0, second=0, microsecond=0)

Initialise the PDF to store all the images

In [3]:
from fpdf import FPDF
pdf = FPDF()

#### Trends for Closed/Open issues and PRs

In [4]:
closed_pr = PullRequests(github_index)
closed_pr.is_closed()
# get trend by month:
closed_pr.get_cardinality("id_in_repo").by_period()
print("Trend for month: ", get_trend(get_timeseries(closed_pr)))

Trend for month:  (8, 37)


In [5]:
# get trend by quarter:
closed_pr = PullRequests(github_index)
closed_pr.is_closed()
closed_pr.get_cardinality("id_in_repo").by_period(period="quarter")
print("Trend for quarter: ", get_trend(get_timeseries(closed_pr)))

Trend for quarter:  (24, -233)


In [6]:
opened_pr = PullRequests(github_index)
# get trend by month:
opened_pr.get_cardinality("id_in_repo").by_period()
print("Trend for month: ", get_trend(get_timeseries(opened_pr)))

# get trend by quarter:
opened_pr = PullRequests(github_index)
opened_pr.get_cardinality("id_in_repo").by_period(period="quarter")
print("Trend for quarter: ", get_trend(get_timeseries(opened_pr)))

Trend for month:  (2, -300)
Trend for quarter:  (2, -1100)


In [7]:
closed_issues = Issues(github_index)
closed_issues.is_closed()
# get trend by month:
closed_issues.get_cardinality("id_in_repo").by_period(field="closed_at")
print("Trend for month: ", get_trend(get_timeseries(closed_issues)))

# get trend by quarter:
closed_issues = Issues(github_index)
closed_issues.is_closed()
closed_issues.get_cardinality("id_in_repo").by_period(period="quarter")
print("Trend for quarter: ", get_trend(get_timeseries(closed_issues)))

Trend for month:  (2, 100)
Trend for quarter:  (4, -250)


In [8]:
open_issues = Issues(github_index)
open_issues.is_open()
open_issues.get_cardinality("id_in_repo").by_period()
print("Trend for month: ", get_trend(get_timeseries(open_issues)))

# get trend by quarter:
open_issues = Issues(github_index)
open_issues.is_open()
open_issues.get_cardinality("id_in_repo").by_period(period="quarter")
print("Trend for quarter: ", get_trend(get_timeseries(open_issues)))

Trend for month:  (1, 0)
Trend for quarter:  (1, -200)


In [9]:
commits = Query(git_index)
commits.get_cardinality("hash").by_period()
print("Trend for month: ", get_trend(get_timeseries(commits)))

commits = Query(git_index)
commits.get_cardinality("hash").by_period(period="quarter")
print("Trend for quarter: ", get_trend(get_timeseries(commits)))

Trend for month:  (16, 6)
Trend for quarter:  (64, -335)


In [10]:
# PRs closed in the last month:
pr = PullRequests(github_index)
pr.is_closed()
pr.get_cardinality("id")
# May has 31 days
previous_month_date = end_date - timedelta(days=31)
pr.since(field="closed_at", start=previous_month_date).until(field="closed_at", end=end_date)
get_aggs(pr)

8

In [11]:
# PRs opened in the last month:
pr = PullRequests(github_index)
pr.get_cardinality("id")
# May has 31 days
previous_month_date = end_date - timedelta(days=31)
pr.since(start=previous_month_date).until(end=end_date)
get_aggs(pr)

10

In [12]:
# Percentile PR closed
PR = PullRequests(github_index)
PR.is_closed()
PR.get_percentiles("time_to_close_days")
# May has 31 days
previous_month_date = end_date - timedelta(days=31)
PR.since(start=previous_month_date).until(end=end_date)
get_aggs(PR)

0.10500000044703484

### Project Activities

In [13]:
# number of commits made by month 
commits = Query(git_index)
commits.since(start=start_date).until(end=end_date)
commits.get_cardinality("hash").by_period()
commits_by_month = get_timeseries(commits, dataframe=True)
commits_by_month = commits_by_month.rename(columns={"value":"number of commits"})
del commits_by_month['unixtime']

# number of active authors per month
authors = Query(git_index)
authors.get_cardinality("author_name").by_period()
authors_by_month = get_timeseries(authors, dataframe=True)
authors_by_month = authors_by_month.rename(columns={"value":"number of authors"}).fillna(0)
del authors_by_month['unixtime']

figure(figsize=(10, 15), dpi=80, facecolor='w', edgecolor='k')
plt.figure(1)

plt.subplot(211)
plt.plot(commits_by_month)
plt.title('Commit and author count by Months', fontsize=30)
plt.ylabel('Count Commits', fontsize=20)
plt.grid(True)

plt.subplot(212)
plt.plot(authors_by_month)
plt.ylabel('Count Authors', fontsize=20)
plt.grid(True)
plt.xlabel('Date', fontsize=20)
plt.savefig('images/commits_and_authors.png')

In [14]:
pdf.add_page()
pdf.image("images/commits_and_authors.png", x=5, y=5, w=200, h=280)

### Process

Here we look at the ratio of closed issues to submitted issues over time. Ideally, the ratio should stay at 1.0 at all times, which would show that the maintainers are closing issues as fast as they are submitted — working tirelessly!

In [15]:
# Issues closed/ issues created
closed_issues = Issues(github_index)
closed_issues.since(start=start_date).until(end=end_date)
closed_issues.is_closed()
closed_issues.get_cardinality("id").by_period()
closed_ts = get_timeseries(closed_issues)

opened_issues =Issues(github_index)
opened_issues.since(start=start_date).until(end=end_date)
opened_issues.get_cardinality("id").by_period()
opened_ts = get_timeseries(opened_issues)

closed_opened_issues_bmi = pd.DataFrame(calculate_bmi(closed_ts, opened_ts))

In [16]:
figure(figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
plt.bar(closed_opened_issues_bmi['period'], closed_opened_issues_bmi['bmi'], 5, color="blue")
plt.title('BMI: Closed/Submitted issues', fontsize=30)
plt.xlabel('Date', fontsize=20)
plt.ylabel('BMI', fontsize=20)
plt.savefig("images/closed_submitted_issues_bmi.png")

In [17]:
pdf.add_page()
pdf.image("images/closed_submitted_issues_bmi.png", x=5, y=5, w=200, h=200)

And, in a similar manner, we calculate BMI for closed and submitted PRs. For perceval, it's quite good!

In [18]:
# PRs closed/ PRs submitted
closed_pr = PullRequests(github_index)
closed_pr.since(start=start_date).until(end=end_date)
closed_pr.is_closed()
closed_pr.get_cardinality("id").by_period()
closed_ts = get_timeseries(closed_pr)

opened_pr = PullRequests(github_index)
opened_pr.since(start=start_date).until(end=end_date)
opened_pr.get_cardinality("id").by_period()
opened_ts = get_timeseries(opened_pr)

closed_opened_prs_bmi = pd.DataFrame(calculate_bmi(closed_ts, opened_ts))

In [19]:
figure(figsize=(15, 7.5), dpi=80, facecolor='w', edgecolor='k')
plt.bar(closed_opened_prs_bmi['period'], closed_opened_prs_bmi['bmi'], 5, color="blue")
plt.title('BMI: Closed/Submitted PRs', fontsize=30)
plt.xlabel('Date', fontsize=20)
plt.ylabel('BMI', fontsize=20)
plt.savefig("images/closed_submitted_prs_bmi.png")

In [20]:
pdf.add_page()
pdf.image("images/closed_submitted_prs_bmi.png", x=5, y=5, w=200, h=200)

Next, we look at the time to close PRs (in days), as both average and median, over time.

In [21]:
# days to close review(PR) average
closed_pr = PullRequests(github_index)
closed_pr.since(start=start_date).until(end=end_date)
closed_pr.is_closed()
closed_pr.get_average("time_to_close_days").by_period()
averages = get_timeseries(closed_pr, dataframe=True)
averages = averages.rename(columns={"value":"average"})

closed_pr.get_percentiles("time_to_close_days").by_period()
percentiles = get_timeseries(closed_pr, dataframe=True)
median = percentiles.rename(columns={"value":"median"})

averages_and_median = pd.concat([averages, median], axis=1)

In [22]:
del averages_and_median['unixtime']

In [23]:
ax = averages_and_median.plot.bar(figsize=(20,10))
ticklabels = averages_and_median.index.strftime('%Y-%m-%d')
ax.xaxis.set_major_formatter(matplotlib.ticker.FixedFormatter(ticklabels))
plt.title('Time to close days: PRs', fontsize=30)
plt.savefig("images/averages_and_median_prs.png")

In [24]:
pdf.add_page()
pdf.image("images/averages_and_median_prs.png", x=5, y=5, w=200, h=200)

In [25]:
pdf.output('pdfs/sample_static_metrics.pdf', "F")

''