In [1]:
import sys
import os
sys.path.insert(0, '../ghtesting')

import matplotlib.pyplot as plt
import pandas as pd
from ghdatabase import GHDatabase
from ghrepo import GHRepo

In [2]:
# Apply the 'science' plotting style to every figure in this notebook.
# NOTE(review): 'science' is not a built-in matplotlib style; presumably it is
# provided by an extra style package installed in this environment - confirm.
plt.style.use('science')

In [3]:
# Open the repository database; the connection string is read from the
# environment so that no credentials live in the notebook.
db = GHDatabase(
    'ecs260',
    'webframework_repos',
    os.environ['CONNECTION_STRING'],
)

# Materialise every repository record once so later cells can iterate repeatedly.
repos = [*db.get_repos()]
num_repos = len(repos)

print(f'Number of repos: {num_repos}')

ServerSelectionTimeoutError: localhost:27017: [Errno 111] Connection refused, Timeout: 30s, Topology Description: <TopologyDescription id: 619da73ee5fc333959ff5aa8, topology_type: Single, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [Errno 111] Connection refused')>]>

In [None]:
# One record per repository: its name plus a True flag for every entry of
# repo.badges[0] (treated as the repository's CI services by the plots below).
rows = []
for repojson in repos:
    repo = GHRepo(repojson)
    rows.append({'name': repo.name, **dict.fromkeys(repo.badges[0], True)})

# Flags that a repository does not have come out as NaN; turn them into False.
df = pd.DataFrame(rows).fillna(False).replace(1.0, True)
df

In [None]:
fig, ax = plt.subplots(dpi=300)

# Count the enabled CI flags per repository (row-wise sum over every column
# except 'name'), then count how many repositories share each total.
repo_cis_series = df[df.columns[1:]].sum(axis=1).sort_values(ascending=False).value_counts().astype(int)

ci_redun_plot = repo_cis_series.plot.bar(
    ylabel='Number of projects',
    xlabel='Number of enabled CI services',
    logy=True,
    ax=ax,
)

# Only raise the upper limit to leave head-room for the bar labels. The y axis
# is logarithmic, so a lower limit of 0 is invalid: matplotlib ignores it and
# emits a warning. Leaving `bottom` unset keeps the automatically chosen bound.
ax.set_ylim(top=repo_cis_series.max() + 5000)
ci_redun_plot.bar_label(ci_redun_plot.containers[0])

fig.savefig('../plots/ci-service-redundancy.pdf', transparent = True, bbox_inches = 'tight', dpi=300)

In [None]:
fig, ax = plt.subplots(dpi=300)

# Services enabled by fewer projects than this are merged into a single
# 'other' slice so the pie chart stays readable.
MIN_CI_PROJECTS = 50

# Number of projects per CI service (column-wise sum over every column except 'name').
ci_counts = df[df.columns[1:]].sum().sort_values(ascending=False)

# Split with a boolean mask instead of dropping entries from the series while
# iterating over it.
is_minor_ci = ci_counts < MIN_CI_PROJECTS
ci_series = ci_counts[~is_minor_ci].copy()
ci_series['other'] = ci_counts[is_minor_ci].sum()

# Raw string: the backslash must reach matplotlib literally; a plain '\%' is an
# invalid Python escape sequence. Presumably the escape is needed because the
# plotting style renders text through LaTeX - confirm.
ci_series.plot.pie(
    ylabel='',
    figsize=(6, 6),
    legend=False,
    ax=ax,
    autopct=r'%1.2f\%%',
)

# A white disc over the centre turns the pie into a donut chart.
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax.add_artist(centre_circle)

# `pad_inches` is the savefig keyword for the padding around a tight bounding
# box; the previous `pad=0` is not a savefig parameter.
fig.savefig('../plots/ci-service-usage.pdf', transparent = True, bbox_inches = 'tight', pad_inches=0, dpi=300)

In [None]:
# One record per repository: its name plus a True flag in the column named
# after its primary language.
langs = []
for repojson in repos:
    repo = GHRepo(repojson)
    # list.append returns None, so its result must not be bound to langs_df;
    # the frame is built once, after the loop.
    langs.append({'name': repo.name, repo.primarylanguage: True})

# Languages a repository does not use come out as NaN; turn them into False.
langs_df = pd.DataFrame(langs)
langs_df = langs_df.fillna(False).replace(1.0, True)
langs_df

In [None]:
fig, ax = plt.subplots(dpi=300)

# Languages used by fewer projects than this are merged into a single 'other' bar.
MIN_LANG_PROJECTS = 75

# Number of projects per primary language (column-wise sum over every column except 'name').
lang_counts = langs_df[langs_df.columns[1:]].sum().sort_values(ascending=False)

# Split with a boolean mask instead of dropping entries from the series while
# iterating over it.
is_minor_lang = lang_counts < MIN_LANG_PROJECTS
lang_series = lang_counts[~is_minor_lang].copy()
lang_series['other'] = lang_counts[is_minor_lang].sum()

lang_plot = lang_series.plot.bar(
    ylabel='Number of projects',
    xlabel='Primary Language',
    logy=True,
    ax=ax,
    legend=False,
)

# Only raise the upper limit to leave head-room for the bar labels. The y axis
# is logarithmic, so a lower limit of 0 is invalid: matplotlib ignores it and
# emits a warning. Leaving `bottom` unset keeps the automatically chosen bound.
ax.set_ylim(top=lang_series.max() + 10000)
lang_plot.bar_label(lang_plot.containers[0])

fig.savefig('../plots/project-primary-language.pdf', transparent = True, bbox_inches = 'tight', dpi=300)

In [None]:
# Web framework topics tracked throughout the rest of the notebook.
webtopics = ['angular', 'react', 'vue']

# One record per repository: its name plus an explicit True/False flag for
# every tracked topic. Emitting all flags (instead of only the True ones)
# guarantees that each topic column exists even when no repository carries
# that topic, so grouping by `webtopics` later cannot fail on a missing column.
topics = []
for repojson in repos:
    repo = GHRepo(repojson)
    new_row = {'name': repo.name}
    for t in webtopics:
        new_row[t] = t in repo.topics
    # list.append returns None, so its result must not be bound to topics_df;
    # the frame is built once, after the loop.
    topics.append(new_row)

# Passing the columns explicitly keeps the schema stable even for an empty list.
topics_df = pd.DataFrame(topics, columns=['name', *webtopics])
topics_df

In [None]:
from matplotlib_venn import venn3

webtopics = ['angular', 'react', 'vue']

# Count the repositories for every observed combination of the three topic
# flags; the result has one row per combination and a 'size' column.
topic_flags = topics_df.drop(columns='name')
t_df = topic_flags.groupby(webtopics, as_index=False).size()
t_df

In [None]:
fig, ax = plt.subplots(dpi=300)
# https://towardsdatascience.com/professional-venn-diagrams-in-python-638abfff39cc
# https://pypi.org/project/matplotlib-venn/

# Map each observed (angular, react, vue) flag combination to its repository count.
size_by_combo = {
    (bool(angular), bool(react), bool(vue)): int(size)
    for angular, react, vue, size in zip(
        t_df['angular'], t_df['react'], t_df['vue'], t_df['size']
    )
}

# venn3 expects the seven region sizes in the fixed order
# (Abc, aBc, ABc, abC, AbC, aBC, ABC); with the labels used below
# A = Vue, B = React, C = Angular. Each entry is (vue, react, angular).
venn_regions = [
    (True, False, False),
    (False, True, False),
    (True, True, False),
    (False, False, True),
    (True, False, True),
    (False, True, True),
    (True, True, True),
]

# Look every region up explicitly rather than relying on the row order of t_df:
# a combination that never occurs (or repositories with none of the topics)
# would otherwise shift the values into the wrong regions.
sets = tuple(
    size_by_combo.get((angular, react, vue), 0)
    for vue, react, angular in venn_regions
)

venn3(subsets = sets
      , set_labels = ('Vue', 'React', 'Angular'), ax=ax)

fig.savefig('../plots/project-webframework-venn.pdf', transparent = True, bbox_inches = 'tight', dpi=300)

In [None]:
# For every framework topic, count how many repositories carrying that topic
# list each badge in repo.badges[0].
topicci = []
webtopics = ['angular', 'react', 'vue']
for t in webtopics:
    new_row = {'framework': t}
    for repojson in repos:
        repo = GHRepo(repojson)
        if t not in repo.topics:
            continue
        for b in repo.badges[0]:
            new_row[b] = new_row.get(b, 0) + 1

    # list.append returns None, so its result must not be bound to topicci_df;
    # the frame is built once, after the loop.
    topicci.append(new_row)

# Badges never seen for a framework come out as NaN; treat them as a count of 0.
topicci_df = pd.DataFrame(topicci)
topicci_df = topicci_df.fillna(0)
topicci_df

In [None]:
# Transpose so that frameworks become the columns and badge names the rows.
topicci_dfT = topicci_df.set_index('framework').transpose()

In [None]:
# Share of each badge within a framework: every column of counts is divided by
# its own total.
topiccipct_df = pd.DataFrame({
    label: topicci_dfT[framework] / topicci_dfT[framework].sum()
    for label, framework in (('Angular', 'angular'), ('React', 'react'), ('Vue', 'vue'))
})

In [None]:
# Display only the selected CI services, in this fixed row order.
ci_display_order = ['travisci', 'github', 'circleci', 'appveyorci', 'azure_pipelines']
topiccipct_df.reindex(ci_display_order)

In [None]:
# For every framework topic, count the repositories carrying that topic that
# have at least one entry in repo.badges[0] ('has_ci') versus none ('no_ci').
webtopics = ['angular', 'react', 'vue']
topic_ci_en = []
for t in webtopics:
    new_row = {'framework': t}
    for repojson in repos:
        repo = GHRepo(repojson)
        if t in repo.topics:
            bucket = 'has_ci' if len(list(repo.badges[0])) > 0 else 'no_ci'
            new_row[bucket] = new_row.get(bucket, 0) + 1
    topic_ci_en.append(new_row)

# A bucket that never occurred for a framework is NaN; treat it as 0. After the
# transpose, frameworks are the columns and the buckets are the rows.
topic_ci_en_df = pd.DataFrame(topic_ci_en).fillna(0).set_index('framework').T
topic_ci_en_df

In [None]:
# Fraction of repositories with at least one CI badge, per framework.
# The values are scalars, so they are wrapped in one-element lists to give the
# frame a single row. Assigning a scalar as a column of an empty DataFrame
# creates the column but no rows, which left the displayed table empty.
topic_ci_en_pct_df = pd.DataFrame(
    {
        label: [topic_ci_en_df.loc['has_ci', framework] / topic_ci_en_df[framework].sum()]
        for label, framework in (('Angular', 'angular'), ('React', 'react'), ('Vue', 'vue'))
    },
    index=['has_ci'],
)
topic_ci_en_pct_df

In [None]:
# Print, one line per framework, the fraction of its repositories that have CI.
for framework in ('angular', 'react', 'vue'):
    framework_counts = topic_ci_en_df[framework]
    print(framework_counts['has_ci'] / framework_counts.sum())