In [1]:
import pandas as pd

In [2]:
import requests

In [3]:
def binder_url(org, repo, ref='master'):
    """Build the GESIS Binder launch URL for a GitHub repository.

    Args:
        org: GitHub user or organisation that owns the repository.
        repo: Repository name.
        ref: Branch, tag or commit to launch. Defaults to ``'master'``.

    Returns:
        The launch URL as a string. The ``gh`` (GitHub) provider segment is
        always used, whatever host the repository actually lives on.
    """
    return f'https://notebooks.gesis.org/binder/v2/gh/{org}/{repo}/{ref}'

In [4]:
from datetime import datetime, timezone


def ts_to_dt(ts):
    """Convert a Unix timestamp to a naive ``datetime`` expressed in UTC.

    Args:
        ts: Seconds since the epoch (int or float).

    Returns:
        A ``datetime`` without ``tzinfo`` whose fields are the UTC wall-clock
        time for ``ts``.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build an
    # aware UTC value and drop tzinfo so the result (and its printed form)
    # stays naive.
    return datetime.fromtimestamp(ts, tz=timezone.utc).replace(tzinfo=None)

# The most popular repositories

In [5]:
# Repositories, written as 'org/repo', that process_data() leaves out of the ranking.
REPOS_TO_FILTER = ['gesiscss/binder-stats']

In [6]:
def query(time_range, filter="{status='success'}", timeout=30):
    """Fetch ``binderhub_launch_count_total`` samples from the GESIS Prometheus.

    Args:
        time_range: Prometheus range-selector duration, e.g. ``'1h'`` or ``'30d'``.
        filter: Label-matcher block, braces included, appended to the metric
            name. The name shadows the builtin but is kept so existing
            keyword callers continue to work.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``data.result`` list from the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    promql = f"binderhub_launch_count_total{filter}[{time_range}]"
    # Echo the expression so the rendered notebook shows what was asked.
    print(promql)
    resp = requests.get('https://notebooks.gesis.org/prometheus/api/v1/query',
                        params={'query': promql},
                        timeout=timeout)
    # Fail with the HTTP error itself rather than a KeyError/JSON error below.
    resp.raise_for_status()
    return resp.json()['data']['result']

In [7]:
def process_data(data):
    """Aggregate launch-counter series into one row per repository.

    Args:
        data: Iterable of series as returned by ``query()``. Each item is read
            as ``item['metric']['repo']``, ``item['metric']['provider']`` and
            ``item['values']`` (a list of ``[timestamp, value]`` pairs).

    Returns:
        dict mapping each repository URL to
        ``[repo, org, provider, launches, repo_url, binder_url]``. Empty when
        ``data`` has no series or every series is filtered out.

    Side effects:
        Prints the earliest sample timestamp (UTC) when at least one series
        was processed.
    """
    # Keyed by the full repo URL rather than the bare repo name, so that
    # same-named repositories of different owners are not merged.
    d = {}  # {repo_url: [repo, org, provider, launches, repo_url, binder_url]}
    value_times = []
    for container in data:
        values = container['values']
        if not values:
            # nothing to take max()/min() of
            continue
        repo_url = container['metric']['repo']
        provider = container['metric']['provider']
        # The last two path segments are org and repo; the leading part is not needed.
        _, org, repo = repo_url.replace('https://', '').rsplit('/', 2)
        if f'{org}/{repo}' in REPOS_TO_FILTER:
            continue

        # calculate number of launches for each series = max value in time
        # NOTE(review): this is the highest cumulative counter value seen in
        # the window, which may include launches from before the window
        # started -- confirm this is the intended figure.
        launches = max(int(sample[1]) for sample in values)
        value_times.append(min(float(sample[0]) for sample in values))
        if repo_url not in d:
            d[repo_url] = [repo, org, provider, launches, repo_url, binder_url(org, repo)]
        else:
            # same repo can be launched on different instances (after a new deployment/update)
            d[repo_url][3] += launches
    if value_times:
        print('Since', ts_to_dt(min(value_times)), 'in UTC')
    return d

In [8]:
def get_popular_repos(time_range):
    """Query the launch counters for ``time_range`` and aggregate them per repository.

    Returns whatever ``process_data()`` produces for the series that
    ``query()`` hands back.
    """
    raw_series = query(f'{time_range}')
    return process_data(raw_series)

In [9]:
def display_df(time_range):
    """Show the launch ranking for ``time_range`` and print the overall total.

    Args:
        time_range: Prometheus duration string such as ``'1h'`` or ``'30d'``;
            passed on to ``get_popular_repos()``.
    """
    from html import escape

    repos = get_popular_repos(time_range)
    launches_df = pd.DataFrame(list(repos.values()),
                               columns=['repo', 'org', 'provider', 'launches', 'repo_url', 'binder_url'])
    launches_df = launches_df.sort_values('launches', ascending=False).reset_index(drop=True)

    def link(label):
        # URLs originate from metric labels, so escape them before placing
        # them inside an HTML attribute.
        return lambda url: f'<a target="_blank" href="{escape(url)}">{label}</a>'

    # Keep the Styler under its own name; the DataFrame stays usable below.
    styled = launches_df.style.format({'repo_url': link('repo url'),
                                       'binder_url': link('binder url')})
    display(styled)
    print("Total number of launches: " + str(sum(launches_df['launches'])) + " in "+ str(time_range))

## Most popular repositories in the last hour

In [10]:
# Rank repositories using the counter samples from the past hour.
display_df('1h')

binderhub_launch_count_total{status='success'}[1h]
Since 2018-11-08 11:04:27.883000 in UTC


Unnamed: 0,repo,org,provider,launches,repo_url,binder_url
0,PythonDataScienceHandbook,gesiscss,GitHub,7,repo url,binder url
1,workshop_girls_day,gesiscss,GitHub,3,repo url,binder url
2,ligo-binder,minrk,GitHub,2,repo url,binder url
3,tetris-binder,djsegal,GitHub,1,repo url,binder url
4,ptm,gesiscss,GitHub,1,repo url,binder url
5,demo-jupyterlab,raphbacher,GitHub,1,repo url,binder url


Total number of launches: 15 in 1h


## Most popular repositories in the last day

In [11]:
# Rank repositories using the counter samples from the past day.
display_df('1d')

binderhub_launch_count_total{status='success'}[1d]
Since 2018-11-07 12:04:11.775000 in UTC


Unnamed: 0,repo,org,provider,launches,repo_url,binder_url
0,PythonDataScienceHandbook,gesiscss,GitHub,7,repo url,binder url
1,workshop_girls_day,gesiscss,GitHub,4,repo url,binder url
2,ptm,gesiscss,GitHub,2,repo url,binder url
3,ligo-binder,minrk,GitHub,2,repo url,binder url
4,ThinkDSP,AllenDowney,GitHub,1,repo url,binder url
5,requirements,binder-examples,GitHub,1,repo url,binder url
6,elegant-scipy,elegant-scipy,GitHub,1,repo url,binder url
7,jupyter4kids,mikkokotila,GitHub,1,repo url,binder url
8,dl-workshop,pacm,GitHub,1,repo url,binder url
9,tetris-binder,djsegal,GitHub,1,repo url,binder url


Total number of launches: 22 in 1d


## Most popular repositories in the last 30 days 

In [12]:
# Same ranking over a 30-day window; the printed "Since" line shows how far
# back the returned samples actually reach.
display_df('30d')

binderhub_launch_count_total{status='success'}[30d]
Since 2018-11-07 08:13:11.776000 in UTC


Unnamed: 0,repo,org,provider,launches,repo_url,binder_url
0,PythonDataScienceHandbook,gesiscss,GitHub,7,repo url,binder url
1,workshop_girls_day,gesiscss,GitHub,4,repo url,binder url
2,ptm,gesiscss,GitHub,2,repo url,binder url
3,ligo-binder,minrk,GitHub,2,repo url,binder url
4,ThinkDSP,AllenDowney,GitHub,1,repo url,binder url
5,requirements,binder-examples,GitHub,1,repo url,binder url
6,elegant-scipy,elegant-scipy,GitHub,1,repo url,binder url
7,jupyter4kids,mikkokotila,GitHub,1,repo url,binder url
8,dl-workshop,pacm,GitHub,1,repo url,binder url
9,tetris-binder,djsegal,GitHub,1,repo url,binder url


Total number of launches: 22 in 30d


## Most popular repositories in the last 60 days 

In [13]:
# Same ranking over a 60-day window; the printed "Since" line shows how far
# back the returned samples actually reach.
display_df('60d')

binderhub_launch_count_total{status='success'}[60d]
Since 2018-11-07 08:13:11.776000 in UTC


Unnamed: 0,repo,org,provider,launches,repo_url,binder_url
0,PythonDataScienceHandbook,gesiscss,GitHub,7,repo url,binder url
1,workshop_girls_day,gesiscss,GitHub,4,repo url,binder url
2,ptm,gesiscss,GitHub,2,repo url,binder url
3,ligo-binder,minrk,GitHub,2,repo url,binder url
4,ThinkDSP,AllenDowney,GitHub,1,repo url,binder url
5,requirements,binder-examples,GitHub,1,repo url,binder url
6,elegant-scipy,elegant-scipy,GitHub,1,repo url,binder url
7,jupyter4kids,mikkokotila,GitHub,1,repo url,binder url
8,dl-workshop,pacm,GitHub,1,repo url,binder url
9,tetris-binder,djsegal,GitHub,1,repo url,binder url


Total number of launches: 22 in 60d
