In [1]:
import numpy as np
import pandas as pd
import requests
from datetime import datetime, timedelta

In [2]:
def binder_url(org, repo):
    """Return the GESIS BinderHub launch URL for the ``master`` branch of GitHub repo ``org/repo``."""
    launch_url = f'https://notebooks.gesis.org/binder/v2/gh/{org}/{repo}/master'
    return launch_url

In [3]:
def query(time_range, timeout=30):
    """Fetch the successful-launch counter series from the GESIS Prometheus API.

    Builds the PromQL expression
    ``binderhub_launch_time_seconds_count{status='success'}[<time_range>]``,
    prints it, and sends it to the instant-query endpoint.

    Parameters:
        time_range: duration string placed inside the range selector, e.g. ``'24h'``.
        timeout: seconds to wait for the server before giving up (default 30).
            Without a timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The ``data.result`` list taken from the JSON response.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-success HTTP status.
    """
    selectors = "{status='success'}"
    # Use a distinct local name so the function `query` is not shadowed.
    promql = 'binderhub_launch_time_seconds_count{}[{}]'.format(selectors, time_range)
    print(promql)
    resp = requests.get(
        'https://notebooks.gesis.org/prometheus/api/v1/query',
        params={'query': promql},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of hitting a KeyError/JSON error below.
    resp.raise_for_status()
    return resp.json()['data']['result']

In [4]:
def makedf(data, start_time):
    """Aggregate Prometheus launch-counter series into one row per repository.

    Parameters:
        data: iterable of series dicts. Each must provide ``['metric']['repo']``
            (a URL that splits as ``<prefix>/<org>/<repo>``) and ``['values']``,
            a non-empty list of ``[timestamp, value]`` pairs whose value is
            int-parsable.
        start_time: naive datetime marking the start of the window; it is
            compared against the naive UTC time of each series' first sample.

    Returns:
        DataFrame with columns ``name, org, provider, repo_url, binder_url,
        launches, log_launches``, sorted by ``launches`` descending.
        ``log_launches`` is the natural log of ``launches``.
    """
    # Keyed by the full repo URL: several series can belong to one repository
    # (e.g. status success with different retries values), but repositories
    # from different orgs that merely share a name must stay separate rows.
    rows = {}
    for container in data:
        repo_url = container['metric']['repo']
        pref, org, repo = repo_url.rsplit('/', 2)
        provider = pref.replace('https://', '')

        values = [int(sample[1]) for sample in container['values']]
        first_value_ts = container['values'][0][0]
        first_value_dt = datetime.utcfromtimestamp(first_value_ts)
        if first_value_dt > start_time:
            # Series first appears inside the window, so the counter is taken
            # as-is. NOTE first value can be > 1 if there are simultaneous launches.
            # NOTE(review): a series that predates the window may still have its
            # first in-window sample slightly after start_time and land here — confirm.
            print(repo, first_value_dt, start_time)
            launches = max(values)
        else:
            # Series already existed at start_time: count only the increase.
            launches = max(values) - min(values)

        if repo_url in rows:
            rows[repo_url]['launches'] += launches
        else:
            rows[repo_url] = {
                'name': repo,
                'org': org,
                'provider': provider,
                'launches': launches,
                'repo_url': repo_url,
                'binder_url': binder_url(org, repo),
            }

    columns = ['name', 'org', 'provider', 'launches', 'repo_url', 'binder_url']
    df = pd.DataFrame(list(rows.values()), columns=columns)
    # The groupby is kept so the column order and index layout stay as before;
    # every group holds exactly one row because rows are unique per repo_url.
    df = (
        df.groupby(['name', 'org', 'provider', 'repo_url', 'binder_url'])
        .sum()
        .reset_index()
        .sort_values('launches', ascending=False, kind='mergesort')
    )
    # A repository with 0 launches in the window yields -inf here.
    df['log_launches'] = df['launches'].apply(np.log)
    return df

# The most popular repositories in the last 24 hours

In [5]:
# Look-back window in hours; used for both the Prometheus range selector and start_time.
hour = 24
data = query(f'{hour}h')
# Naive UTC start of the window, computed after the query has returned.
# makedf compares each series' first sample time against this value.
start_time = datetime.utcnow() - timedelta(hours=hour)
df = makedf(data, start_time)

binderhub_launch_time_seconds_count{status='success'}[24h]
PythonDataScienceHandbook 2018-10-10 08:34:52.682000 2018-10-09 14:24:57.985819
binder-stats 2018-10-10 08:58:52.682000 2018-10-09 14:24:57.985819
workshop_girls_day 2018-10-10 08:34:52.682000 2018-10-09 14:24:57.985819
ligo-binder 2018-10-10 08:35:52.682000 2018-10-09 14:24:57.985819
bokeh 2018-10-10 10:27:44.244000 2018-10-09 14:24:57.985819
PythonDataScienceHandbook 2018-10-10 14:08:44.244000 2018-10-09 14:24:57.985819
binder-stats 2018-10-10 09:47:44.244000 2018-10-09 14:24:57.985819
workshop_girls_day 2018-10-10 13:30:44.244000 2018-10-09 14:24:57.985819
ligo-binder 2018-10-10 14:23:44.244000 2018-10-09 14:24:57.985819
gesis-meta-analysis-2018 2018-10-09 14:25:04.084000 2018-10-09 14:24:57.985819
conda 2018-10-09 14:25:04.084000 2018-10-09 14:24:57.985819
julia-python 2018-10-09 14:25:04.084000 2018-10-09 14:24:57.985819
jupyter-extension 2018-10-09 14:25:04.084000 2018-10-09 14:24:57.985819
jupyter-stacks 2018-10-09 14:25

In [6]:
# Show the two URL columns as links that open in a new tab; the raw URL
# becomes the href and a fixed label is displayed instead.
link_formatters = {
    'repo_url': lambda x: f'<a target="_blank" href="{x}">repo url</a>',
    'binder_url': lambda x: f'<a target="_blank" href="{x}">binder url</a>',
}
df.style.format(link_formatters)

Unnamed: 0,name,org,provider,repo_url,binder_url,launches,log_launches
18,workshop_girls_day,gesiscss,github.com,repo url,binder url,25,3.21888
3,binder-stats,gesiscss,github.com,repo url,binder url,21,3.04452
2,PythonDataScienceHandbook,gesiscss,github.com,repo url,binder url,13,2.56495
14,ligo-binder,minrk,github.com,repo url,binder url,11,2.3979
8,flow,gesiscss,github.com,repo url,binder url,10,2.30259
15,ptm,gesiscss,github.com,repo url,binder url,6,1.79176
17,requirements,binder-examples,github.com,repo url,binder url,3,1.09861
12,jupyter-stacks,binder-examples,github.com,repo url,binder url,3,1.09861
11,jupyter-extension,binder-examples,github.com,repo url,binder url,1,0.0
16,remote_storage,binder-examples,github.com,repo url,binder url,1,0.0


In [None]:
# Scratch check: convert the Unix timestamp to a naive UTC datetime (2018-10-09 14:19:04).
datetime.utcfromtimestamp(1539094744)

In [None]:
# Scratch check of the `assert <condition>, <message>` form: result is 10, so the
# comparison is false and this raises AssertionError with 5 as its argument.
# NOTE(review): this cell always fails when run — presumably a leftover experiment; confirm it can be removed.
result = 10
assert result == 100, 5