In [1]:
import numpy as np
import pandas as pd
import requests

In [2]:
def binder_url(org, repo, ref='master'):
    """Build the GESIS BinderHub launch URL for a GitHub repository.

    Parameters
    ----------
    org : str
        User or organisation that owns the repository.
    repo : str
        Repository name.
    ref : str, optional
        Branch, tag or commit to launch. Defaults to ``'master'``, which is
        the value that used to be hard-coded, so existing two-argument calls
        return exactly the same URL as before.

    Returns
    -------
    str
        ``https://notebooks.gesis.org/binder/v2/gh/<org>/<repo>/<ref>``.
        The ``gh`` (GitHub) provider segment is fixed.
    """
    return f'https://notebooks.gesis.org/binder/v2/gh/{org}/{repo}/{ref}'

In [3]:
def query(time_range, timeout=60):
    """Fetch the raw successful-launch counter samples from GESIS Prometheus.

    Sends the range-vector selector
    ``binderhub_launch_time_seconds_count{status='success'}[<time_range>]``
    to the instant-query endpoint and prints the expression before sending it.

    Parameters
    ----------
    time_range : str
        Prometheus duration that is placed inside the ``[...]`` range
        selector, e.g. ``'1h'`` or ``'7d'``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so that an
        unresponsive server cannot hang the notebook indefinitely.

    Returns
    -------
    list
        The decoded ``data.result`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    selectors = "{status='success'}"
    # Local is deliberately not called ``query`` to avoid shadowing this function.
    promql = 'binderhub_launch_time_seconds_count{}[{}]'.format(selectors, time_range)
    print(promql)
    resp = requests.get('https://notebooks.gesis.org/prometheus/api/v1/query',
                        params={'query': promql}, timeout=timeout)
    # Fail with the HTTP error itself rather than a later KeyError on 'data'.
    resp.raise_for_status()
    return resp.json()['data']['result']

In [4]:
def makedf(data):
    """Aggregate raw per-series launch counters into one row per repository.

    Parameters
    ----------
    data : list of dict
        Result entries as returned by ``query``. Each entry is read as
        ``entry['metric']`` (a dict of labels, optionally containing
        ``'repo'``) and ``entry['values']`` (a sequence of pairs whose second
        item is convertible with ``int``). Entries without a ``'repo'`` label
        are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``org``, ``provider``, ``repo_url``, ``binder_url``,
        ``launches`` and ``log_launches`` (natural log of ``launches``),
        sorted by ``launches`` in descending order. For every series the
        largest sample in the window is taken; series that belong to the same
        repository are summed.
    """
    key_columns = ['name', 'org', 'provider', 'repo_url', 'binder_url']
    rows = []
    for container in data:
        if 'repo' not in container['metric']:
            continue
        repo_url = container['metric']['repo']
        pref, org, repo = repo_url.rsplit('/', 2)
        provider = pref.replace('https://', '')
        launches = max(int(sample[1]) for sample in container['values'])
        rows.append((repo, org, provider, repo_url, binder_url(org, repo), launches))

    # Explicit columns and dtype keep the empty-input case well defined.
    df = pd.DataFrame(rows, columns=key_columns + ['launches']).astype({'launches': 'int64'})
    # The same repo can appear in several series (e.g. status success with
    # different retries values). Grouping on the full key sums those, while
    # repositories that merely share a name across orgs stay separate.
    df = (df.groupby(key_columns)['launches']
            .sum()
            .reset_index()
            .sort_values('launches', ascending=False))
    df['log_launches'] = np.log(df['launches'])
    return df

In [5]:
def query2(time_range, timeout=60):
    """Fetch per-repository launch totals aggregated by Prometheus itself.

    Sends
    ``sum(max_over_time(binderhub_launch_time_seconds_count{status='success'}[<time_range>])) by (repo)``
    to the instant-query endpoint and prints the expression before sending it.

    Parameters
    ----------
    time_range : str
        Prometheus duration that is placed inside the ``[...]`` range
        selector, e.g. ``'1h'`` or ``'7d'``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so that an
        unresponsive server cannot hang the notebook indefinitely.

    Returns
    -------
    list
        The decoded ``data.result`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    selectors = "{status='success'}"
    # Local is deliberately not called ``query`` to avoid shadowing the sibling function.
    promql = 'sum(max_over_time(binderhub_launch_time_seconds_count{}[{}])) by (repo)'.format(
        selectors, time_range)
    print(promql)
    resp = requests.get('https://notebooks.gesis.org/prometheus/api/v1/query',
                        params={'query': promql}, timeout=timeout)
    # Fail with the HTTP error itself rather than a later KeyError on 'data'.
    resp.raise_for_status()
    return resp.json()['data']['result']

In [6]:
def makedf2(data):
    """Turn server-side aggregated launch totals into a ranked DataFrame.

    Parameters
    ----------
    data : list of dict
        Result entries as returned by ``query2``. Each entry is read as
        ``entry['metric']`` (a dict of labels, optionally containing
        ``'repo'``) and ``entry['value']`` (a pair whose second item is
        convertible with ``int``). Entries without a ``'repo'`` label are
        skipped, matching what ``makedf`` does.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``org``, ``provider``, ``repo_url``, ``binder_url``,
        ``launches`` and ``log_launches`` (natural log of ``launches``),
        sorted by ``launches`` in descending order.
    """
    key_columns = ['name', 'org', 'provider', 'repo_url', 'binder_url']
    rows = []
    for container in data:
        # Skip any result group that carries no repo label instead of
        # failing with a KeyError.
        if 'repo' not in container['metric']:
            continue
        repo_url = container['metric']['repo']
        pref, org, repo = repo_url.rsplit('/', 2)
        provider = pref.replace('https://', '')
        launches = int(container['value'][1])
        rows.append((repo, org, provider, repo_url, binder_url(org, repo), launches))

    # Explicit columns and dtype keep the empty-input case well defined.
    df = pd.DataFrame(rows, columns=key_columns + ['launches']).astype({'launches': 'int64'})
    # Group on the full key: repositories that only share a name across
    # different orgs must remain separate rows.
    df = (df.groupby(key_columns)['launches']
            .sum()
            .reset_index()
            .sort_values('launches', ascending=False))
    df['log_launches'] = np.log(df['launches'])
    return df

In [7]:
def make_clickable(val):
    """Render ``val`` as an HTML anchor that links to, and is labelled with, ``val``.

    Later cells in this notebook pass this function to ``df.style.format``,
    so it has to be defined for a fresh-kernel run to get past them.
    """
    return f'<a href="{val}">{val}</a>'

# The most popular repositories in the last hour

In [8]:
# Build the last-hour ranking two ways so the results can be compared:
# raw samples aggregated client-side by makedf ...
data = query('1h')
df = makedf(data)
# ... and totals aggregated by Prometheus, reshaped by makedf2.
data2 = query2('1h')
df2 = makedf2(data2)

binderhub_launch_time_seconds_count{status='success'}[1h]
sum(max_over_time(binderhub_launch_time_seconds_count{status='success'}[1h])) by (repo)


In [9]:
# Show the client-side aggregated ranking with both URL columns as short links.
df.style.format(dict(
    repo_url=lambda x: f'<a href="{x}">repo url</a>',
    binder_url=lambda x: f'<a href="{x}">binder url</a>',
))

Unnamed: 0,name,org,provider,repo_url,binder_url,launches,log_launches
17,workshop_girls_day,gesiscss,github.com,repo url,binder url,13,2.56495
3,binder-stats,gesiscss,github.com,repo url,binder url,11,2.3979
7,flow,gesiscss,github.com,repo url,binder url,10,2.30259
2,PythonDataScienceHandbook,gesiscss,github.com,repo url,binder url,6,1.79176
14,ptm,gesiscss,github.com,repo url,binder url,6,1.79176
11,jupyter-stacks,binder-examples,github.com,repo url,binder url,3,1.09861
16,requirements,binder-examples,github.com,repo url,binder url,3,1.09861
13,ligo-binder,minrk,github.com,repo url,binder url,3,1.09861
15,remote_storage,binder-examples,github.com,repo url,binder url,1,0.0
12,jupyterlab,binder-examples,github.com,repo url,binder url,1,0.0


In [10]:
# Show the Prometheus-side aggregated ranking with both URL columns as short links.
df2.style.format(dict(
    repo_url=lambda x: f'<a href="{x}">repo url</a>',
    binder_url=lambda x: f'<a href="{x}">binder url</a>',
))

Unnamed: 0,name,org,provider,repo_url,binder_url,launches,log_launches
17,workshop_girls_day,gesiscss,github.com,repo url,binder url,13,2.56495
3,binder-stats,gesiscss,github.com,repo url,binder url,11,2.3979
7,flow,gesiscss,github.com,repo url,binder url,10,2.30259
2,PythonDataScienceHandbook,gesiscss,github.com,repo url,binder url,6,1.79176
14,ptm,gesiscss,github.com,repo url,binder url,6,1.79176
11,jupyter-stacks,binder-examples,github.com,repo url,binder url,3,1.09861
16,requirements,binder-examples,github.com,repo url,binder url,3,1.09861
13,ligo-binder,minrk,github.com,repo url,binder url,3,1.09861
15,remote_storage,binder-examples,github.com,repo url,binder url,1,0.0
12,jupyterlab,binder-examples,github.com,repo url,binder url,1,0.0


In [11]:
# Sanity check: the frame built by makedf and the one built by makedf2
# should be identical.
df.equals(df2)

True

In [12]:
# Element-wise comparison of the two frames; any False marks a cell where they disagree.
df == df2

Unnamed: 0,name,org,provider,repo_url,binder_url,launches,log_launches
17,True,True,True,True,True,True,True
3,True,True,True,True,True,True,True
7,True,True,True,True,True,True,True
2,True,True,True,True,True,True,True
14,True,True,True,True,True,True,True
11,True,True,True,True,True,True,True
16,True,True,True,True,True,True,True
13,True,True,True,True,True,True,True
15,True,True,True,True,True,True,True
12,True,True,True,True,True,True,True


# The most popular repositories of the last day

In [None]:
# Launch ranking over the last day, with repo_url / binder_url rendered as links.
data = query('1d')
df = makedf(data)
df.style.format({'repo_url': lambda x: f'<a href="{x}">repo url</a>',
                 'binder_url': lambda x: f'<a href="{x}">binder url</a>'})

# The most popular repositories of the last week

In [None]:
# Launch ranking over the last 7 days, with repo_url / binder_url rendered as links.
data = query('7d')
df = makedf(data)
df.style.format({'repo_url': lambda x: f'<a href="{x}">repo url</a>',
                 'binder_url': lambda x: f'<a href="{x}">binder url</a>'})

# The most popular repositories of the last 30 days

In [None]:
# Launch ranking over the last 30 days, with repo_url / binder_url rendered as links.
data = query('30d')
df = makedf(data)
df.style.format({'repo_url': lambda x: f'<a href="{x}">repo url</a>',
                 'binder_url': lambda x: f'<a href="{x}">binder url</a>'})

# The most popular repositories of the last 60 days

In [None]:
# Launch ranking over the last 60 days, with repo_url / binder_url rendered as links.
data = query('60d')
df = makedf(data)
df.style.format({'repo_url': lambda x: f'<a href="{x}">repo url</a>',
                 'binder_url': lambda x: f'<a href="{x}">binder url</a>'})