In [1]:
# import numpy as np
import pandas as pd
import requests
from datetime import datetime, timedelta

In [2]:
def binder_url(org, repo, ref='master'):
    """Build the launch URL for a repository on the GESIS BinderHub.

    Parameters
    ----------
    org : str
        Organisation or user segment of the repository path.
    repo : str
        Repository name.
    ref : str, optional
        Branch, tag or commit placed in the last URL segment. Defaults to
        ``'master'`` so existing two-argument calls produce the same URL
        as before.

    Returns
    -------
    str
        URL of the form
        ``https://notebooks.gesis.org/binder/v2/gh/<org>/<repo>/<ref>``.
    """
    return f'https://notebooks.gesis.org/binder/v2/gh/{org}/{repo}/{ref}'

In [3]:
def ts_to_dt(ts):
    """Convert a Unix timestamp to a naive ``datetime`` expressed in UTC.

    Parameters
    ----------
    ts : int or float
        Seconds since the Unix epoch.

    Returns
    -------
    datetime.datetime
        Naive (``tzinfo is None``) datetime holding the UTC wall-clock time,
        i.e. the same value ``datetime.utcfromtimestamp(ts)`` returned.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; build an
    # aware UTC datetime and strip tzinfo so it stays comparable with the
    # naive datetimes used elsewhere in this notebook.
    return datetime.fromtimestamp(ts, tz=timezone.utc).replace(tzinfo=None)

# The most popular repositories

In [4]:
def query(time_range, timeout=30):
    """Fetch successful-launch counter samples from the GESIS Prometheus.

    Builds the selector
    ``binderhub_launch_time_seconds_count{status='success'}[<time_range>]``,
    prints it, and sends it to the Prometheus ``/api/v1/query`` endpoint.

    Parameters
    ----------
    time_range : str
        Range-selector duration placed inside the square brackets,
        e.g. ``'1h'`` or ``'1d'``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so a stalled
        server cannot hang the notebook indefinitely.

    Returns
    -------
    list
        The ``data.result`` list of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Distinct local name: the original reused `query`, shadowing this function.
    promql = 'binderhub_launch_time_seconds_count{}[{}]'.format("{status='success'}", time_range)
    print(promql)
    resp = requests.get(
        'https://notebooks.gesis.org/prometheus/api/v1/query',
        params={'query': promql},
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of a KeyError/JSON error further down.
    resp.raise_for_status()
    return resp.json()['data']['result']

In [5]:
def process_data(data, time_range_beginning):
    """Aggregate Prometheus launch-counter series into per-repository columns.

    Parameters
    ----------
    data : list of dict
        One entry per series. Each entry is read as
        ``entry['metric']['repo']`` (repository URL) and ``entry['values']``
        (a non-empty list of ``[timestamp, value]`` pairs).
    time_range_beginning : datetime.datetime
        Naive UTC datetime marking the start of the queried window.

    Returns
    -------
    dict of list
        Column-oriented dict with keys ``name``, ``org``, ``provider``,
        ``launches``, ``repo_url`` and ``binder_url``; one row per distinct
        ``repo_url``. Launches of several series that share a ``repo_url``
        are summed.
    """
    d = {'name': [], 'org': [], 'provider': [], 'launches': [], 'repo_url': [], 'binder_url': []}
    # repo_url -> row position in the column lists. Keying on the full URL
    # (not the bare repo name) keeps same-named repositories of different
    # organisations apart, and makes the lookup O(1).
    row_of = {}
    for container in data:
        repo_url = container['metric']['repo']
        provider, org, repo = repo_url.replace('https://', '').rsplit('/', 2)

        # calculate number of launches for each series
        samples = container['values']
        counts = [int(sample[1]) for sample in samples]
        first_value_dt = ts_to_dt(samples[0][0])
        if first_value_dt > time_range_beginning:
            # Series first appears after the beginning of the time range.
            # NOTE: the first value can be > 1 if there are simultaneous
            # launches, so the maximum is taken as the launch count.
            launches = max(counts)
        else:
            # Series already existed before the beginning of the time range:
            # only the growth inside the window counts.
            # NOTE(review): a counter reset inside the window would make
            # max - min undercount — confirm whether that can happen here.
            launches = max(counts) - min(counts)

        if repo_url in row_of:
            # same repo can have status success with different retries values
            d['launches'][row_of[repo_url]] += launches
        else:
            row_of[repo_url] = len(d['name'])
            d['launches'].append(launches)
            d['name'].append(repo)
            d['org'].append(org)
            d['provider'].append(provider)
            d['repo_url'].append(repo_url)
            d['binder_url'].append(binder_url(org, repo))
    return d

In [6]:
def makedf(time_range, time_delta, styled=False):
    """Query launch counts and return them ranked by number of launches.

    Parameters
    ----------
    time_range : str
        Range-selector duration passed to ``query`` (e.g. ``'1h'``).
    time_delta : datetime.timedelta
        Length of the same window; subtracted from the current UTC time to
        obtain the window start handed to ``process_data``.
    styled : bool, optional
        If true, return a pandas ``Styler`` that renders ``repo_url`` and
        ``binder_url`` as HTML links. Defaults to ``False``, which returns
        the plain ``DataFrame`` exactly as before.

    Returns
    -------
    pandas.DataFrame or pandas.io.formats.style.Styler
        Columns ``name``, ``org``, ``provider``, ``repo_url``,
        ``binder_url`` and ``launches``, sorted by ``launches`` descending.
    """
    # Local imports: the file-level imports do not provide these names.
    from datetime import timezone
    from html import escape

    series = query(f'{time_range}')
    # Naive UTC "now" without the deprecated datetime.utcnow().
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    time_range_beginning = now_utc - time_delta
    columns = process_data(series, time_range_beginning)

    df = (
        pd.DataFrame(columns)
        .groupby(['name', 'org', 'provider', 'repo_url', 'binder_url'])
        .sum()
        .reset_index()
        .sort_values('launches', ascending=False)
    )

    if styled:
        # The original built this Styler and threw it away, so the link
        # formatting never took effect; it is now available on request.
        # URLs are HTML-escaped before being placed in the href attribute.
        return df.style.format({
            'repo_url': lambda x: f'<a target="_blank" href="{escape(x)}">repo url</a>',
            'binder_url': lambda x: f'<a target="_blank" href="{escape(x)}">binder url</a>',
        })
    return df

## The most popular repositories in the last hour

In [7]:
# One window length feeds both arguments: the PromQL range string sent with
# the query and the timedelta used to locate the start of the window.
hour = 1
time_range, time_delta = f'{hour}h', timedelta(hours=hour)
makedf(time_range, time_delta)

binderhub_launch_time_seconds_count{status='success'}[1h]


Unnamed: 0,name,org,provider,repo_url,binder_url,launches
0,PythonDataScienceHandbook,gesiscss,github.com,https://github.com/gesiscss/PythonDataScienceH...,https://notebooks.gesis.org/binder/v2/gh/gesis...,5
1,binder-stats,gesiscss,github.com,https://github.com/gesiscss/binder-stats,https://notebooks.gesis.org/binder/v2/gh/gesis...,4
4,workshop_girls_day,gesiscss,github.com,https://github.com/gesiscss/workshop_girls_day,https://notebooks.gesis.org/binder/v2/gh/gesis...,4
2,ligo-binder,minrk,github.com,https://github.com/minrk/ligo-binder,https://notebooks.gesis.org/binder/v2/gh/minrk...,2
3,requirements,binder-examples,github.com,https://github.com/binder-examples/requirements,https://notebooks.gesis.org/binder/v2/gh/binde...,2


## The most popular repositories in the last day

In [8]:
# One window length feeds both arguments: the PromQL range string sent with
# the query and the timedelta used to locate the start of the window.
day = 1
time_range, time_delta = f'{day}d', timedelta(days=day)
makedf(time_range, time_delta)

binderhub_launch_time_seconds_count{status='success'}[1d]


Unnamed: 0,name,org,provider,repo_url,binder_url,launches
1,PythonDataScienceHandbook,gesiscss,github.com,https://github.com/gesiscss/PythonDataScienceH...,https://notebooks.gesis.org/binder/v2/gh/gesis...,26
9,workshop_girls_day,gesiscss,github.com,https://github.com/gesiscss/workshop_girls_day,https://notebooks.gesis.org/binder/v2/gh/gesis...,13
2,binder-stats,gesiscss,github.com,https://github.com/gesiscss/binder-stats,https://notebooks.gesis.org/binder/v2/gh/gesis...,12
6,ligo-binder,minrk,github.com,https://github.com/minrk/ligo-binder,https://notebooks.gesis.org/binder/v2/gh/minrk...,7
8,requirements,binder-examples,github.com,https://github.com/binder-examples/requirements,https://notebooks.gesis.org/binder/v2/gh/binde...,2
0,BIGSSS,JuKo007,github.com,https://github.com/JuKo007/BIGSSS,https://notebooks.gesis.org/binder/v2/gh/JuKo0...,1
3,bokeh,binder-examples,github.com,https://github.com/binder-examples/bokeh,https://notebooks.gesis.org/binder/v2/gh/binde...,1
4,flow,gesiscss,github.com,https://github.com/gesiscss/flow,https://notebooks.gesis.org/binder/v2/gh/gesis...,1
5,gesis-meta-analysis-2018,berndweiss,github.com,https://github.com/berndweiss/gesis-meta-analy...,https://notebooks.gesis.org/binder/v2/gh/bernd...,1
7,r,binder-examples,github.com,https://github.com/binder-examples/r,https://notebooks.gesis.org/binder/v2/gh/binde...,1
