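Get all launches whose `spec` contains "corona" or "covid" and write them to a CSV file. The launch events are read from the daily mybinder.org analytics archive (https://archive.analytics.mybinder.org/); the earlier query against the [Binder Gallery API](https://notebooks.gesis.org/gallery/api/v1.0/) is kept below, commented out.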

In [None]:
import requests
import csv
import pandas as pd
from datetime import datetime, date, timedelta
from time import sleep

In [None]:
# Collection window for the launch events. The start date is chosen early
# enough that no launch of a covid repo is missed.
# Alternative start kept for reference: datetime(2020, 6, 5)
from_dt = datetime(year=2019, month=12, day=1)
to_dt = datetime(year=2020, month=9, day=10)

In [None]:
# launches = []

# # from_dt and to_dt: Date and time in ISO 8601 format in UTC, e.g. 2019-05-31T16:17:56.946703
# api_query_url = f'https://notebooks.gesis.org/gallery/api/v1.0/launches/{from_dt.isoformat()}/{to_dt.isoformat()}'

# # the API is paginated and returns 500 results per page,
# # so every page has to be fetched to get the complete data
# next_page = 1
# while next_page is not None:
#     r = requests.get(api_query_url, params={'page': str(next_page)})
#     # ex: https://notebooks.gesis.org/gallery/api/v1.0/launches/2019-12-01T00:00:00/2020-06-06T00:00:00?page=1
#     response = r.json()

#     if r.status_code == 429:
#         # check the limit for # queries per second/minute
#         #print(f'429: {response["message"]}')
#         sleep(1)
#         continue

#     assert r.status_code == 200
        
#     for launch in response['launches']:
#         # get only covid related repos
#         if "covid" in launch["spec"] or "corona" in launch["spec"]:
#             launches.append(launch)
#     # print(launch["timestamp"], next_page, len(launches))
#     next_page = response['next_page']


In [None]:
# Collect every covid-related launch event between from_dt and to_dt (both
# days inclusive). One JSON-lines file per day is read from the mybinder.org
# analytics archive; matching rows are appended to `launches` as dicts.
# NOTE(review): the substring match is case-sensitive, so a spec such as
# ".../COVID-19/..." is not selected -- confirm this is intended.
launches = []
archive_date = from_dt
delta = timedelta(days=1)
while archive_date <= to_dt:
    archive_url = f"https://archive.analytics.mybinder.org/events-{archive_date.date()}.jsonl"
    df = pd.read_json(archive_url, lines=True)
    # some archive files have no 'ref' column; add it so that all records
    # end up with the same keys
    if 'ref' not in df.columns:
        df['ref'] = ""
    # keep timestamps as plain strings with second precision
    df["timestamp"] = df["timestamp"].dt.strftime("%Y-%m-%dT%H:%M:%S")
    # get only covid related repos; the filter is vectorized so that only the
    # matching rows are converted to dicts, and rows with a missing spec are
    # skipped (na=False) instead of raising
    spec = df["spec"]
    is_covid = (
        spec.str.contains("covid", regex=False, na=False)
        | spec.str.contains("corona", regex=False, na=False)
    )
    launches.extend(df[is_covid].to_dict(orient="records"))
    # print(archive_url, len(launches))
    archive_date += delta

In [None]:
# show how many matching launches were collected
launch_count = len(launches)
print(launch_count)

In [None]:
# Write the collected launches to a CSV file named after the collection window.
# The header is the union of the keys of all launches in first-seen order, so
# a record with an extra key does not make DictWriter raise; keys missing
# from a record are written as empty fields.
header = list(dict.fromkeys(key for launch in launches for key in launch))
filename = f"covid_binder_launches_{from_dt.date()}_{to_dt.date()}.csv".replace("-", "_")
if launches:
    # newline="" is required by the csv module; without it extra blank rows
    # appear on platforms that translate "\n" on write
    with open(filename, "w", newline="", encoding="utf-8") as output_file:
        dict_writer = csv.DictWriter(output_file, header)
        dict_writer.writeheader()
        dict_writer.writerows(launches)
else:
    # nothing was collected: report it instead of failing on launches[0]
    print(f"no launches collected, {filename} was not written")