In [None]:
from os import environ
from pathlib import Path

# Folder locations come from the environment when it provides them;
# otherwise the relative batch folders below are used.
input_folder_text = environ.get(
    'CROSSCOMPUTE_INPUT_FOLDER', 'batches/standard/input')
output_folder_text = environ.get(
    'CROSSCOMPUTE_OUTPUT_FOLDER', 'batches/standard/output')
input_folder = Path(input_folder_text)
output_folder = Path(output_folder_text)
# Make sure the output folder (and any missing parents) exists; an
# already existing folder is not an error.
output_folder.mkdir(parents=True, exist_ok=True)

In [None]:
import json
from os.path import join

# The variables file is parsed as JSON, so decode it as UTF-8 explicitly
# instead of relying on the platform's default text encoding.
with (input_folder / 'variables.dictionary').open('rt', encoding='utf-8') as f:
    variables = json.load(f)
page_title = variables['page_title']
source_uri = variables['source_uri']
# Example values for source_uri (uncomment one to override while developing):
# source_uri = 'https://github.com/python-organizers/conferences'
# source_uri = 'https://github.com/python-organizers/conferences/blob/main/2022.csv'
# source_uri = 'https://raw.githubusercontent.com/python-organizers/conferences/main/2022.csv'
source_uri

In [None]:
import re

# Matches the leading "/<user>/<repository>" part of a github.com URL path.
# Repository names may contain dots (for example "user.github.io"), so the
# repository group accepts them; a trailing ".git" is left out of the name and
# the name must end at a "/" or at the end of the path.
PATH_REPOSITORY_PATTERN = re.compile(
    r'^/(?P<user_name>[\w\-]+)/(?P<repository_name>[\w.\-]+?)(?:\.git)?(?=/|$)',
    re.IGNORECASE)
# Matches the "/blob/<branch>/<file path>" part of a github.com file URL path.
# Branch names may contain dots (for example "v1.0"); branch names that
# contain "/" cannot be told apart from folders and are not supported.
PATH_FILE_PATTERN = re.compile(
    r'/blob/(?P<branch_name>[\w.\-]+)/(?P<file_path>.*)$', re.IGNORECASE)
# Lowercase file extensions that are treated as calendar sources.
FILE_EXTENSIONS = ['.csv']

In [None]:
import requests

def get_branch_name(user_name, repository_name, timeout=10):
    """Return the default branch name of a GitHub repository.

    Asks the GitHub REST API for the repository and reads the
    ``default_branch`` field of the JSON response. When the API does not
    answer with HTTP 200, ``'master'`` is returned as a fallback.

    Args:
        user_name: Owner part of the repository URL.
        repository_name: Repository part of the repository URL.
        timeout: Seconds to wait for the API. Without a timeout,
            ``requests`` can block indefinitely on an unresponsive server.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.
    """
    request_uri = f'https://api.github.com/repos/{user_name}/{repository_name}'
    response = requests.get(request_uri, timeout=timeout)
    if response.status_code != 200:
        return 'master'
    return response.json()['default_branch']

def get_file_paths(user_name, repository_name, branch_name, timeout=10):
    """Return the paths listed in the git tree of a repository branch.

    Asks the GitHub REST API for the tree of ``branch_name`` and collects the
    ``path`` of every entry in the response. The request does not ask for a
    recursive listing. An empty list is returned when the API does not answer
    with HTTP 200.

    Args:
        user_name: Owner part of the repository URL.
        repository_name: Repository part of the repository URL.
        branch_name: Branch whose tree is listed.
        timeout: Seconds to wait for the API. Without a timeout,
            ``requests`` can block indefinitely on an unresponsive server.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.
    """
    request_uri = f'https://api.github.com/repos/{user_name}/{repository_name}/git/trees/{branch_name}'
    response = requests.get(request_uri, timeout=timeout)
    if response.status_code != 200:
        return []
    # TODO: Consider getting paths from folders
    return [_['path'] for _ in response.json()['tree']]

In [None]:
from os.path import splitext
from urllib.parse import urlparse as parse_uri

def get_source_uris(uri):
    """Expand a source URI into a sorted list of URIs that can be downloaded.

    - A URI whose host is not ``github.com`` is returned unchanged as the only
      item.
    - A github.com file link (``/<user>/<repository>/blob/<branch>/<path>``)
      is translated into the matching raw.githubusercontent.com URI.
    - Any other github.com repository link is expanded into one raw URI per
      listed file whose extension is in ``FILE_EXTENSIONS``, using the branch
      reported by ``get_branch_name``.
    - A github.com URI without a ``/<user>/<repository>`` path gives ``[]``.
    """
    parsed_uri = parse_uri(uri)
    if parsed_uri.hostname != 'github.com':
        return [uri]
    path = parsed_uri.path
    path_repository_match = PATH_REPOSITORY_PATTERN.match(path)
    if not path_repository_match:
        return []
    user_name = path_repository_match.group('user_name')
    repository_name = path_repository_match.group('repository_name')
    raw_prefix = f'https://raw.githubusercontent.com/{user_name}/{repository_name}'
    path_file_match = PATH_FILE_PATTERN.search(path)
    if path_file_match:
        # The branch and file path are captured by the file pattern, not the
        # repository pattern; the link already names exactly one file.
        branch_name = path_file_match.group('branch_name')
        file_path = path_file_match.group('file_path')
        return [f'{raw_prefix}/{branch_name}/{file_path}']
    branch_name = get_branch_name(user_name, repository_name)
    return sorted(
        f'{raw_prefix}/{branch_name}/{file_path}'
        for file_path in get_file_paths(user_name, repository_name, branch_name)
        if splitext(file_path)[1].lower() in FILE_EXTENSIONS)

# Expand the configured source into the list of URIs to download,
# then show the list as the cell output.
source_uris = get_source_uris(uri=source_uri)
source_uris

In [None]:
import pandas as pd

# Load every source that can be parsed as CSV into its own table.
source_tables = []
for uri in source_uris:
    try:
        source_tables.append(pd.read_csv(uri))
    except Exception as e:
        # Loading is best effort: one unreadable source must not stop the
        # others, but report which source was skipped and why instead of
        # hiding the failure.
        print(f'Skipped {uri}: {e!r}')

In [None]:
# Stack all loaded tables into one table with a fresh 0..n-1 index.
combined_table = pd.concat(source_tables, ignore_index=True)
# Parse the date columns; values that cannot be parsed become NaT.
parsed_dates = {
    column_name: pd.to_datetime(combined_table[column_name], errors='coerce')
    for column_name in ('Start Date', 'End Date', 'Talk Deadline')}
# Keep only rows that have both a start and an end date.
source_table = (
    combined_table
    .assign(**parsed_dates)
    .dropna(subset=['Start Date', 'End Date']))
source_table[:2]

In [None]:
from datetime import datetime

# Take the current time once; later cells compare against this same value.
now_datetime = datetime.now()
# NOTE(review): if 'End Date' values carry no time of day they parse to
# midnight, so an event ending today would compare as already over —
# confirm that this is intended.
has_not_ended = source_table['End Date'] >= now_datetime
event_table = source_table.loc[has_not_ended]
len(event_table)

In [None]:
from collections import defaultdict

def get_next_events(t, k, f):
    """Group only the rows of ``t`` whose ``k`` value is not before now.

    Rows are kept when ``t[k] >= now_datetime``; the remaining rows are
    handed to ``get_events`` together with ``k`` and ``f``.
    """
    upcoming = t.loc[t[k] >= now_datetime]
    return get_events(upcoming, k, f)

def get_events(t, k, f):
    """Group the rows of ``t`` by ``f`` applied to each row's ``k`` value.

    Rows are visited in ascending order of column ``k``. A value earlier than
    ``now_datetime`` is replaced by ``now_datetime`` before ``f`` is applied,
    so such rows are grouped under the key for now. Returns a plain dict
    mapping each key to the list of its rows, in order of first appearance.
    """
    grouped = {}
    for _, row in t.sort_values(by=k).iterrows():
        moment = now_datetime if row[k] < now_datetime else row[k]
        grouped.setdefault(f(moment), []).append(row)
    return grouped

In [None]:
def get_timestamp(x):
    """Format ``x`` as year and full month name, e.g. ``'2022 March'``."""
    month_heading_format = '%Y %B'
    return x.strftime(month_heading_format)

# Proposal deadlines that have not passed yet, grouped by month.
proposals = get_next_events(
    t=event_table, k='Talk Deadline', f=get_timestamp)
# Conferences that have not ended yet, grouped by the month they start in.
conferences = get_events(
    t=event_table, k='Start Date', f=get_timestamp)

In [None]:
def get_proposal_text(r):
    """Return the proposal-deadline suffix for an event row.

    Returns ``''`` when the row has no 'Talk Deadline' or the deadline is
    before ``now_datetime``. Otherwise returns ``' and proposals are due
    <date>'``, written as a markdown link when the row has a 'Proposal URL'.
    """
    proposal_datetime = r['Talk Deadline']
    if pd.isna(proposal_datetime) or proposal_datetime < now_datetime:
        return ''
    proposal_datestamp = proposal_datetime.strftime(datestamp_format)
    proposal_url = r['Proposal URL']
    # An empty CSV cell is read by pandas as NaN, which is truthy, so a plain
    # truthiness test would render a link to "(nan)". Check for it explicitly.
    if pd.isna(proposal_url) or not proposal_url:
        return f' and proposals are due {proposal_datestamp}'
    return f' and [proposals are due {proposal_datestamp}]({proposal_url})'

In [None]:
# strftime format used for every full date written to the page.
datestamp_format = '%A, %B %d, %Y'
# Markdown lines of the page, starting with the title heading.
lines = [f'# {page_title}']

def extend_lines(subtitle, events):
    """Append a markdown section for ``events`` to the global ``lines``.

    Does nothing when ``events`` is empty. Otherwise appends a
    ``## Upcoming <subtitle>`` heading, then for each key of ``events`` a
    ``### <key>`` heading followed by one bullet per row. A bullet gives the
    subject (linked to the row's 'Website URL' when there is one), the start
    and end dates formatted with ``datestamp_format``, and the text returned
    by ``get_proposal_text`` for the row.
    """
    if not events:
        return
    lines.append(f'## Upcoming {subtitle}')
    for timestamp, rows in events.items():
        lines.append(f'### {timestamp}')
        for r in rows:
            subject = r['Subject']
            website_url = r['Website URL']
            start_datestamp = r['Start Date'].strftime(datestamp_format)
            end_datestamp = r['End Date'].strftime(datestamp_format)
            # An empty CSV cell is read by pandas as NaN; linking to it would
            # render "(nan)", so fall back to the plain subject instead.
            if pd.isna(website_url) or not website_url:
                subject_text = f'{subject}'
            else:
                subject_text = f'[{subject}]({website_url})'
            lines.append(
                f'- {subject_text} is '
                f'from {start_datestamp} '
                f'to {end_datestamp}' + get_proposal_text(r))

In [None]:
# Add one section per event group, proposals first, then cite the source.
for section_subtitle, section_events in (
    ('Proposals', proposals),
    ('Conferences', conferences),
):
    extend_lines(section_subtitle, section_events)
lines.append(f'\nSee {source_uri}.')

In [None]:
# Write the page as UTF-8 explicitly: event names can contain non-ASCII
# characters, and the platform's default encoding may not be able to encode
# them.
with (output_folder / 'calendar.md').open('wt', encoding='utf-8') as f:
    f.write('\n'.join(lines))