In [None]:
from os import environ
from pathlib import Path

# Read the batch folders from the CROSSCOMPUTE_* environment variables,
# falling back to the standard batch folders when a variable is not set.
input_folder, output_folder = (
    Path(environ.get(variable_name, default_folder))
    for variable_name, default_folder in [
        ('CROSSCOMPUTE_INPUT_FOLDER', 'batches/standard/input'),
        ('CROSSCOMPUTE_OUTPUT_FOLDER', 'batches/standard/output'),
    ])

In [None]:
import json
from os.path import join

# Parse the JSON-formatted variables file of the batch and pick the uri to process.
variables = json.loads((input_folder / 'variables.dictionary').read_text())
uri = variables['uri']
# Example values, kept for manual experiments:
# uri = 'https://github.com/python-organizers/conferences'
# uri = 'https://github.com/python-organizers/conferences/blob/main/2022.csv'
# uri = 'https://raw.githubusercontent.com/python-organizers/conferences/main/2022.csv'
uri

In [None]:
import re

# Matches the leading /<user_name>/<repository_name> of a github.com path.
# Dots are accepted in the repository name (e.g. "three.js"), which the previous
# character class cut short at the first dot. A trailing ".git" is left out of
# the captured name, and the name must end at a "/" or at the end of the path.
PATH_REPOSITORY_PATTERN = re.compile(
    r'^/(?P<user_name>[\w\-]+)/(?P<repository_name>[\w.\-]+?)(?:\.git)?(?=/|$)')
# Matches .../blob/<branch_name>/<file_path>; dots are accepted in the branch
# name (e.g. "v1.2"). Branch names that contain "/" are not supported.
PATH_FILE_PATTERN = re.compile(
    r'/blob/(?P<branch_name>[\w.\-]+)/(?P<file_path>.*)$')
# Lowercase file extensions that are collected when listing a repository.
FILE_EXTENSIONS = ['.csv']

In [None]:
import requests

def get_branch_name(user_name, repository_name, timeout=10):
    """Return the default branch name of a GitHub repository.

    Parameters:
        user_name: owner part of the repository path.
        repository_name: repository part of the repository path.
        timeout: seconds to wait for the GitHub API before requests gives up;
            without it a stalled connection would block the run indefinitely.

    Returns:
        The ``default_branch`` reported by the GitHub API, or ``'master'``
        when the API does not answer with status 200.
    """
    request_uri = f'https://api.github.com/repos/{user_name}/{repository_name}'
    response = requests.get(request_uri, timeout=timeout)
    if response.status_code != 200:
        # Best-effort fallback when the repository metadata is unavailable
        return 'master'
    return response.json()['default_branch']

def get_file_paths(user_name, repository_name, branch_name, timeout=10):
    """Return the paths listed in the git tree of a repository branch.

    Parameters:
        user_name: owner part of the repository path.
        repository_name: repository part of the repository path.
        branch_name: branch whose tree is requested.
        timeout: seconds to wait for the GitHub API before requests gives up;
            without it a stalled connection would block the run indefinitely.

    Returns:
        A list with the ``path`` of every entry in the ``tree`` of the API
        response, or an empty list when the API does not answer with
        status 200.
    """
    request_uri = f'https://api.github.com/repos/{user_name}/{repository_name}/git/trees/{branch_name}'
    response = requests.get(request_uri, timeout=timeout)
    if response.status_code != 200:
        return []
    # TODO: Consider getting paths from folders
    return [entry['path'] for entry in response.json()['tree']]

In [None]:
from os.path import splitext
from urllib.parse import urlparse as parse_uri

def get_source_uris(uri):
    """Expand a URI into a sorted list of URIs that can be downloaded directly.

    - A github.com file URI (``/<user>/<repository>/blob/<branch>/<path>``)
      becomes the matching raw.githubusercontent.com URI.
    - Any other github.com repository URI expands to the raw URI of every path
      returned by get_file_paths whose extension is in FILE_EXTENSIONS, using
      the branch returned by get_branch_name.
    - A github.com URI without a recognizable repository yields an empty list.
    - A URI on any other host is returned unchanged as the only element.
    """
    parsed_uri = parse_uri(uri)
    if parsed_uri.hostname != 'github.com':
        return [uri]
    path = parsed_uri.path
    path_repository_match = PATH_REPOSITORY_PATTERN.match(path)
    if not path_repository_match:
        return []
    user_name = path_repository_match.group('user_name')
    repository_name = path_repository_match.group('repository_name')
    raw_uri_prefix = f'https://raw.githubusercontent.com/{user_name}/{repository_name}'
    path_file_match = PATH_FILE_PATTERN.search(path)
    if path_file_match:
        # The branch and file path are captured by the /blob/ pattern; the
        # repository pattern has no branch_name group, so reading it from
        # there raised KeyError and the file was never added.
        branch_name = path_file_match.group('branch_name')
        file_path = path_file_match.group('file_path')
        return [f'{raw_uri_prefix}/{branch_name}/{file_path}']
    branch_name = get_branch_name(user_name, repository_name)
    source_uris = [
        f'{raw_uri_prefix}/{branch_name}/{file_path}'
        for file_path in get_file_paths(user_name, repository_name, branch_name)
        if splitext(file_path)[1].lower() in FILE_EXTENSIONS
    ]
    return sorted(source_uris)

# Expand the configured uri into the list of source URIs and display the list
source_uris = get_source_uris(uri)
source_uris

In [None]:
import pandas as pd

# Load every source URI as a table and combine the tables into source_table.
source_tables = []
# The loop uses its own name so that the notebook-level `uri` is not overwritten.
for source_uri in source_uris:
    try:
        source_tables.append(pd.read_csv(source_uri, parse_dates=[
            'Start Date', 'End Date', 'Talk Deadline']))
    except Exception as e:
        # Best effort: a source that cannot be read or parsed is skipped,
        # but reported so that the omission is visible in the cell output.
        print(f'skipped {source_uri}: {e}')
if not source_tables:
    # pd.concat fails on an empty list; raise the same exception type with a
    # message that names the actual problem.
    raise ValueError(f'no table could be loaded from {source_uris}')
# reset_index keeps the per-file row number as an "index" column, as before
source_table = pd.concat(source_tables).reset_index()

In [None]:
# Count the rows that have a talk deadline. The previous probe looked up the
# hard-coded row label 173 and raised KeyError whenever that label was absent.
pd.notna(source_table['Talk Deadline']).sum()

In [None]:
from datetime import datetime

# Collect the events that have not finished yet.
# Keys are (start date, row label) pairs so that events starting on the same
# day do not overwrite each other; sorting the keys still orders the events
# by start date, and each value is still a single row.
event_by_start_date = {}
now = datetime.now()
# Rows without a start date cannot be placed on the calendar
source_table = source_table.dropna(subset=['Start Date'])
for index, row in source_table.iterrows():
    start_date = row['Start Date']
    end_date = row['End Date']
    # Keep the event when it has not started or has not ended yet
    if now < start_date or now < end_date:
        event_by_start_date[(start_date, index)] = row

In [None]:
# !!! TODO
# Unfinished cell: both lists are created here but nothing is appended to them yet.
conference_events = []
proposal_events = []

# Visit the collected events in sorted key order; the loop body still has to be written.
for start_date in sorted(event_by_start_date):
    event = event_by_start_date[start_date]
    

In [None]:
# Group the upcoming events by start day and by talk deadline day.
# Keys keep the unpadded 'year-month-day' format of the original cell.
# NOTE(review): next_events is not defined in the visible cells; confirm where
# it is meant to come from before relying on Restart & Run All.
events = {}
for _, event in next_events.iterrows():
    start_date = event['Start Date']
    key = f'{start_date.year}-{start_date.month}-{start_date.day}'
    events.setdefault(key, []).append(event)

proposals = {}
# Skip events without a talk deadline; a missing deadline would otherwise
# produce the key 'nan-nan-nan'.
events_with_deadline = next_events.dropna(subset=['Talk Deadline'])
for _, event in events_with_deadline.sort_values(by=['Talk Deadline']).iterrows():
    talk_deadline = event['Talk Deadline']
    key = f'{talk_deadline.year}-{talk_deadline.month}-{talk_deadline.day}'
    proposals.setdefault(key, []).append(event)

events.keys()

In [None]:
# TODO: remove newlines
# TODO: Improve formatting
# TODO: make separate batch for conferences important or interesting to us

# Build the markdown lines of the calendar: upcoming talk deadlines first,
# then upcoming conferences, each grouped under a heading for its day.
# NOTE(review): get_date_or_default is not defined in the visible cells;
# presumably it turns a 'year-month-day' key into a datetime - confirm.

# Evaluate the current time once so that every entry is compared against the
# same instant.
current_time = datetime.now()

lines = [
  '# Conference Planner',
]

lines.append('## Upcoming Deadlines')
for key, current_events in proposals.items():
    deadline = get_date_or_default(key)
    if deadline <= current_time:
        continue
    lines.append(deadline.strftime('### %A, %B %d, %Y\n'))
    for event in current_events:
        # Double-quoted f-string: reusing the outer quote inside the braces
        # is a syntax error before Python 3.12.
        lines.append(
            f"- [{event['Subject']} Talk Proposals Due]({event['Proposal URL']})\n\n")

lines.append('## Upcoming Conferences\n')
for key, current_events in events.items():
    event_date = get_date_or_default(key)
    if event_date <= current_time:
        continue
    lines.append(event_date.strftime('### %A, %B %d, %Y\n'))
    for event in current_events:
        lines.append(f"- [{event['Subject']}]({event['Website URL']})\n\n")

lines

In [None]:
# Write the calendar as UTF-8 so that non-ASCII conference names do not depend
# on the default encoding of the platform.
with (output_folder / 'calendar.md').open('wt', encoding='utf-8') as f:
    f.write('\n'.join(lines))