In [None]:
from os import environ
# Resolve both working folders in one assignment: each one is read from its
# environment variable when that is set, otherwise the batches/standard path
# is used.
input_folder, output_folder = (
    environ.get('CROSSCOMPUTE_INPUT_FOLDER', 'batches/standard/input'),
    environ.get('CROSSCOMPUTE_OUTPUT_FOLDER', 'batches/standard/output'),
)

In [None]:
import json
from os.path import join
# Load the run's variables from the input folder. The context manager closes
# the file as soon as the JSON has been read, instead of leaving the handle
# open until it is garbage collected.
with open(join(input_folder, 'variables.dictionary')) as variables_file:
    variables = json.load(variables_file)
uri = variables['uri']
uri

In [None]:
from urllib.parse import urlparse
import requests
import re

import pandas as pd

url = urlparse(uri)
# Example URIs for each shape handled below:
#   https://github.com/python-organizers/conferences
#   https://github.com/python-organizers/conferences/blob/main/2022.csv
#   https://raw.githubusercontent.com/python-organizers/conferences/main/2022.csv
# urlparse() reports hostname as None when the URI has no network location
# (for example a local file path); fall back to '' so the regex matches that
# follow always receive a string.
hostname = url.hostname or ''
path = url.path

# Raw strings keep regex escapes such as \w away from Python's own
# string-escape processing, and the dots in the host names are escaped so they
# match a literal '.' rather than any character.
github = re.compile(r"^github\.com$", re.IGNORECASE)
raw_github = re.compile(r"^raw\.githubusercontent\.com$", re.IGNORECASE)
# File names ending in .csv or .txt (case-insensitive).
extensions = re.compile(r'\.(csv|txt)$', re.IGNORECASE)
# /<username>/<repo>/blob/<branch>/<filepath>
# [\w-] covers letters, digits, '_' and '-'; repository and branch segments
# additionally accept '.', e.g. "my.repo" or "v1.0".
file = re.compile(r'^/(?P<username>[\w-]+)/(?P<repo>[\w.-]+)/blob/(?P<branch>[\w.-]+)/(?P<filepath>.*)$', re.IGNORECASE)
# /<username>/<repo>
repo = re.compile(r'^/(?P<username>[\w-]+)/(?P<repo>[\w.-]+)$', re.IGNORECASE)


# Filled in below with the URLs that will be handed to pandas.
csv_urls = []

def get_branch_from_repo(username, repo, timeout=10):
    """Return the default branch name of a GitHub repository.

    Sends a GET request to ``https://api.github.com/repos/{username}/{repo}``
    and reads ``default_branch`` from the JSON response.

    Args:
        username: Owner segment of the repository URL.
        repo: Repository name.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout, requests can wait indefinitely on an unresponsive server.

    Returns:
        The ``default_branch`` value on an HTTP 200 response; ``'master'``
        when the request fails, the status is not 200, or the field is absent.
    """
    fallback_branch = 'master'
    repo_api = f"https://api.github.com/repos/{username}/{repo}"
    try:
        response = requests.get(repo_api, timeout=timeout)
    except requests.RequestException:
        # Network problems are treated like any other unsuccessful lookup.
        return fallback_branch

    if response.status_code == 200:
        return response.json().get("default_branch", fallback_branch)

    return fallback_branch

def get_files_from_repo(username, repo, branch, timeout=10):
    """Return the tree entries GitHub reports for a branch of a repository.

    Sends a GET request to
    ``https://api.github.com/repos/{username}/{repo}/git/trees/{branch}`` and
    returns the ``tree`` list from the JSON response.

    NOTE(review): no ``recursive`` parameter is sent, so presumably only the
    top-level entries are listed -- confirm against the GitHub API docs.

    Args:
        username: Owner segment of the repository URL.
        repo: Repository name.
        branch: Branch whose tree is requested.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout, requests can wait indefinitely on an unresponsive server.

    Returns:
        The ``tree`` list on an HTTP 200 response; an empty list when the
        request fails, the status is not 200, or the field is absent.
    """
    repo_api = f"https://api.github.com/repos/{username}/{repo}/git/trees/{branch}"
    try:
        response = requests.get(repo_api, timeout=timeout)
    except requests.RequestException:
        # Network problems are treated like any other unsuccessful lookup.
        return []

    if response.status_code == 200:
        return response.json().get('tree', [])

    return []


# Translate the URI into directly downloadable URLs, collected in csv_urls.
# hostname is None when the URI has no network location (e.g. a local path)
# and re.match() raises TypeError on None, so match against '' instead.
host = hostname or ''
if github.match(host):
    # Match each pattern once and keep the results under their own names, so
    # the compiled `repo` pattern is not overwritten by the repository name.
    repo_match = repo.match(path)
    file_match = file.match(path)
    if repo_match:
        # Repository URL: list the default branch and keep the entries whose
        # path ends in one of the accepted extensions.
        username = repo_match.group('username')
        repo_name = repo_match.group('repo')
        branch = get_branch_from_repo(username, repo_name)
        for file_metadata in get_files_from_repo(username, repo_name, branch):
            if extensions.search(file_metadata['path']):
                csv_urls.append(f"https://raw.githubusercontent.com/{username}/{repo_name}/{branch}/{file_metadata['path']}")
    elif file_match:
        # "blob" page of a single file: rewrite it to the raw-content URL.
        username = file_match.group('username')
        repo_name = file_match.group('repo')
        branch = file_match.group('branch')
        filepath = file_match.group('filepath')
        csv_urls.append(f"https://raw.githubusercontent.com/{username}/{repo_name}/{branch}/{filepath}")
elif raw_github.match(host):
    csv_urls.append(f"https://raw.githubusercontent.com{path}")
else:
    # Any other URI is passed through unchanged.
    csv_urls.append(uri)

csv_urls

In [None]:
dfs = []

# Best-effort load: a URL that cannot be read is skipped, but the failure is
# reported instead of being silently discarded. Catching Exception (rather
# than using a bare except) lets KeyboardInterrupt still stop the cell.
for url in csv_urls:
    try:
        tmp_df = pd.read_csv(url)
    except Exception as error:
        print(f"Skipping {url}: {error}")
    else:
        dfs.append(tmp_df)

In [None]:
from datetime import datetime

# Stack every loaded frame, then move the old row labels into a regular
# column so the combined frame gets a fresh 0..n-1 index.
df = pd.concat(dfs).reset_index()

# Four digits, two digits, two digits separated by '-'. A raw string keeps \d
# away from Python's own string-escape processing.
date_fmt = re.compile(r'\d{4}-\d{2}-\d{2}')
# Placeholder returned for values that are missing or cannot be parsed.
default_date = datetime.strptime('2000-01-01', '%Y-%m-%d')


def default_datestr(val):
    """Parse a 'YYYY-MM-DD' string into a datetime, else return default_date.

    Args:
        val: A cell value; anything that is not a string (e.g. NaN or None)
            yields the placeholder.

    Returns:
        The parsed datetime, or ``default_date`` when ``val`` is not a string,
        does not start with the date pattern, or is rejected by ``strptime``
        (an impossible date such as '2022-13-45', or text after the date).
    """
    if isinstance(val, str) and date_fmt.match(val):
        try:
            return datetime.strptime(val, '%Y-%m-%d')
        except ValueError:
            # The shape looked right but the value is not a real date.
            pass

    return default_date
    
# Run every date column through default_datestr, in the same column order.
for date_column in ('Start Date', 'End Date', 'Talk Deadline', 'Tutorial Deadline'):
    df[date_column] = df[date_column].apply(default_datestr)


In [None]:
from datetime import datetime


# Convert the date columns on `df`, the combined frame that is filtered below.
# Writing them to `tmp_df` would only modify whichever CSV happened to be
# loaded last, and would fail if that variable was never assigned.
for date_column in ('Start Date', 'End Date', 'Talk Deadline', 'Tutorial Deadline'):
    df[date_column] = pd.to_datetime(df[date_column])
# Blank out the remaining missing values; rebinding instead of inplace=True
# keeps the cell safe to re-run.
df = df.fillna('')

# Keep only the events that start after the current moment.
next_events = df[df['Start Date'] > datetime.now()]
next_events

In [None]:
def _day_key(moment):
    """Build the 'year-month-day' grouping key (no zero padding)."""
    return f"{moment.year}-{moment.month}-{moment.day}"


# Upcoming events grouped by start day, in the order the rows appear.
events = {}
for _, event_row in next_events.iterrows():
    events.setdefault(_day_key(event_row['Start Date']), []).append(event_row)

# The same events grouped by talk deadline, visited in deadline order.
proposals = {}
for _, event_row in next_events.sort_values(by=['Talk Deadline']).iterrows():
    proposals.setdefault(_day_key(event_row['Talk Deadline']), []).append(event_row)


events.keys()

In [None]:
import calendar

lines = [
  "# Conference Planner\n",
]

# Placeholder date; keys equal to it are left out of the report.
default_date = datetime.strptime('2000-01-01', '%Y-%m-%d')


def get_date_or_default(val):
    """Parse a 'year-month-day' key into a datetime.

    Args:
        val: Key string accepted by ``strptime`` with the format '%Y-%m-%d'.

    Returns:
        The parsed datetime, or None when it equals ``default_date``. The
        comparison is datetime-to-datetime; comparing the parsed value with
        the string "2000-01-01" can never be true.
    """
    parsed = datetime.strptime(val, "%Y-%m-%d")
    if parsed == default_date:
        return None

    return parsed


# Take the reference time once so both sections use the same cut-off.
now = datetime.now()

lines.append("## Upcoming Proposal Deadlines\n")
for key, current_events in proposals.items():
    deadline = get_date_or_default(key)
    # Skip placeholder dates and deadlines that have already passed.
    if deadline is None or deadline <= now:
        continue
    lines.append(deadline.strftime("### %A, %B %d, %Y\n"))

    for event in current_events:
        lines.append(f"- [{event['Subject']} Talk Proposals Due]({event['Proposal URL']})\n\n")

lines.append("## Upcoming Conferences\n")
for key, current_events in events.items():
    start_date = get_date_or_default(key)
    # Skip placeholder dates and events that have already started.
    if start_date is None or start_date <= now:
        continue
    lines.append(start_date.strftime("### %A, %B %d, %Y\n"))

    for event in current_events:
        lines.append(f"- [{event['Subject']}]({event['Website URL']})\n\n")


lines

In [None]:
# Write the report with an explicit UTF-8 encoding so non-ASCII characters in
# the event data do not depend on the platform's default encoding.
with open(join(output_folder, 'events.md'), 'wt', encoding='utf-8') as f:
    f.writelines(lines)