In [1]:
from os import environ
# Read the batch folder locations from the environment, falling back to the
# local "standard" batch paths when a variable is not set.
input_folder = (
    environ['CROSSCOMPUTE_INPUT_FOLDER']
    if 'CROSSCOMPUTE_INPUT_FOLDER' in environ
    else 'batches/standard/input')
output_folder = (
    environ['CROSSCOMPUTE_OUTPUT_FOLDER']
    if 'CROSSCOMPUTE_OUTPUT_FOLDER' in environ
    else 'batches/standard/output')

In [2]:
import json
from os.path import join
# Load the run's input variables. The context manager closes the file handle
# as soon as the JSON has been parsed.
variables_path = join(input_folder, 'variables.dictionary')
with open(variables_path, 'rt', encoding='utf-8') as variables_file:
    variables = json.load(variables_file)

# Location that the next cell resolves into one or more CSV URLs.
uri = variables['uri']
uri

'https://github.com/python-organizers/conferences'

In [3]:
from urllib.parse import urlparse
import requests
import re

import pandas as pd

url = urlparse(uri)
# Forms of `uri` that get special handling below:
#   https://github.com/python-organizers/conferences                              (repository page)
#   https://github.com/python-organizers/conferences/blob/main/2022.csv           (file page)
#   https://raw.githubusercontent.com/python-organizers/conferences/main/2022.csv (raw file)
# urlparse reports no hostname (None) for a URI without a network location;
# normalise that to '' so the host patterns can always be applied.
hostname = url.hostname or ''
path = url.path

# Host matchers. Dots are escaped so they only match a literal ".".
github = re.compile(r"^github\.com$", re.IGNORECASE)
raw_github = re.compile(r"^raw\.githubusercontent\.com$", re.IGNORECASE)

# File names treated as CSV sources.
extensions = re.compile(r'\.(csv|txt)$', re.IGNORECASE)

# Path matchers. Raw strings keep regex escapes such as \w intact instead of
# relying on Python passing unknown string escapes through unchanged.
# Repository and branch names may contain dots (e.g. "user.github.io", "v1.0").
file = re.compile(
    r'^/(?P<username>[\w-]+)/(?P<repo>[\w.-]+)/blob/(?P<branch>[\w.-]+)/(?P<filepath>.*)$',
    re.IGNORECASE)
# A trailing slash after the repository name is accepted.
repo = re.compile(r'^/(?P<username>[\w-]+)/(?P<repo>[\w.-]+)/?$', re.IGNORECASE)


# Filled in by the dispatch code below with the URLs to download.
csv_urls = []

def get_branch_from_repo(username, repo):
    """Return the default branch of a GitHub repository.

    Args:
        username: Owner segment of the repository URL.
        repo: Repository name.

    Returns:
        The ``default_branch`` value from the GitHub API response, or
        ``'master'`` when the request fails, the status is not 200, or the
        response does not contain that key.
    """
    repo_api = f"https://api.github.com/repos/{username}/{repo}"
    try:
        # Without a timeout an unresponsive API would block the notebook forever.
        response = requests.get(repo_api, timeout=10)
    except requests.RequestException:
        return 'master'

    if response.status_code != 200:
        return 'master'

    try:
        return response.json()["default_branch"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not the expected mapping.
        return 'master'

def get_files_from_repo(username, repo, branch):
    """Return the tree entries GitHub reports for a branch of a repository.

    The request is sent without a ``recursive`` parameter.
    NOTE(review): that presumably lists only top-level entries - confirm
    against the GitHub git/trees API documentation.

    Args:
        username: Owner segment of the repository URL.
        repo: Repository name.
        branch: Branch whose tree is requested.

    Returns:
        The ``tree`` list from the API response (callers read each entry's
        ``path``), or ``[]`` when the request fails, the status is not 200,
        or the response does not contain that key.
    """
    repo_api = f"https://api.github.com/repos/{username}/{repo}/git/trees/{branch}"
    try:
        # Without a timeout an unresponsive API would block the notebook forever.
        response = requests.get(repo_api, timeout=10)
    except requests.RequestException:
        return []

    if response.status_code != 200:
        return []

    try:
        return response.json()['tree']
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not the expected mapping.
        return []


# Resolve `uri` into the list of raw CSV URLs to download.
# The repository name is kept in `repo_name` so the compiled `repo` pattern is
# not overwritten and this cell can be executed more than once.
host = hostname or ''  # guard: urlparse gives None when the URI has no host
if github.match(host):
    repo_match = repo.match(path)
    file_match = None if repo_match else file.match(path)
    if repo_match:
        # Repository page: list the branch's files and keep the CSV-like ones.
        username = repo_match['username']
        repo_name = repo_match['repo']
        branch = get_branch_from_repo(username, repo_name)
        raw_root = f"https://raw.githubusercontent.com/{username}/{repo_name}/{branch}"
        csv_urls.extend(
            f"{raw_root}/{file_metadata['path']}"
            for file_metadata in get_files_from_repo(username, repo_name, branch)
            if extensions.search(file_metadata['path'])
        )
    elif file_match:
        # File page ("/blob/" URL): rewrite to the raw-content host.
        username = file_match['username']
        repo_name = file_match['repo']
        branch = file_match['branch']
        filepath = file_match['filepath']
        csv_urls.append(f"https://raw.githubusercontent.com/{username}/{repo_name}/{branch}/{filepath}")
elif raw_github.match(host):
    # Already a raw-content URL: rebuild it from the parsed path.
    csv_urls.append(f"https://raw.githubusercontent.com{path}")
else:
    # Any other host: use the URI exactly as given.
    csv_urls.append(uri)

csv_urls

['https://raw.githubusercontent.com/python-organizers/conferences/main/2017.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2018.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2019.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2020.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2021.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2022.csv',
 'https://raw.githubusercontent.com/python-organizers/conferences/main/2023.csv']

In [4]:
# Download every CSV. A file that cannot be fetched or parsed is skipped
# (best effort), but the failure is reported instead of silently discarded.
dfs = []

for csv_url in csv_urls:
    try:
        tmp_df = pd.read_csv(csv_url)
    except Exception as error:  # deliberately broad, but lets KeyboardInterrupt through
        print(f"Skipping {csv_url}: {error}")
    else:
        dfs.append(tmp_df)

In [5]:
from datetime import datetime

# Stack all downloaded tables into one frame. reset_index() is called without
# drop, so the previous row labels are kept as an "index" column.
df = pd.concat(dfs).reset_index()

# Shape of an ISO calendar date (YYYY-MM-DD).
date_fmt = re.compile(r'\d{4}-\d{2}-\d{2}')
# Placeholder used whenever a cell holds no usable date.
default_date = datetime.strptime('2000-01-01', '%Y-%m-%d')


def default_datestr(val):
    """Convert a cell value to a datetime, using ``default_date`` as fallback.

    Args:
        val: Cell value; anything that is not a string is treated as missing.

    Returns:
        The parsed datetime when ``val`` is a string that, after stripping
        surrounding whitespace, is exactly a valid ``YYYY-MM-DD`` date;
        otherwise ``default_date``.
    """
    if isinstance(val, str):
        text = val.strip()
        if date_fmt.fullmatch(text):
            try:
                return datetime.strptime(text, '%Y-%m-%d')
            except ValueError:
                # Right shape but not a real calendar date (e.g. month 13).
                pass

    return default_date
    
# Run every date-like column through default_datestr, one column at a time.
for date_column in ('Start Date', 'End Date', 'Talk Deadline', 'Tutorial Deadline'):
    df[date_column] = df[date_column].apply(default_datestr)


In [6]:
from datetime import datetime


# Make sure the date columns of the combined frame have a datetime dtype.
# The result is written back to `df` itself, the frame that is filtered below.
for date_column in ('Start Date', 'End Date', 'Talk Deadline', 'Tutorial Deadline'):
    df[date_column] = pd.to_datetime(df[date_column])

# Replace remaining missing values with empty strings.
df = df.fillna('')

# Keep only the conferences whose start date is still in the future.
next_events = df[df['Start Date'] > datetime.now()]
next_events

Unnamed: 0,index,Subject,Start Date,End Date,Location,Country,Venue,Tutorial Deadline,Talk Deadline,Website URL,Proposal URL,Sponsorship URL
195,0,PyCon France,2023-02-16,2023-02-19,"Bordeaux, France",FRA,Université Bordeaux,2000-01-01,2023-01-07,https://www.pycon.fr/2023/,https://cfp-2023.pycon.fr/cfp/,https://www.pycon.fr/2023/fr/support.html
196,1,PyCon Namibia 2023,2023-02-21,2023-02-23,"Windhoek, Namibia",NAM,,2023-01-22,2023-01-22,https://na.pycon.org,https://pretalx.com/pycon-namibia-2022/cfp,https://na.pycon.org/sponsorship/
197,2,GeoPython 2023,2023-03-06,2023-03-08,"Basel, Switzerland",CHE,FHNW,2000-01-01,2000-01-01,https://2023.geopython.net,https://submit.geopython.net/geopython-2023/cfp,
198,3,PyTexas 2023,2023-04-01,2023-04-02,"Austin,TX",USA,https://www.pytexas.org/attend/venue,2000-01-01,2023-01-15,https://www.pytexas.org/,https://pretalx.com/pytexas-2023/,https://www.pytexas.org/sponsors/prospectus
199,4,PyCon DE & PyData Berlin 2023,2023-04-17,2023-04-19,"Berlin, Germany",GER,bcc Berlin Congress Center,2023-01-05,2023-01-05,https://2023.pycon.de,https://2023.pycon.de/blog/call-for-proposals/,https://2023.pycon.de/blog/pyconde-pydata-berl...
200,5,PyCon LT,2023-05-17,2023-05-20,"Vilnius, Lithuania",LTU,Vilnius University of Social sciences,2000-01-01,2000-01-01,https://pycon.lt/2023,,
201,6,PyCon Italia 2023,2023-05-25,2023-05-28,"Florence, Italy",ITA,Grand Hotel Mediterraneo,2023-01-15,2023-01-15,https://pycon.it/en,https://pycon.it/cfp,
202,7,EuroPython,2023-07-17,2023-07-23,,,,2000-01-01,2000-01-01,https://europython.eu,,
203,8,EuroSciPy,2023-08-14,2023-08-18,"Basel, Switzerlan",CHE,"Kollegienhaus, University of Basel",2000-01-01,2000-01-01,https://www.euroscipy.org,,


In [32]:
def _group_rows_by_date(frame, column):
    """Group the rows of ``frame`` by the calendar day stored in ``column``.

    Rows are visited in ascending order of ``column``; the stable sort keeps
    rows that share a date in their existing relative order.

    Args:
        frame: DataFrame whose ``column`` holds datetime values.
        column: Name of the datetime column to group on.

    Returns:
        A dict mapping ``"year-month-day"`` keys (no zero padding) to the list
        of rows for that day, in chronological key order.
    """
    grouped = {}
    for _, row in frame.sort_values(by=[column], kind='stable').iterrows():
        day = row[column]
        key = f"{day.year}-{day.month}-{day.day}"
        grouped.setdefault(key, []).append(row)
    return grouped


# Conferences by start date and by talk-proposal deadline, both chronological.
events = _group_rows_by_date(next_events, 'Start Date')
proposals = _group_rows_by_date(next_events, 'Talk Deadline')


events.keys()

dict_keys(['2023-2-16', '2023-2-21', '2023-3-6', '2023-4-1', '2023-4-17', '2023-5-17', '2023-5-25', '2023-7-17', '2023-8-14'])

In [34]:
import calendar

lines = [
  "# Conference Planner\n",
]

# Placeholder date that stands for "no date available".
default_date = datetime.strptime('2000-01-01', '%Y-%m-%d')


def get_date_or_default(val):
    """Parse a ``year-month-day`` key into a datetime.

    Args:
        val: Date string accepted by ``strptime`` with ``%Y-%m-%d``.

    Returns:
        ``''`` when the key is the 2000-01-01 placeholder, otherwise the
        parsed datetime.

    Raises:
        ValueError: If ``val`` does not match the expected format.
    """
    parsed = datetime.strptime(val, "%Y-%m-%d")
    # Compare datetime with datetime: a datetime never equals the string
    # "2000-01-01", so a string comparison could not detect the placeholder.
    if parsed == default_date:
        return ''

    return parsed


def _markdown_item(label, link):
    """Return a Markdown bullet for ``label``, linked only if ``link`` is non-empty."""
    target = f"[{label}]({link})" if link else label
    return f"- {target}\n\n"


def _append_section(heading, grouped, describe):
    """Append one section to ``lines``.

    Args:
        heading: Markdown heading line for the section.
        grouped: Dict mapping date keys to lists of event rows.
        describe: Callable turning one event row into a Markdown line.
    """
    lines.append(heading)
    now = datetime.now()
    for key, current_events in grouped.items():
        day = get_date_or_default(key)
        # Skip the placeholder ('' is falsy) and anything already in the past.
        if not day or day <= now:
            continue
        lines.append(day.strftime("### %A, %B %d, %Y\n"))
        lines.extend(describe(event) for event in current_events)


_append_section(
    "## Upcoming Proposal Deadlines\n",
    proposals,
    lambda event: _markdown_item(f"{event['Subject']} Talk Proposals Due", event['Proposal URL']),
)
_append_section(
    "## Upcoming Conferences\n",
    events,
    lambda event: _markdown_item(event['Subject'], event['Website URL']),
)


lines

['# Conference Planner\n',
 '## Upcoming Proposal Deadlines\n',
 '### Sunday, January 15, 2023\n',
 '- [PyTexas 2023 Talk Proposals Due](https://pretalx.com/pytexas-2023/)\n\n',
 '- [PyCon Italia 2023 Talk Proposals Due](https://pycon.it/cfp)\n\n',
 '### Sunday, January 22, 2023\n',
 '- [PyCon Namibia 2023 Talk Proposals Due](https://pretalx.com/pycon-namibia-2022/cfp)\n\n',
 '## Upcoming Conferences\n',
 '### Thursday, February 16, 2023\n',
 '- [PyCon France](https://www.pycon.fr/2023/)\n\n',
 '### Tuesday, February 21, 2023\n',
 '- [PyCon Namibia 2023](https://na.pycon.org)\n\n',
 '### Monday, March 06, 2023\n',
 '- [GeoPython 2023](https://2023.geopython.net)\n\n',
 '### Saturday, April 01, 2023\n',
 '- [PyTexas 2023](https://www.pytexas.org/)\n\n',
 '### Monday, April 17, 2023\n',
 '- [PyCon DE & PyData Berlin 2023](https://2023.pycon.de)\n\n',
 '### Wednesday, May 17, 2023\n',
 '- [PyCon LT](https://pycon.lt/2023)\n\n',
 '### Thursday, May 25, 2023\n',
 '- [PyCon Italia 2023](http

In [35]:
from os import makedirs

# Create the output folder when it is missing (an empty folder name means the
# current directory, which needs no creation).
if output_folder:
    makedirs(output_folder, exist_ok=True)

# Write the report as UTF-8 so non-ASCII conference names do not depend on the
# platform's default encoding.
with open(join(output_folder, 'events.md'), 'wt', encoding='utf-8') as events_file:
    events_file.writelines(lines)