In [1]:
import pandas as pd
import itertools as itt
import yaml
from collections import defaultdict

In [12]:
# TSV export of a published Google Sheets document (see the `output=tsv` query argument).
url = (
    "https://docs.google.com/spreadsheets/d/e/2PACX-1vQDrUXGXhAyUUoeTpFV4eZ"
    "JViSLAaFvY014X0UX_tHEoxgB6vgFVWRnfrbxjSa74w/pub?gid=1919588799&single=true&output=tsv"
)

# Read the sheet skipping its first 44 lines, keep the first five columns,
# give them stable names, and drop every row that has no value in `time`.
df = (
    pd.read_csv(url, sep="\t", skiprows=44, usecols=[0, 1, 2, 3, 4])
    .set_axis(["time", "type", "speaker", "title", "id"], axis="columns")
    .loc[lambda frame: frame["time"].notna()]
)
df

Unnamed: 0,time,type,speaker,title,id
0,Monday afternoon session: standards,,,,
1,4:30-4:50,long-20' (talk+Q&A),"Kalpana Panneerselvam, Pablo Porras, Noemi del...",Human-pathogen interaction networks: IMEx’s ap...,5380.0
2,4:50-5:10,long-20' (talk+Q&A),"Patrick Masson, Cristina Casals-Casas, Lionel ...",Gene Ontology Causal Activity Models (GO-CAMs)...,6415.0
3,5:10-5:30,long-20' (talk+Q&A),"David Osumi-Sutherland, Robert Court, Huseyin ...",The Knowledge Graph Development Kit,4113.0
4,5:30-5:40,lightning-10' (talk+Q&A),"Qian Xiang, Edmund Su, Hardeep Nahal-Bose, Rob...",ICGC-ARGO Data Submission Workflow - Integrati...,3601.0
5,5:40-5:50,lightning-10' (talk+Q&A),"Marcela Tello-Ruiz, Nahla Bassil, Sebastian Be...",Developing Standards for Biocuration & Interop...,8868.0
6,5:50-6:00,lightning-10' (talk+Q&A),"Yalan Bi, Nancy George, Irene Papatheodorou, A...",Multiplexed scRNA-seq Experiments in Biocuration,5715.0
8,Tuesday morning session: FAIR,,,,
9,10:00-10:10,lightning-10' (talk+Q&A),"Federica Quaglia, Damiano Piovesan, Silvio Tos...",APICURON: standardizing attribution of biocura...,966.0
10,10:10-10:20,lightning-10' (talk+Q&A),"Frederic B. Bastian, Vincent Gardeux, Bart Dep...",scFAIR: Standardization and stewardship of sin...,8261.0


In [3]:

# Heading of the session currently being filled; nothing has been seen yet.
current_group_name = None
# Session heading -> list of talk records; missing headings start as an empty list.
groups = defaultdict(list)

def fix_authors(s):
    """Return only the first name from an author string.

    The text before the first comma is taken, and within that the text
    before the first ``" and "``; surrounding whitespace is removed.
    """
    before_comma, _, _ = s.partition(",")
    first_author, _, _ = before_comma.strip().partition(" and ")
    return first_author.strip()

def fix_type(s):
    """Map a spreadsheet talk-type label to its short name.

    Returns ``"long"`` or ``"lightning"`` for the two known labels and
    raises ``ValueError`` (carrying the offending value) for anything else.
    """
    known_labels = (
        ("long-20' (talk+Q&A)", "long"),
        ("lightning-10' (talk+Q&A)", "lightning"),
    )
    for label, short_name in known_labels:
        if s == label:
            return short_name
    raise ValueError(s)

    
def get_times(s):
    """Split a ``"start-end"`` time range into two cleaned time strings.

    :param s: a range such as ``"4:30-4:50"``; whitespace around either
        side of the dash is ignored.
    :returns: a ``(start_time, end_time)`` tuple, each converted by
        :func:`clean_time`.
    :raises ValueError: if ``s`` does not split into exactly two parts on ``"-"``.
    """
    # Use the argument itself; relying on a global named `time` only works
    # while the caller's loop variable happens to carry that name.
    start_raw, end_raw = (part.strip() for part in s.split("-"))
    return clean_time(start_raw), clean_time(end_raw)


def clean_time(s):
    """Convert an ``"H:MM"`` time into an ``"H.MM"`` string on a 24-hour clock.

    Hours below 8 are treated as afternoon/evening and shifted by 12;
    hours of 8 and above are kept as written.

    :raises ValueError: if ``s`` does not contain exactly one ``":"`` or the
        hour part is not an integer.
    """
    h, m = s.split(":")
    if int(h) < 8:
        h = str(int(h) + 12)
    return f"{h}.{m}"
    
    
# Column names of the per-session talk tables, in the order the records are built.
columns = ["time_start", "time_end", "type", "speaker", "title", "id"]

# Walk the sheet top to bottom: a row whose first cell does not start with a
# digit is a session heading, every other row is a talk in the current session.
for time, talk_type, authors, title, idx in df.values:
    if time[0].isdigit():
        start_time, end_time = get_times(time)
        session_talks = groups[current_group_name]
        session_talks.append(
            (start_time, end_time, fix_type(talk_type), fix_authors(authors), title, int(idx))
        )
    else:
        current_group_name = time

# Replace each session's list of records with a DataFrame.
groups = {
    session_name: pd.DataFrame(records, columns=columns)
    for session_name, records in groups.items()
}

In [7]:
# Hand-written metadata for each session. The keys are used to look up
# `groups`, so they have to match the session heading rows of the
# spreadsheet character for character. `order` controls the output order.
info = {
    'Monday afternoon session: standards': dict(
        order=0,
        day="Monday",
        date="April 24th",
        start="16.30",
        end="18.00",
        title="Standards 1",
    ),
    'Tuesday morning session: FAIR': dict(
        order=1,
        day="Tuesday",
        date="April 25th",  # stray leading space removed so it matches the other dates
        start="10.00",
        end="12.30",
        title="FAIR 1",
    ),
    'Tuesday afternoon session (parallel 1): applications': dict(
        order=2,
        day="Tuesday",
        date="April 25th",
        start="13.30",
        end="15.45",
        title="Applications 1",
    ),
    'Tuesday afternoon session (parallel 2): AI and Text mining': dict(
        order=3,
        day="Tuesday",
        date="April 25th",
        start="13.30",
        end="15.45",
        title="AI and Text Mining 1",
    ),
    # NOTE(review): this key says "afternoon" but the slot is 10.00-10.30, and
    # the "morning" key below is 13.00-14.30. The headings and times look
    # swapped - confirm against the spreadsheet before changing either.
    'Wednesday afternoon session: community': dict(
        order=4,
        day="Wednesday",
        date="April 26th",
        start="10.00",
        end="10.30",
        title="Community 1",
    ),
    'Wednesday morning session: community': dict(
        order=5,
        day="Wednesday",
        date="April 26th",
        start="13.00",
        end="14.30",
        title="Community 2",
    ),
}

In [8]:
# One entry per session, ordered by its `order` field, with that session's
# talks attached as a list of records. New dicts are built so that the
# metadata in `info` is not modified by running this cell.
rows = [
    {**data, "talks": groups[key].to_dict(orient="records")}
    for key, data in sorted(info.items(), key=lambda e: e[1]["order"])
]
rows

[{'order': 0,
  'day': 'Monday',
  'date': 'April 24th',
  'start': '16.30',
  'end': '18.00',
  'title': 'Standards 1',
  'talks': [{'time_start': '16.30',
    'time_end': '16.50',
    'type': 'long',
    'speaker': 'Kalpana Panneerselvam',
    'title': 'Human-pathogen interaction networks: IMEx’s approach on the contextual metadata of the experimental evidence.',
    'id': 5380},
   {'time_start': '16.50',
    'time_end': '17.10',
    'type': 'long',
    'speaker': 'Patrick Masson',
    'title': 'Gene Ontology Causal Activity Models (GO-CAMs) for human biology',
    'id': 6415},
   {'time_start': '17.10',
    'time_end': '17.30',
    'type': 'long',
    'speaker': 'David Osumi-Sutherland',
    'title': 'The Knowledge Graph Development Kit',
    'id': 4113},
   {'time_start': '17.30',
    'time_end': '17.40',
    'type': 'lightning',
    'speaker': 'Qian Xiang',
    'title': 'ICGC-ARGO Data Submission Workflow - Integration of data validation and submission to accelerate the data cura

In [9]:
# NOTE: absolute, machine-specific output path - adjust when running elsewhere.
# The file is opened as UTF-8 explicitly because allow_unicode=True writes
# non-ASCII characters as-is, and the platform default encoding may not be UTF-8.
with open('/Users/cthoyt/dev/biocuration2023.github.io/_data/sessions.yml', 'w', encoding='utf-8') as file:
    yaml.safe_dump(rows, file, allow_unicode=True, sort_keys=True)