In [1]:
import pandas as pd
import itertools as itt
import yaml
from collections import defaultdict
from pathlib import Path


In [2]:
# Absolute path of the parent of the current working directory; the `_data`
# committee files loaded later are looked up relative to it.
HERE = Path("..").resolve()
HERE

PosixPath('/Users/cthoyt/dev/biocuration2023.github.io')

In [3]:
# Index committee members two ways: by ORCID and by lowercased first name.
people = {}
name_to_person = {}
for fname in ["program_committee.yml", "organizing_committee.yml"]:
    path = HERE.joinpath("_data", fname)
    # Decode explicitly as UTF-8: the records contain non-ASCII names, and the
    # platform default encoding is not guaranteed to be UTF-8.
    for record in yaml.safe_load(path.read_text(encoding="utf-8")):
        people[record["orcid"]] = record
        # Keyed by the first whitespace-separated token of the label, lowercased.
        # A later record with the same first name replaces the earlier one.
        name_to_person[record["personLabel"].split()[0].lower()] = record
name_to_person

{'frédérique': {'employerLabel_': 'University of Geneva',
  'genderLabel': 'female',
  'gscholar': 'wudILEcAAAAJ',
  'linkedin': 'frederique-lisacek-6028275',
  'nationalityLabel': 'France',
  'orcid': '0000-0002-0948-4537',
  'person': 'Q28609783',
  'personDescription': 'French bioinformatician',
  'personLabel': 'Frédérique Lisacek',
  'topics': 'glycoproteomics, interactomics, protein-protein interaction, glycoprotein, Semantic Web',
  'twitter': 'GlycomicsExpasy',
  'note': 'Chair'},
 'luana': {'employerLabel_': 'Tor Vergata University of Rome',
  'genderLabel': 'female',
  'gscholar': '_AvozfEAAAAJ',
  'linkedin': 'luana-licata-22a62153',
  'nationalityLabel': 'Italy',
  'orcid': '0000-0001-5084-9000',
  'person': 'Q30158411',
  'personDescription': 'researcher',
  'personLabel': 'Luana Licata',
  'topics': 'host-pathogen interaction, biological database, biocuration, molecular interaction, protein-protein interaction, ontology',
  'twitter': 'MINT_database',
  'note': 'Co-Chair'

In [4]:
# url = (
#     "https://docs.google.com/spreadsheets/d/e/2PACX-1vQDrUXGXhAyUUoeTpFV4eZ"
#     "JViSLAaFvY014X0UX_tHEoxgB6vgFVWRnfrbxjSa74w/pub?gid=1919588799&single=true&output=tsv"
# )
# print(f"Reading {url}")
# df = pd.read_csv(
#     url,
#     sep='\t',
#     skiprows=44, usecols=[0,1,2,3,4]
# )
# Read the first five columns of the submissions workbook (skipping the 44
# leading rows), give them short names, and keep only rows that have a value
# in the `time` column.
df = (
    pd.read_excel(
        "Biocuration 2023_submissions_2023-01-04_1672831641(1).xlsx",
        usecols=[0, 1, 2, 3, 4],
        skiprows=44,
    )
    .set_axis(["time", "type", "speaker", "title", "id"], axis=1)
    .loc[lambda frame: frame["time"].notna()]
)
df

Unnamed: 0,time,type,speaker,title,id
0,Monday afternoon session: standards,,,,
1,4:30-4:50,long-20' (talk+Q&A),"Kalpana Panneerselvam, Pablo Porras, Noemi del...",Human-pathogen interaction networks: IMEx’s ap...,5380.0
2,4:50-5:10,long-20' (talk+Q&A),"Patrick Masson, Cristina Casals-Casas, Lionel ...",Gene Ontology Causal Activity Models (GO-CAMs)...,6415.0
3,5:10-5:30,long-20' (talk+Q&A),"David Osumi-Sutherland, Robert Court, Huseyin ...",The Knowledge Graph Development Kit,4113.0
4,5:30-5:40,lightning-10' (talk+Q&A),"Qian Xiang, Edmund Su, Hardeep Nahal-Bose, Rob...",ICGC-ARGO Data Submission Workflow - Integrati...,3601.0
5,5:40-5:50,lightning-10' (talk+Q&A),"Marcela Tello-Ruiz, Nahla Bassil, Sebastian Be...",Developing Standards for Biocuration & Interop...,8868.0
6,5:50-6:00,lightning-10' (talk+Q&A),"Yalan Bi, Nancy George, Irene Papatheodorou, A...",Multiplexed scRNA-seq Experiments in Biocuration,5715.0
8,Tuesday morning session: FAIR 1,,,,
9,10:00-10:10,lightning-10' (talk+Q&A),"Adel Bouhraoua, Federica Quaglia, Damiano Piov...",APICURON: standardizing attribution of biocura...,966.0
10,10:10-10:20,lightning-10' (talk+Q&A),"Frederic B. Bastian, Vincent Gardeux, Bart Dep...",scFAIR: Standardization and stewardship of sin...,8261.0


In [5]:
# Maps a session heading to the list of talk tuples collected under it.
groups = defaultdict(list)
# Heading of the session currently being filled; set when a heading row is seen.
current_group_name = None

def fix_authors(s):
    """Return the first author named in an author-list string.

    Everything from the first comma onwards is discarded, then everything
    from the first " and " onwards; surrounding whitespace is stripped.
    """
    first_entry, _, _ = s.partition(",")
    first_author, _, _ = first_entry.strip().partition(" and ")
    return first_author.strip()

def fix_type(s):
    """Translate a spreadsheet talk-type label into its short name.

    Returns "long" or "lightning" for the two known labels and raises
    ValueError (carrying the offending value) for anything else.
    """
    known_labels = (
        ("long-20' (talk+Q&A)", "long"),
        ("lightning-10' (talk+Q&A)", "lightning"),
    )
    for label, short_name in known_labels:
        if s == label:
            return short_name
    raise ValueError(s)

    
def get_times(s):
    """Split a "start-end" time range into two cleaned time strings.

    :param s: time range such as "4:30-4:50"; whitespace around either part
        is ignored.
    :returns: a ``(start_time, end_time)`` tuple, each part passed through
        ``clean_time``.
    :raises ValueError: if ``s`` does not split into exactly two parts on "-".
    """
    # Parse the argument itself. The earlier body read a global named `time`,
    # which only worked when the caller's loop variable had that exact name.
    start_time, end_time = (clean_time(part.strip()) for part in s.split("-"))
    return start_time, end_time
    
def clean_time(s):
    """Convert an "H:MM" clock string into "H.MM", shifting early hours by 12.

    Hours below 8 have 12 added to them; hours of 8 or more keep their
    original text unchanged.
    """
    hour_text, minute_text = s.split(":")
    hour = int(hour_text)
    if hour >= 8:
        return f"{hour_text}.{minute_text}"
    return f"{hour + 12}.{minute_text}"
    
    
columns = ["time_start", "time_end", "type", "speaker", "title", "id"]

# Walk the rows in order. A row whose first cell starts with a digit is a talk
# and is filed under the most recent heading; any other row is a new heading.
for time, talk_type, authors, title, idx in df.values:
    if time[0].isdigit():
        start_time, end_time = get_times(time)
        groups[current_group_name].append(
            (
                start_time,
                end_time,
                fix_type(talk_type),
                fix_authors(authors),
                title,
                int(idx),
            )
        )
    else:
        current_group_name = time

# Replace each list of talk tuples with a DataFrame using the column names above.
groups = {
    session_name: pd.DataFrame(talks, columns=columns)
    for session_name, talks in groups.items()
}

In [6]:
# Hand-written conference schedule: one entry per day, each holding a mapping
# from session key to that session's metadata. `chair`/`cochair` are first
# names. `start`/`end` are "H.MM" strings.
#
# Every per-session `date` now matches its day's `date` exactly; three Tuesday
# sessions previously carried a stray leading space (" April 25th").
#
# NOTE(review): `order` values 10 and 11 are used on both Tuesday (poster
# sessions) and Wednesday (keynote, community 1) — confirm nothing relies on
# `order` being unique across days.
info = [
    {
        "day": "Monday",
        "date": "April 24th",
        "sessions": {
            'Monday Salutation': dict(
                chair="charles",
                cochair="silvio",
                order=0,
                room="Sala Piero Tortolina Auditorium",
                start="14.30",
                end="14.45",
                title="Conference Salutation",
            ),
            'Monday Salutation ISB': dict(
                chair="charles",
                cochair="silvio",
                order=0.5,
                room="Sala Piero Tortolina Auditorium",
                start="14.45",
                end="15.00",
                title="ISB Greeting from Ruth Lovering (ISB EC Chair)",
            ),
            'Monday Keynote': dict(
                chair="charles",
                cochair="deepti",
                order=1,
                room="Sala Piero Tortolina Auditorium",
                title="Keynote 1 - Paula Leitman",
                start="15.00",
                end="16.00",
                day="Monday",
                date="April 24th",
            ),
            'Monday afternoon session: standards': dict(
                order=2,
                day="Monday",
                date="April 24th",
                start="16.30",
                end="18.00",
                title="Standards 1",
                chair="frédérique",
                cochair="deepti",
                room="Sala Piero Tortolina Auditorium",
            ),
        },
    },
    {
        "day": "Tuesday",
        "date": "April 25th",
        "sessions": {
            'Tuesday Keynote': dict(
                cochair="deepak",
                chair="paola",
                order=3,
                room="Sala Piero Tortolina Auditorium",
                title="Keynote 2 - Marco Roos",
                start="9.00",
                end="10.00",
                day="Tuesday",
                date="April 25th",
            ),
            'Tuesday morning session: FAIR 1': dict(
                order=4,
                day="Tuesday",
                date="April 25th",
                start="10.00",
                end="10.30",
                title="FAIR 1",
                chair="deepak",
                cochair="paola",
                room="Sala Piero Tortolina Auditorium",
            ),
            'Tuesday morning session: FAIR 2': dict(
                order=5,
                day="Tuesday",
                date="April 25th",
                start="11.00",
                end="12.30",
                title="FAIR 2",
                chair="deepak",
                cochair="paola",
                room="Sala Piero Tortolina Auditorium",
            ),
            'Tuesday afternoon session (parallel 1): applications 1': dict(
                order=6,
                day="Tuesday",
                date="April 25th",
                start="13.30",
                end="14.30",
                title="Applications 1",
                chair="Philippe",
                cochair="Michael",
                room="Sala Piero Tortolina Auditorium",
            ),
            'Tuesday afternoon session (parallel 2): AI and Text mining 1': dict(
                order=7,
                day="Tuesday",
                date="April 25th",
                start="13.30",
                end="14.30",
                title="AI and Text Mining 1",
                chair="Jennifer",
                cochair="Tiago",
                room="Spazio 35",
            ),
            'Tuesday afternoon session (parallel 1): applications 2': dict(
                order=8,
                day="Tuesday",
                date="April 25th",
                start="14.45",
                end="15.45",
                title="Applications 2",
                chair="Michael",
                cochair="Philippe",
                room="Sala Piero Tortolina Auditorium",
            ),
            'Tuesday afternoon session (parallel 2): AI and Text mining 2': dict(
                order=9,
                day="Tuesday",
                date="April 25th",
                start="14.45",
                end="15.45",
                title="AI and Text Mining 2",
                chair="Tiago",
                cochair="Jennifer",
                room="Spazio 35",
            ),
            'Tuesday Poster Session 1': dict(
                order=10,
                day="Tuesday",
                date="April 25th",
                start="16.00",
                end="17.00",
                title="Poster Session 1 (Even Numbers)",
                room="Agora",
            ),
            'Tuesday Poster Session 2': dict(
                order=11,
                day="Tuesday",
                date="April 25th",
                start="17.00",
                end="18.00",
                title="Poster Session 2 (Odd Numbers)",
                room="Agora",
            ),
        }
    },
    {
        "day": "Wednesday",
        "date": "April 26th",
        "sessions": {
            'Wednesday Keynote': dict(
                order=10,
                chair="luana",
                cochair="deepti",
                room="Sala Piero Tortolina Auditorium",
                title="Keynote 3 - Guy Cochrane",
                start="9.00",
                end="10.00",
                day="Wednesday",
                date="April 26th",
            ),
            'Wednesday morning session: community': dict(
                order=11,
                day="Wednesday",
                date="April 26th",
                start="10.00",
                end="10.30",
                title="Community 1",
                chair="Luana",
                cochair="frédérique",
                room="Sala Piero Tortolina Auditorium",
            ),
            "Wednesday Careers in Biocuration": dict(
                order=12,
                chair="deepti",
                day="Wednesday",
                date="April 26th",
                cochair="philippe",
                room="Sala Piero Tortolina Auditorium",
                title="Careers in Biocuration Panel/Workshop",
                start="11.00",
                end="12.00",
            ),
            'Wednesday afternoon session: community': dict(
                order=13,
                day="Wednesday",
                date="April 26th",
                start="13.00",
                end="14.30",
                title="Community 2",
                chair="frédérique",
                cochair="Luana",
                room="Sala Piero Tortolina Auditorium",
            ),
            "Wednesday Valediction": dict(
                order=14,
                chair="charles",
                cochair="silvio",
                start="14.30",
                end="15.30",
                title="Valediction and ISB Time",
                room="Sala Piero Tortolina Auditorium",
            ),
        }
    }
]

In [7]:
# Turn each day's session mapping into a list ordered by `order`, attaching
# the talks parsed from the spreadsheet and the full chair/co-chair records.
CHAIR_KEYS = ("chair", "cochair")

for day in info:
    sessions = day["sessions"]
    if not isinstance(sessions, dict):
        # Already converted to a list by an earlier run of this cell; leaving
        # it alone keeps the cell safe to re-run.
        continue

    rows = []
    for key, data in sorted(sessions.items(), key=lambda e: e[1]["order"]):
        # Build a fresh record instead of popping keys from `data`, so a failed
        # chair lookup leaves the source mapping intact for a retry.
        row = {field: value for field, value in data.items() if field not in CHAIR_KEYS}
        if (groups_df := groups.get(key)) is not None:
            row["talks"] = groups_df.to_dict(orient="records")
        # Chair names are matched case-insensitively against the first-name
        # index; an unknown name raises KeyError.
        chairs = [
            name_to_person[data[chair_key].lower()].copy()
            for chair_key in CHAIR_KEYS
            if chair_key in data
        ]
        if chairs:
            row["chairs"] = chairs
        rows.append(row)

    # Swap in the list only after every session of the day was built.
    day["sessions"] = rows
info

[{'day': 'Monday',
  'date': 'April 24th',
  'sessions': [{'order': 0,
    'room': 'Sala Piero Tortolina Auditorium',
    'start': '14.30',
    'end': '14.45',
    'title': 'Conference Salutation',
    'chairs': [{'employerLabel_': 'Harvard Medical School',
      'genderLabel': 'male',
      'github': 'cthoyt',
      'gscholar': 'PjrpzUIAAAAJ',
      'image': 'http://commons.wikimedia.org/wiki/Special:FilePath/Charles%20Tapley%20Hoyt%202019.jpg',
      'linkedin': 'cthoyt',
      'max_start_date': '2021-02-15T00:00:00Z',
      'nationalityLabel': 'United States of America',
      'orcid': '0000-0003-4423-4370',
      'person': 'Q47475003',
      'personDescription': 'American chemist',
      'personLabel': 'Charles Tapley Hoyt',
      'topics': 'knowledge graph, ontology, natural language processing',
      'twitter': 'cthoyt',
      'note': 'Co-Chair'},
     {'employerLabel_': 'University of Padua',
      'genderLabel': 'male',
      'nationalityLabel': 'Italy',
      'orcid': '0000-0

In [8]:
# Write next to the committee files under HERE rather than to a
# machine-specific absolute path. The file is opened as UTF-8 because
# allow_unicode=True emits non-ASCII characters unescaped.
with HERE.joinpath("_data", "sessions.yml").open("w", encoding="utf-8") as file:
    yaml.safe_dump(info, file, allow_unicode=True, sort_keys=True)