In [1]:
from pathlib import Path

import pandas as pd
import requests

In [2]:
# Schedule export of PyData Eindhoven 2024 (JSON).
json_file = "https://eindhoven2024.pydata.org/cfp/schedule/export/schedule.json"
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the notebook indefinitely.
response = requests.get(json_file, timeout=30)
# Stop here on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()
# Show the top-level keys of the parsed payload.
data.keys()

dict_keys(['schedule'])

In [3]:
# Collect the session list of every room on every conference day. Reading
# only days[0] would silently drop sessions as soon as the schedule spans
# more than one day.
# NOTE(review): assumes every entry in "days" has the same "rooms" mapping
# as the first one - confirm against the export format.
rooms = [
    room
    for day in data["schedule"]["conference"]["days"]
    for room in day["rooms"].values()
]
# One frame per room; each room is a list of session records.
sessions_per_room = [pd.DataFrame(room) for room in rooms]
# ignore_index=True gives every session its own row label; without it each
# room's frame restarts at 0 and the labels repeat across rooms.
sessions = pd.concat(sessions_per_room, ignore_index=True)

In [4]:
# List the column names of the combined sessions frame.
sessions.columns

Index(['id', 'guid', 'logo', 'date', 'start', 'duration', 'room', 'slug',
       'url', 'title', 'subtitle', 'track', 'type', 'language', 'abstract',
       'description', 'recording_license', 'do_not_record', 'persons', 'links',
       'attachments', 'answers'],
      dtype='object')

In [5]:
# Show the first session row to see what each field holds.
sessions.iloc[0]

id                                                                  17
guid                              a2eb19a3-1642-5781-bb82-45b681556560
logo                                                                  
date                                         2024-07-11T10:00:00+02:00
start                                                            10:00
duration                                                         00:30
room                                                          If (1.1)
slug                           cfp-17-explainable-ai-in-the-lime-light
url                  https://eindhoven2024.pydata.org/cfp/talk/QFW9XN/
title                                 Explainable AI in the LIME-light
subtitle                                                              
track                                                             None
type                                                              Talk
language                                                            en
abstra

In [6]:
# Inspect the `persons` value of the first session; the output shows a list
# of dicts with keys such as "public_name" and "biography".
sessions.iloc[0]["persons"]

[{'id': 25,
  'code': 'LHNM7K',
  'public_name': 'Sanne van den Bogaart',
  'biography': 'For the past 3 years I have been working as a Data Science consultant at Pipple. Since Pipple is active in multiple different sectors, I have had the opportunity to do many different projects. What I have discovered is that explainability of the machine learning used was a critical topic in all of these projects. Fortunately, frameworks like LIME have emerged to provided this much needed explainability. I am excited to discuss more about LIME at the upcoming 2024 PyData Eindhoven conference.',
  'answers': []}]

Create one column that combines all people in the `persons` list into a single text.

In [7]:
def format_persons(persons):
    """Render the speakers of one session as Markdown text.

    Parameters
    ----------
    persons : iterable of dict, None, or NaN
        Speaker records. Each record must have a "public_name" key and may
        have a "biography" key. ``None`` or NaN (a missing cell in the
        frame) is treated as "no speakers".

    Returns
    -------
    str
        One "### name" section per speaker followed by the biography, with
        the sections joined by a newline. Empty string when there are no
        speakers.
    """
    # A missing cell shows up as None or as a float NaN (NaN != NaN).
    if persons is None or (isinstance(persons, float) and persons != persons):
        return ""

    sections = []
    for person in persons:
        # A null or absent biography becomes a blank line rather than the
        # literal text "None".
        biography = person.get("biography") or ""
        # The leading space before "###" is kept from the original format.
        sections.append(f""" ### {person["public_name"]}
{biography}
""")
    return "\n".join(sections)
# Build the speaker text for every session, element by element.
speaker_texts = sessions["persons"].map(format_persons)
sessions["persons_text"] = speaker_texts


Format the full text of each session by combining its title, abstract, description, timeslot, room, and speaker columns.

In [None]:

def _blank_if_missing(value):
    """Return "" for None or NaN, otherwise return the value unchanged."""
    # NaN is the only float that is not equal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return value


def format_session_text(session):
    """Build the Markdown text of one session.

    Parameters
    ----------
    session : mapping or pandas.Series
        One session row. It is indexed by the keys "title", "abstract",
        "description", "date", "duration", "room" and "persons_text".

    Returns
    -------
    str
        A Markdown document with the title as top-level heading, the
        abstract below it, and "Description", "Timeslot", "Room" and
        "Speaker" sections.

    Notes
    -----
    A null or NaN abstract or description is rendered as an empty line
    instead of the literal text "None" or "nan".
    """
    return f"""# {session["title"]}
{_blank_if_missing(session["abstract"])}

## Description
{_blank_if_missing(session["description"])}

## Timeslot
{session["date"]} with a duration of {session["duration"]}

## Room
{session["room"]}

## Speaker
{session["persons_text"]}
"""
# Run the formatter once per row (axis="columns" hands each row to the function).
session_texts = sessions.apply(format_session_text, axis="columns")
sessions["text"] = session_texts

In [11]:
# Use the session id as the row index. The guard keeps this cell re-runnable:
# after the first run "id" is no longer a column, so a second set_index("id")
# would raise KeyError.
if "id" in sessions.columns:
    sessions = sessions.set_index("id")

In [12]:
# Export the sessions as JSON. Create the target folder first, because
# to_json does not create missing directories and would fail on a fresh
# checkout without a data/ folder.
output_path = Path("data/pydata_eindhoven_2024_sessions.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
sessions.to_json(output_path)