In [4]:
import xml.etree.ElementTree as ET
import pandas as pd

tree = ET.parse('calendar_9148_1692057526_2023-08-14.xml')
root = tree.getroot()

data = []

root_events = root.find('events')
events_all = root_events.findall('event')

for event in events_all:
    event_data = {}
    for tag in ['title', 'start', 'end', 'URL', 'categories']:
        event_tag = event.find(tag)
        if event_tag is not None:
            if tag == 'categories':
                categories = [cats.text for cats in event_tag.findall('category')]
                event_data[tag] = ', '.join(categories)
            else:
                event_data[tag] = event_tag.text
        else:
            event_data[tag] = None
    data.append(event_data)

df = pd.DataFrame(data)
df.head()

Unnamed: 0,title,start,end,URL,categories
0,Introduction to CMU Engineering Library Collec...,2023-08-22T11:00:00-04:00,2023-08-22T12:00:00-04:00,https://cmu.libcal.com/event/11130822,
1,Introduction to CMU Libraries for Internationa...,2023-08-23T11:00:00-04:00,2023-08-23T12:00:00-04:00,https://cmu.libcal.com/event/11130935,
2,Building your Programming Tool Box: Command Li...,2023-09-08T12:00:00-04:00,2023-09-08T13:30:00-04:00,https://cmu.libcal.com/event/11068306,"Open Science, Working With Data"
3,Introduction to Technical Standards,2023-09-13T11:00:00-04:00,2023-09-13T12:00:00-04:00,https://cmu.libcal.com/event/11056005,
4,Introduction to Python for Data Science Part 1...,2023-09-13T13:00:00-04:00,2023-09-13T15:00:00-04:00,https://cmu.libcal.com/event/11101229,"Open Science, Working With Data"


In [5]:
df.dropna(inplace=True)

In [6]:
filtered_rows = df[df['categories'].apply(lambda cats: all(cat in cats.split(', ') for cat in ['Open Science']))]
filtered_rows.head()

Unnamed: 0,title,start,end,URL,categories
2,Building your Programming Tool Box: Command Li...,2023-09-08T12:00:00-04:00,2023-09-08T13:30:00-04:00,https://cmu.libcal.com/event/11068306,"Open Science, Working With Data"
4,Introduction to Python for Data Science Part 1...,2023-09-13T13:00:00-04:00,2023-09-13T15:00:00-04:00,https://cmu.libcal.com/event/11101229,"Open Science, Working With Data"
6,Building Your Programming Toolbox: Version Con...,2023-09-15T12:00:00-04:00,2023-09-15T13:30:00-04:00,https://cmu.libcal.com/event/11108087,"Open Science, Working With Data"
10,Introduction to Python for Data Science Part 2...,2023-09-20T13:00:00-04:00,2023-09-20T15:00:00-04:00,https://cmu.libcal.com/event/11101249,"Open Science, Working With Data"
11,Building Your Programming Toolbox: Collaborati...,2023-09-22T12:00:00-04:00,2023-09-22T13:30:00-04:00,https://cmu.libcal.com/event/11108299,"Open Science, Working With Data"


In [7]:
workshops = filtered_rows.copy()

In [8]:
workshops['date'] = pd.to_datetime(workshops['start'], format='%Y-%m-%d', utc=True).dt.date
workshops['start'] = pd.to_datetime(workshops['start'], format='%Y-%m-%d %H:%M:%S', utc=True).dt.tz_convert('US/Eastern').dt.strftime('%I:%M %p')
workshops['end'] = pd.to_datetime(workshops['end'], format='%Y-%m-%d %H:%M:%S', utc=True).dt.tz_convert('US/Eastern').dt.strftime('%I:%M %p')
workshops.head()

Unnamed: 0,title,start,end,URL,categories,date
2,Building your Programming Tool Box: Command Li...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11068306,"Open Science, Working With Data",2023-09-08
4,Introduction to Python for Data Science Part 1...,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11101229,"Open Science, Working With Data",2023-09-13
6,Building Your Programming Toolbox: Version Con...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11108087,"Open Science, Working With Data",2023-09-15
10,Introduction to Python for Data Science Part 2...,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11101249,"Open Science, Working With Data",2023-09-20
11,Building Your Programming Toolbox: Collaborati...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11108299,"Open Science, Working With Data",2023-09-22


In [9]:
workshops.to_csv('workshops.csv', index=False)

In [10]:
paths = pd.read_csv('content_paths.csv').drop('title',axis=1)
paths

Unnamed: 0,path,URL
0,cli/cli_main.md,https://cmu.libcal.com/event/11068306
1,Python_Series_Materials/part_1/part_1.md,https://cmu.libcal.com/event/11101229
2,https://swcarpentry.github.io/git-novice/,https://cmu.libcal.com/event/11108087
3,Python_Series_Materials/part_3/part_3.md,https://cmu.libcal.com/event/11101249
4,https://swcarpentry.github.io/git-novice/,https://cmu.libcal.com/event/11108299
5,content/git_github.md,https://cmu.libcal.com/event/11108435
6,Python_Series_Materials/part_2/part_2.md,https://cmu.libcal.com/event/11067479
7,content/obsidian.md,https://cmu.libcal.com/event/11101345
8,content/osf.md,https://cmu.libcal.com/event/11067730
9,content/openrefine.md,https://cmu.libcal.com/event/11068024


In [20]:
workshops_paths = pd.merge(left=workshops, right=paths, how='left', left_on='URL', right_on='URL')
workshops_paths.fillna(0, inplace=True)
workshops_paths

Unnamed: 0,title,start,end,URL,categories,date,path
0,Building your Programming Tool Box: Command Li...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11068306,"Open Science, Working With Data",2023-09-08,cli/cli_main.md
1,Introduction to Python for Data Science Part 1...,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11101229,"Open Science, Working With Data",2023-09-13,Python_Series_Materials/part_1/part_1.md
2,Building Your Programming Toolbox: Version Con...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11108087,"Open Science, Working With Data",2023-09-15,https://swcarpentry.github.io/git-novice/
3,Introduction to Python for Data Science Part 2...,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11101249,"Open Science, Working With Data",2023-09-20,Python_Series_Materials/part_3/part_3.md
4,Building Your Programming Toolbox: Collaborati...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11108299,"Open Science, Working With Data",2023-09-22,https://swcarpentry.github.io/git-novice/
5,Introduction to Python for Data Science Part 3...,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11101275,"Open Science, Working With Data",2023-09-27,0
6,Introduction to Research Data Management and D...,12:00 PM,01:00 PM,https://cmu.libcal.com/event/11054938,"Open Science, Working With Data",2023-09-28,0
7,Building Your Programming Toolbox: Creating We...,12:00 PM,01:30 PM,https://cmu.libcal.com/event/11108435,"Open Science, Working With Data",2023-09-29,content/git_github.md
8,Making the Most with Jupyter Lab and Notebooks,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11067479,"Open Science, Working With Data",2023-10-03,Python_Series_Materials/part_2/part_2.md
9,Python for Harvesting Data on the Web,01:00 PM,03:00 PM,https://cmu.libcal.com/event/11067535,"Open Science, Working With Data",2023-10-19,0


In [21]:
table_head = """| Day | Time | Title | Content
| --- | --- | --- | ---
"""

In [22]:
table_body = """
"""

for i, row in workshops_paths.iterrows():
    table_entry = f"| {row['date']} | {row['start']} - {row['end']} | {row['title']} "
    if row['path'] != 0:
        table_entry += f"| {row['path']} \n"
    else:
        table_entry += "| \n"
    
    table_body += table_entry

In [23]:
print(table_body)


| 2023-09-08 | 12:00 PM - 01:30 PM | Building your Programming Tool Box: Command Line Crash Course for Beginners | cli/cli_main.md 
| 2023-09-13 | 01:00 PM - 03:00 PM | Introduction to Python for Data Science Part 1: Introduction to Basic Programming with Data | Python_Series_Materials/part_1/part_1.md 
| 2023-09-15 | 12:00 PM - 01:30 PM | Building Your Programming Toolbox: Version Control with Git | https://swcarpentry.github.io/git-novice/ 
| 2023-09-20 | 01:00 PM - 03:00 PM | Introduction to Python for Data Science Part 2: Plotting and Analyzing Tabular Datasets | Python_Series_Materials/part_3/part_3.md 
| 2023-09-22 | 12:00 PM - 01:30 PM | Building Your Programming Toolbox: Collaborating with GitHub | https://swcarpentry.github.io/git-novice/ 
| 2023-09-27 | 01:00 PM - 03:00 PM | Introduction to Python for Data Science Part 3: Analyzing Data with Logic and Iteration | 
| 2023-09-28 | 12:00 PM - 01:00 PM | Introduction to Research Data Management and Data Management Plans | 
| 202

In [32]:
index_md = f"""---
layout: default
title: Home
nav_order: 1
---
# Open Data Science Workshops
Hosted by the Open Science program within the
[Carnegie Mellon University (CMU) Libraries](https://www.library.cmu.edu/)

This website highlights workshops from CMU libraries that help
build computational skills for Open Research and Data Science.

## Workshop Calendar
{table_body}
## Data Office Hours

Schedule a [Data Consultation](https://library.cmu.edu/service/data-office-hours) with our library associates for support with data and research!

![Data Office Hours Flyer](content/img/data-office-hours.png)

## Coordinators

Melanie Gainey (she/her/hers)  
Open Science Program Director/Librarian  
Office: Library 431, Mellon library  
[mgainey@andrew.cmu.edu](mailto:mgainey@andrew.cmu.edu) | [Schedule a Consultation](https://cmu.libcal.com/appointment/42420)

Chasz Griego (he/him/his)  
Open Science Postdoctoral Associate  
Office: 4416, Sorrells Library  
[cgriego@andrew.cmu.edu](mailto:cgriego@andrew.cmu.edu) | [Schedule a Consultation](https://cmu.libcal.com/appointments/cgriego)

Lencia Beltran (she/her/hers)  
Open Science Project Coordinator  
Office: 4416, Sorrells Library  
[lbeltran@andrew.cmu.edu](mailto:lbeltran@andrew.cmu.edu) | [Schedule a Consultation](https://cmu.libcal.com/appointments/lencia)

Emma Slayton (she/her/hers)  
Data Curation, Visualization, and GIS Specialist  
Office: 4408, Sorrells Library  
[eslayton@andrew.cmu.edu](mailto:eslayton@andrew.cmu.edu) | [Schedule a Consultation](https://cmu.libcal.com/appointment/41060)

Sarah Young (she/her/hers)  
Principal Librarian  
Office: 109G, Hunt Library  
[sarahy@andrew.cmu.edu](mailto:sarahy@andrew.cmu.edu) | [Schedule a Consultation](https://cmu.libcal.com/appointments/sarahy)
"""

In [33]:
print(index_md)

---
layout: default
title: Home
nav_order: 1
---
# Open Data Science Workshops
Hosted by the Open Science program within the
[Carnegie Mellon University (CMU) Libraries](https://www.library.cmu.edu/)

This website highlights workshops from CMU libraries that help
build computational skills for Open Research and Data Science.

## Workshop Calendar

| 2023-09-08 | 12:00 PM - 01:30 PM | Building your Programming Tool Box: Command Line Crash Course for Beginners | cli/cli_main.md 
| 2023-09-13 | 01:00 PM - 03:00 PM | Introduction to Python for Data Science Part 1: Introduction to Basic Programming with Data | Python_Series_Materials/part_1/part_1.md 
| 2023-09-15 | 12:00 PM - 01:30 PM | Building Your Programming Toolbox: Version Control with Git | https://swcarpentry.github.io/git-novice/ 
| 2023-09-20 | 01:00 PM - 03:00 PM | Introduction to Python for Data Science Part 2: Plotting and Analyzing Tabular Datasets | Python_Series_Materials/part_3/part_3.md 
| 2023-09-22 | 12:00 PM - 01:30 PM

In [34]:
with open("./index.md", "w") as file:
    file.write(index_md)