In [1]:
from datetime import date
import pandas as pd
import re
import requests

## Pull data from the Drupal workshops API

In [2]:
# Endpoint of the workshops API queried below.
# As of Feb 4, 2021, the endpoint returns workshop data from the last two years - ~1300 nodes
url = "https://www.lib.ncsu.edu/api/workshops/all"

In [3]:
# A timeout keeps this cell from hanging indefinitely if the server never
# answers; requests applies no timeout unless one is passed explicitly.
r = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of later, when the body is parsed.
r.raise_for_status()
r

<Response [200]>

In [4]:
# Decode the JSON response body into Python objects.
data = r.json()

In [5]:
# Peek at the first record to see which fields the API returns.
data[0]

{'title': 'D.H. Hill Makerspace Orientation ',
 'nid': '53164',
 'field_time_d8': '02-11-2019 10:30AM to 02-11-2019 11:30AM',
 'body': 'Only currently affiliated NCSU students, faculty, and staff may attend and gain access to the Makerspace. An NCSU ID is required for this orientation.\n\xa0\nThis orientation is required for anyone wanting to access the D. H. Hill Jr. Makerspace. It will provide a general introduction to the space, best practices and for its use, and importantly, an introduction to potential hazards, protective equipment, and other aspects of safe Makerspace use.\n\xa0\nAfter this orientation, attendees will be given card access to use the Hill Makerspace during open hours.\nWORKSHOP PRE-REQUISITESAttendees must be currently affiliated NCSU students, faculty, or staff, and must present a current NCSU ID (Wolfpack One Card).\xa0',
 'field_registration_url': '<a href="https://reporter.ncsu.edu/link/instanceview?courseID=LIB-MAKER-ONBOARD&amp;deptName=LIB&amp;instanceID=0

In [6]:
# Load the decoded records into a DataFrame and preview the first rows.
df = pd.DataFrame(data)
df.head()

Unnamed: 0,title,nid,field_time_d8,body,field_registration_url,field_non_library_instructor,field_workshop_leads_export,field_workshop_series,field_workshop_user_activities,field_non_libraries_space_1,field_space
0,D.H. Hill Makerspace Orientation,53164,02-11-2019 10:30AM to 02-11-2019 11:30AM,"Only currently affiliated NCSU students, facul...","<a href=""https://reporter.ncsu.edu/link/instan...",,"[{'id': '1540', 'url': 'https://www.lib.ncsu.e...",Makerspace,Make at Hill,,"<a href=""/spaces/hill-library-makerspace"" href..."
1,"MATLAByrinth Part 3: Algebra, Calculus and Plo...",52882,02-11-2019 2:00PM to 02-11-2019 4:00PM,"MATLAByrinth, is a series of four comprehensiv...","<a href=""https://reporter.ncsu.edu/link/instan...","\nAmrutha Raghu, AmruthaRaghu.jpg\n\n",[],,,,"<a href=""/spaces/teaching-and-visualization-la..."
2,Orientation: Digital Media Making in the Libra...,53191,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/digital-media-lab"" hreflang=""..."
3,Orientation: Digital Media Making in the Libra...,53184,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/4k-video-studio"" hreflang=""un..."
4,The Escape Room,53245,02-11-2019 5:30PM to 02-11-2019 6:30PM,"Experience the NCSU Libraries Escape Room, an ...","<a href=""https://reporter.ncsu.edu/link/instan...",\nTaylor Rowland\n\n,[],,,,"<a href=""/spaces/escape-room"" hreflang=""und"">E..."


Strip leading and trailing whitespace from the `field_non_library_instructor` column.

In [7]:
# Trim leading/trailing whitespace (including the surrounding newlines seen
# in the preview above) from the instructor names.
instructor_col = "field_non_library_instructor"
trimmed_instructors = df[instructor_col].str.strip()
df[instructor_col] = trimmed_instructors
df.head()

Unnamed: 0,title,nid,field_time_d8,body,field_registration_url,field_non_library_instructor,field_workshop_leads_export,field_workshop_series,field_workshop_user_activities,field_non_libraries_space_1,field_space
0,D.H. Hill Makerspace Orientation,53164,02-11-2019 10:30AM to 02-11-2019 11:30AM,"Only currently affiliated NCSU students, facul...","<a href=""https://reporter.ncsu.edu/link/instan...",,"[{'id': '1540', 'url': 'https://www.lib.ncsu.e...",Makerspace,Make at Hill,,"<a href=""/spaces/hill-library-makerspace"" href..."
1,"MATLAByrinth Part 3: Algebra, Calculus and Plo...",52882,02-11-2019 2:00PM to 02-11-2019 4:00PM,"MATLAByrinth, is a series of four comprehensiv...","<a href=""https://reporter.ncsu.edu/link/instan...","Amrutha Raghu, AmruthaRaghu.jpg",[],,,,"<a href=""/spaces/teaching-and-visualization-la..."
2,Orientation: Digital Media Making in the Libra...,53191,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/digital-media-lab"" hreflang=""..."
3,Orientation: Digital Media Making in the Libra...,53184,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/4k-video-studio"" hreflang=""un..."
4,The Escape Room,53245,02-11-2019 5:30PM to 02-11-2019 6:30PM,"Experience the NCSU Libraries Escape Room, an ...","<a href=""https://reporter.ncsu.edu/link/instan...",Taylor Rowland,[],,,,"<a href=""/spaces/escape-room"" hreflang=""und"">E..."


Remove HTML tags from `body`. 

In [8]:
def strip_html_tags(text: str) -> str:
    """Return ``text`` with every ``<...>`` span removed.

    A "tag" here is the shortest run of characters that starts at ``<`` and
    ends at the next ``>``. Matching is purely textual (no HTML parsing), so
    a stray ``<`` followed later by ``>`` in ordinary prose is removed as
    well, and HTML entities such as ``&amp;`` are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with all matched spans deleted.
    """
    # re.DOTALL makes "." match newlines too, so a tag whose attributes wrap
    # onto the next line is still stripped instead of being left in the text.
    return re.sub(r"<.*?>", "", text, flags=re.DOTALL)

In [9]:
# Run every workshop description through strip_html_tags and store the
# cleaned text back in the same column.
body_without_tags = df["body"].apply(strip_html_tags)
df["body"] = body_without_tags
df.head()

Unnamed: 0,title,nid,field_time_d8,body,field_registration_url,field_non_library_instructor,field_workshop_leads_export,field_workshop_series,field_workshop_user_activities,field_non_libraries_space_1,field_space
0,D.H. Hill Makerspace Orientation,53164,02-11-2019 10:30AM to 02-11-2019 11:30AM,"Only currently affiliated NCSU students, facul...","<a href=""https://reporter.ncsu.edu/link/instan...",,"[{'id': '1540', 'url': 'https://www.lib.ncsu.e...",Makerspace,Make at Hill,,"<a href=""/spaces/hill-library-makerspace"" href..."
1,"MATLAByrinth Part 3: Algebra, Calculus and Plo...",52882,02-11-2019 2:00PM to 02-11-2019 4:00PM,"MATLAByrinth, is a series of four comprehensiv...","<a href=""https://reporter.ncsu.edu/link/instan...","Amrutha Raghu, AmruthaRaghu.jpg",[],,,,"<a href=""/spaces/teaching-and-visualization-la..."
2,Orientation: Digital Media Making in the Libra...,53191,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/digital-media-lab"" hreflang=""..."
3,Orientation: Digital Media Making in the Libra...,53184,02-11-2019 4:30PM to 02-11-2019 5:00PM,"Excited to make videos and movies, record podc...","<a href=""https://reporter.ncsu.edu/link/instan...",,[],Digital Media,,,"<a href=""/spaces/4k-video-studio"" hreflang=""un..."
4,The Escape Room,53245,02-11-2019 5:30PM to 02-11-2019 6:30PM,"Experience the NCSU Libraries Escape Room, an ...","<a href=""https://reporter.ncsu.edu/link/instan...",Taylor Rowland,[],,,,"<a href=""/spaces/escape-room"" hreflang=""und"">E..."


Write the DataFrame to a CSV file with today's date in the filename.

In [10]:
# The filename embeds today's date (ISO format, via str(date)).
csv_name = f"all-workshops-{date.today()}.csv"
df.to_csv(csv_name)