In [43]:
import pandas as pd
from requests_html import HTMLSession
from collections import namedtuple

In [19]:
# One HTMLSession is created up front and used for the schedule request below.
session = HTMLSession()

In [20]:
# Address of the ACRL 2019 conference schedule page that gets scraped.
url = "https://s4.goeshow.com/acrl/national/2019/conference_schedule.cfm"

In [21]:
# Fetch the schedule page. The timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() stops the notebook on an HTTP error instead
# of letting later cells parse an error page as if it were the schedule.
r = session.get(url, timeout=30)
r.raise_for_status()

In [25]:
# Collect every element matching the "#sess_table" selector; each match is
# later handed to parse_session() as one conference session.
sessions = r.html.find("#sess_table")

In [61]:
# Immutable record describing one scraped session; the field order here is
# also the column order of the DataFrame built from these records.
ConfSession = namedtuple("ConfSession", "title speakers link tag")

In [92]:
def parse_session(sessionHTML):
    """Extract title, speakers, link and tag from one scraped session element.

    Parameters
    ----------
    sessionHTML
        Element-like object exposing ``find(selector, first=False)``. Matches
        must expose ``.text`` and, for the title anchor, ``.attrs``.

    Returns
    -------
    ConfSession
        Any field that cannot be located is filled with its placeholder:
        ``"NO_TITLE"``, ``"NO_SPEAKERS"``, ``"NO_LINK"`` or ``"NO_TAG"``.
    """

    def value_after_label(text):
        # Split on the FIRST colon only, so a value that itself contains ":"
        # is kept whole instead of being cut at its second colon.
        # Raises IndexError when there is no colon at all.
        return text.split(":", 1)[1].strip()

    # Title and link both come from the same anchor, so look it up once.
    try:
        anchor = sessionHTML.find(".schedule-session", first=True)
    except (IndexError, AttributeError):
        anchor = None

    try:
        title = anchor.text
    except (IndexError, AttributeError):
        title = "NO_TITLE"

    try:
        link = anchor.attrs["href"].strip()
    except (IndexError, AttributeError, KeyError):
        link = "NO_LINK"

    try:
        speaker_line = sessionHTML.find(".sessionPrimarySpeaker", first=True).text
        speakers = value_after_label(speaker_line)
    except (IndexError, AttributeError):
        speakers = "NO_SPEAKERS"

    try:
        # The tag is read from the fourth <tr> of the element.
        # NOTE(review): assumes the row layout of the page is fixed - confirm.
        tag_line = sessionHTML.find("tr")[3].text
        tag = value_after_label(tag_line)
    except (IndexError, AttributeError):
        tag = "NO_TAG"

    return ConfSession(title, speakers, link, tag)

In [93]:
# Smoke-test the parser: print one ConfSession record per scraped element.
for s in sessions:
    print(parse_session(s))

, Karen Diller', link="javascript:ExhibitorPopup('profile.cfm?profile_name=session&master_key=01B88E97-FC04-3890-4BDE-59E525841E75&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0','session_profile');", tag='Assessment')
ConfSession(title='Leveraging Visual Literacy to Engage and Orient First-Year College Students in the Library', speakers='Melissa Clark', link="javascript:ExhibitorPopup('profile.cfm?profile_name=session&master_key=01BD7AF0-B01B-1484-CD21-97ACA6CC0C3B&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0','session_profile');", tag='Teaching and Learning')
ConfSession(title='When Roles Collide: Librarians as Educators and the Question of Learning Analytics', speakers='Megan Oakleaf, Malcolm Brown, Dean Hendrix, Scott Walter, Joe Lucia', link="javascript:ExhibitorPopup('profile.cfm?profile_name=session&master_key=01C73AE9-FCBF-3153-67A1-2BA3FB850D6F&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0','session_profile')

In [94]:
# Parse every scraped element into a ConfSession record, keeping page order.
conf_sessions = list(map(parse_session, sessions))

In [95]:
# Preview the first five parsed records.
conf_sessions[:5]

[ConfSession(title='Taking Charge of Your Narrative (*additional fee required)', speakers='Alexia Hudson-Ward, Toni Olivas, Jon Cawthorne, Mary Ellen Davis, Gerald Holmes, Trevor Dawes, Jody Gray, Elaine Westbrooks', link="javascript:ExhibitorPopup('profile.cfm?profile_name=session&master_key=0DFF903A-A477-DC5D-B3B4-C61346230B4A&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0','session_profile');", tag='Professional/Staff Development'),
 ConfSession(title='Engaged Students are Empowered Students: Instructional Design Matters (*additional fee required)', speakers='Claire Holmes, Meg Grotti, Megan Oakleaf, Melissa Cadena', link="javascript:ExhibitorPopup('profile.cfm?profile_name=session&master_key=01B9AE26-A889-E82D-34AF-8AEE98462447&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0','session_profile');", tag='Teaching and Learning'),
 ConfSession(title='Lead Your Library Towards Accessibility: Knowledge & tools for any librarian to promote a cult

In [96]:
# Build one row per parsed session; column names and order come from the
# ConfSession field names.
df = pd.DataFrame.from_records(conf_sessions, columns=ConfSession._fields)

In [97]:
# Show the frame; the link column still holds the raw javascript: hrefs here.
df

Unnamed: 0,title,speakers,link,tag
0,Taking Charge of Your Narrative (*additional f...,"Alexia Hudson-Ward, Toni Olivas, Jon Cawthorne...",javascript:ExhibitorPopup('profile.cfm?profile...,Professional/Staff Development
1,Engaged Students are Empowered Students: Instr...,"Claire Holmes, Meg Grotti, Megan Oakleaf, Meli...",javascript:ExhibitorPopup('profile.cfm?profile...,Teaching and Learning
2,Lead Your Library Towards Accessibility: Knowl...,"Stephanie Rosen, Heidi Schroeder, Andy Andrews",javascript:ExhibitorPopup('profile.cfm?profile...,Professional/Staff Development
3,OER+ScholarlyCommunication (*additional fee re...,"Maria Bonn, Will Cross, Josh Bolick",javascript:ExhibitorPopup('profile.cfm?profile...,Scholarly Communication
4,"Planning, Assessing, and Communicating Library...","Lisa Stillwell, Sharon Mader",javascript:ExhibitorPopup('profile.cfm?profile...,Assessment
...,...,...,...,...
614,Safe for Work: Online Professionalism Instruct...,"Christina L. Wissinger, Carmen Cole",javascript:ExhibitorPopup('profile.cfm?profile...,Teaching and Learning
615,Guided Meditation,NO_SPEAKERS,javascript:ExhibitorPopup('profile.cfm?profile...,NO_TAG
616,Raspberry Pi a Platform for Innovation -- Solv...,Colin Nickels,javascript:ExhibitorPopup('profile.cfm?profile...,Technology and Tools
617,Closing Keynote: Alison Bechdel,NO_SPEAKERS,javascript:ExhibitorPopup('profile.cfm?profile...,NO_TAG


In [126]:
def build_link(raw_link, base_url="https://s4.goeshow.com/acrl/national/2019/"):
    """Turn a scraped ``javascript:...('<relative url>', ...)`` href into an absolute URL.

    Parameters
    ----------
    raw_link : str
        The href as scraped, or the ``"NO_LINK"`` placeholder.
    base_url : str, optional
        Prefix put in front of the extracted relative URL. Defaults to the
        ACRL 2019 site root.

    Returns
    -------
    str
        ``base_url`` followed by the text between the first pair of single
        quotes, with ``&amp;`` turned back into ``&``. The input is returned
        unchanged when it is ``"NO_LINK"`` or contains no single quote, which
        includes a link this function has already converted. That makes it
        safe to apply twice.
    """
    if raw_link == "NO_LINK":
        return raw_link

    chunks = raw_link.split("'")
    if len(chunks) < 2:
        # No quoted argument to extract; leave the value alone rather than
        # raising IndexError.
        return raw_link

    # Only "&amp;" is undone on purpose: a general HTML unescape could mangle
    # query parameters whose names happen to look like entity names.
    query = chunks[1].replace("&amp;", "&")
    return f"{base_url}{query}"

In [127]:
# Sample href with HTML-escaped ampersands ("&amp;"), used to check build_link by hand.
test_string = "javascript:ExhibitorPopup('profile.cfm?profile_name=session&amp;master_key=01BB987C-DC28-663C-A754-F2985B7BDC50&amp;page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&amp;xtemplate&amp;userLGNKEY=0','session_profile');"

In [128]:
# Manual check: the result should be an absolute URL with plain "&" separators.
build_link(test_string)

'https://s4.goeshow.com/acrl/national/2019/profile.cfm?profile_name=session&master_key=01BB987C-DC28-663C-A754-F2985B7BDC50&page_key=126CB9A0-B53A-2A1E-9827-DF2938A26C80&xtemplate&userLGNKEY=0'

In [129]:
# Overwrites the link column in place: after this cell the raw javascript:
# hrefs are no longer available in df.
df["link"] = df["link"].apply(build_link)

In [130]:
# Show the frame again to confirm the link column now holds absolute URLs.
df

Unnamed: 0,title,speakers,link,tag
0,Taking Charge of Your Narrative (*additional f...,"Alexia Hudson-Ward, Toni Olivas, Jon Cawthorne...",https://s4.goeshow.com/acrl/national/2019/prof...,Professional/Staff Development
1,Engaged Students are Empowered Students: Instr...,"Claire Holmes, Meg Grotti, Megan Oakleaf, Meli...",https://s4.goeshow.com/acrl/national/2019/prof...,Teaching and Learning
2,Lead Your Library Towards Accessibility: Knowl...,"Stephanie Rosen, Heidi Schroeder, Andy Andrews",https://s4.goeshow.com/acrl/national/2019/prof...,Professional/Staff Development
3,OER+ScholarlyCommunication (*additional fee re...,"Maria Bonn, Will Cross, Josh Bolick",https://s4.goeshow.com/acrl/national/2019/prof...,Scholarly Communication
4,"Planning, Assessing, and Communicating Library...","Lisa Stillwell, Sharon Mader",https://s4.goeshow.com/acrl/national/2019/prof...,Assessment
...,...,...,...,...
614,Safe for Work: Online Professionalism Instruct...,"Christina L. Wissinger, Carmen Cole",https://s4.goeshow.com/acrl/national/2019/prof...,Teaching and Learning
615,Guided Meditation,NO_SPEAKERS,https://s4.goeshow.com/acrl/national/2019/prof...,NO_TAG
616,Raspberry Pi a Platform for Innovation -- Solv...,Colin Nickels,https://s4.goeshow.com/acrl/national/2019/prof...,Technology and Tools
617,Closing Keynote: Alison Bechdel,NO_SPEAKERS,https://s4.goeshow.com/acrl/national/2019/prof...,NO_TAG


In [131]:
# Write the result next to the notebook. to_csv keeps the row index by default,
# so the file's first column is the unnamed integer index.
df.to_csv("acrl_2019.csv")