In [1]:
from datetime import datetime
import sys
import requests
from bs4 import BeautifulSoup
import logging
import re
import math

In [2]:
def soupify_event_page(url = 'https://mdflora.org/calendar', timeout = 30):
    """Download a page and parse it with BeautifulSoup's 'html.parser'.

    Args:
        url: Address to GET. Defaults to the mdflora.org calendar page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        The parsed BeautifulSoup document, or None when the request fails
        (connection error, timeout, or an HTTP error status). The failure is
        logged at CRITICAL level with the traceback.
    """
    # The notebook never defines a module-level `logger`, so the original
    # error path raised NameError instead of logging. Fetch one here.
    log = logging.getLogger(__name__)

    try:
        r = requests.get(url, timeout = timeout)
        # Treat 4xx/5xx responses as failures rather than parsing an error
        # page as if it were the calendar.
        r.raise_for_status()
    except Exception as e:
        log.critical(f'Exception making GET to {url}: {e}', exc_info = True)
        return None

    return BeautifulSoup(r.content, 'html.parser')

In [3]:
# Fetch and parse the calendar page (soupify_event_page's default URL).
soup = soupify_event_page()

In [4]:
def get_event_url(event_calender_soup):
    """Collect the events linked from a parsed calendar page.

    Args:
        event_calender_soup: Parsed calendar page. Only its ``find_all``
            method is used.

    Returns:
        A list with one dict per calendar cell that contains a link:
        'name' is the first child of the link, 'info' is the link's
        ``title`` attribute and 'url' is its ``href``. 'name' and 'info'
        fall back to '' when the link has no children / no title.
    """
    # Active calendar cells carry the class 'EventListCalendarItemDefault'.
    # Inactive cells (previous or next month, but still displayed) have the
    # class 'EventListCalendarItemInactive' and are therefore skipped.
    # The search runs on the argument, not on the notebook-global `soup`.
    # The extra {'class': 'text'} filter was dropped because it conflicted
    # with class_, which is the filter that selects the active cells.
    tds = event_calender_soup.find_all("td", class_="EventListCalendarItemDefault")

    event_list = []
    for td in tds:
        # A cell only has a link when it holds an event.
        # NOTE(review): only the first link per cell is collected; a day with
        # several events would lose the rest - confirm against the page markup.
        a = td.find('a', href=True)
        if a is None:
            continue
        event_list.append({
            'name': a.contents[0] if a.contents else '',
            'info': a.get('title', ''),
            'url': a['href'],
        })
    return event_list

In [5]:
# Build the list of name/info/url dicts for the events on the calendar page.
event_info = get_event_url(soup)

In [6]:
# Display the scraped event list.
event_info

[{'name': 'Wild Ones Native Landscapes meeting (Saturday)',
  'info': 'n[10:30 AM]02/08/2020, 10:30 AM  12:00 PM (EST)\r\nCrossway Community Center, 3015 Upton Drive, Kensington, MD',
  'url': 'https://mdflora.org/event-3692905?CalendarViewType=1&SelectedDate=2/11/2020'},
 {'name': 'Western Mountains: February Meeting & Program',
  'info': 'n[7:00 PM]02/13/2020, 7:00 PM (EST)\r\nFrostburg State University, Compton Science Center, Room 327',
  'url': 'https://mdflora.org/event-3698694?CalendarViewType=1&SelectedDate=2/11/2020'},
 {'name': 'MNPS Field Trip: Cheverly Woodworth Park (Saturday)',
  'info': 'n[10:00 AM]02/15/2020, 10:00 AM  11:30 AM (EST)\r\nCheverly Prince George&#39;s County, MD',
  'url': 'https://mdflora.org/event-3730521?CalendarViewType=1&SelectedDate=2/11/2020'},
 {'name': 'MNPS Field Trip: Cheverly Nature Park (Sunday)',
  'info': 'n[10:00 AM]02/16/2020, 10:00 AM  11:30 AM (EST)\r\nCheverly Prince George&#39;s County, MD',
  'url': 'https://mdflora.org/event-3730529?

In [7]:
# Take the first scraped event and fetch/parse its own page.
event = event_info[0]
event_soup = soupify_event_page(event['url'])

In [8]:
# Display the parsed event page (the output is the page's full HTML).
event_soup

<!DOCTYPE html>

<!--[if lt IE 7 ]><html lang="en" class="no-js ie6 "><![endif]-->
<!--[if IE 7 ]><html lang="en" class="no-js ie7 "> <![endif]-->
<!--[if IE 8 ]><html lang="en" class="no-js ie8 "> <![endif]-->
<!--[if IE 9 ]><html lang="en" class="no-js ie9 "><![endif]-->
<!--[if (gt IE 9)|!(IE)]><!--><html class="no-js" lang="en"> <!--<![endif]-->
<head id="Head1">
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<link href="https://sf.wildapricot.org/BuiltTheme/firma_above_the_clouds.v3.0/current/d9dc6949/styles/combined.css" rel="stylesheet" type="text/css"/><link href="https://mdflora.org/resources/theme/customStyles.css?t=636463930460000000" rel="stylesheet" type="text/css"/><link href="https://mdflora.org/resources/theme/user.css?t=636435848330000000" rel="stylesheet" type="text/css"/><link href="//sf.wildapricot.org/WebUI/built7.8.0.11899/css/shared/ui/shared-ui-compiled.css" rel="stylesheet" type="text/css"/><script id="idJavaScriptEnvironment" language="j

Information to collect
**** means done

                  'Event Name': event_name, ****
                  'Event Website': event_website, *
                  'Event Start Date': dates[0], ****
                  'Event Start Time': start_time, ****
                  'Event End Date': dates[1], ****
                  'Event End Time': timing[1], ****
                  'Event Venue Name': event_venue, ****
                  'Timezone':'America/New_York', ****
                  'Event Cost': event_cost, 
                  'Event Description': event_description, 
                  'Event Category': event_category, 
                  'Event Organizers': "Maryland Native Plant Society", ****
                  'Event Currency Symbol':'$',
                  'All Day Event': all_day

In [None]:
def get_event_info(event_soup, classname):
    """Return the text of one info box on an event page.

    Looks up the first element with class ``classname``, then the first
    'eventInfoBoxValue' element inside it, and returns the ``.text`` of that
    element's first child.
    """
    info_box = event_soup.find(class_=classname)
    value_node = info_box.find(class_="eventInfoBoxValue")
    first_child = value_node.contents[0]
    return first_child.text

def get_event_dates(event_soup):
    """Read the start date from an event page.

    Args:
        event_soup: Parsed event page; the date is read from the
            'eventInfoBoxValue' element inside 'eventInfoStartDate'.

    Returns:
        A ``(start_date, end_date, multi_day_event)`` tuple. ``start_date`` is
        reformatted from MM/DD/YYYY to MM-DD-YYYY. End-date and multi-day
        detection are not implemented yet, so ``end_date`` is always '' and
        ``multi_day_event`` is always False.

    Raises:
        ValueError: If the text is not a valid MM/DD/YYYY date.
    """
    # Placeholders that keep the three-value return shape until end-date
    # parsing is written.
    end_date = ''
    multi_day_event = False

    raw_start = event_soup.find(class_= "eventInfoStartDate") \
        .find(class_="eventInfoBoxValue") \
        .contents[0].text

    # %m is the month directive. The original used %M (minutes), which put
    # the month into the minute field, so impossible dates such as
    # 13/01/2020 or 02/30/2020 were accepted instead of rejected.
    start_date = datetime.strptime(raw_start.strip(), "%m/%d/%Y").strftime("%m-%d-%Y")

    return start_date, end_date, multi_day_event

def get_event_name(event_soup):
    """Return the event title: the first child of the 'pageTitle' element, stripped."""
    title_node = event_soup.find(class_='pageTitle')
    first_child = title_node.contents[0]
    return first_child.strip()

def get_event_timing(event_soup):
    """Read the start and end time from an event page.

    Args:
        event_soup: Parsed event page; the text is read from the
            'eventInfoBoxValue' element inside 'eventInfoStartTime' and is
            expected to look like "10:30 AM" or "10:30 AM - 12:00 PM".

    Returns:
        A ``(start_time, end_time)`` tuple of 24-hour 'HH:MM:SS' strings.
        ``end_time`` is '' when only a start time is present.

    Raises:
        ValueError: If a time is not in "HH:MM AM/PM" form, or if the text
            splits into more than two '-'-separated parts.
    """
    raw_times = event_soup.find(class_= "eventInfoStartTime") \
        .find(class_="eventInfoBoxValue") \
        .contents[0].text
    times = [t.strip() for t in raw_times.split('-')]

    # The original left start_time/end_time unassigned for any other count
    # and died with UnboundLocalError at the return. Fail with the same
    # exception type strptime uses for malformed input.
    if len(times) not in (1, 2):
        raise ValueError(f'Unexpected event time format: {raw_times!r}')

    def _to_24h(value):
        # Convert e.g. "7:00 PM" to "19:00:00".
        return datetime.strptime(value, "%I:%M %p").strftime('%H:%M:%S')

    start_time = _to_24h(times[0])
    end_time = _to_24h(times[1]) if len(times) == 2 else ''

    return start_time, end_time

def get_event_venue(event_soup):
    """Return the venue text from an event page.

    Reads the first child of the 'eventInfoBoxValue' element inside
    'eventInfoLocation' and returns its ``.text`` with surrounding
    whitespace removed.
    """
    location_box = event_soup.find(class_ = 'eventInfoLocation')
    value_node = location_box.find(class_ = 'eventInfoBoxValue')
    venue_text = value_node.contents[0].text
    return venue_text.strip()

def get_event_cost(event_soup):
    """Placeholder for event-cost extraction.

    Not implemented yet: ``event_soup`` is ignored and None is returned.
    """
    return None
    