# BeautifulSoup

In [1]:
import requests
from bs4 import BeautifulSoup

def get_upcoming_events(url):
    """Fetch ``url`` with requests and print one dict per listed event.

    The page is parsed with BeautifulSoup (lxml parser). Every ``<li>`` inside
    the ``<ul class="list-recent-events">`` element is turned into a dict with
    the keys ``name``, ``location`` and ``time``, which is then printed.

    Args:
        url: Address of the events page to scrape.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        AttributeError: If the expected elements are missing from the page.
    """
    # A timeout keeps the cell from hanging forever on an unresponsive host.
    req = requests.get(url, timeout=10)
    # Fail loudly on an error page instead of parsing it and crashing later.
    req.raise_for_status()

    soup = BeautifulSoup(req.text, 'lxml')

    events = soup.find('ul', {'class': 'list-recent-events'}).find_all('li')

    for event in events:
        event_details = {
            'name': event.find('h3').find('a').text,
            # attrs is a dict mapping attribute name -> value (not a set).
            'location': event.find('span', {'class': 'event-location'}).text,
            'time': event.find('time').text,
        }
        print(event_details)

# Scrape the python.org events page; the function prints one dict per event.
get_upcoming_events('https://www.python.org/events/python-events/')

{'name': 'Python for Signal Processing Algorithms Implementation (Tamilnadu)', 'location': 'ECE Department Seminar Hall, IRTT, Erode, Tamilnadu, INDIA', 'time': '22 March 2020 3:30am UTC – 11:30am UTC'}
{'name': 'PyCon SK 2020', 'location': 'Bratislava, Slovakia', 'time': '27 March – 29 March  2020'}
{'name': 'MoscowPythonConf++', 'location': 'Moscow, Russia', 'time': '27 March 2020'}
{'name': 'PyCon US 2020', 'location': 'Pittsburgh, PA, USA', 'time': '15 April – 23 April  2020'}
{'name': 'Django Day Copenhagen', 'location': 'Copenhagen, Denmark', 'time': '17 April 2020'}
{'name': 'DragonPy 2020', 'location': 'Ljubljana, Slovenia', 'time': '18 April – 19 April  2020'}


In [2]:
import requests
import json

# requests builds on top of urllib3's connection pooling: a Session reuses
# the same TCP connection when several requests go to the same host.
# See https://en.wikipedia.org/wiki/HTTP_persistent_connection for details.
session = requests.Session()

# You may pass in a custom cookie with the request.
r = session.get('http://httpbin.org/get', cookies={'my-cookie': 'browser'}, timeout=10)
print(r.text)
# httpbin echoes the request back; the cookie shows up in the "headers"
# object of the response as "Cookie": "my-cookie=browser".

# Streaming is another nifty feature.
# From http://docs.python-requests.org/en/master/user/advanced/#streaming-requests
# (copyright belongs to the Requests project).
# The ``with`` block releases the connection once the stream has been consumed.
with requests.get('http://httpbin.org/stream/20', stream=True, timeout=10) as r:
    for line in r.iter_lines():
        # filter out keep-alive new lines
        if line:
            decoded_line = line.decode('utf-8')
            print(json.loads(decoded_line))

{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Cookie": "my-cookie=browser", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.22.0", 
    "X-Amzn-Trace-Id": "Root=1-5e6aef78-aadf264833970f8efc77105f"
  }, 
  "origin": "181.42.12.92", 
  "url": "http://httpbin.org/get"
}

{'url': 'http://httpbin.org/stream/20', 'args': {}, 'headers': {'Host': 'httpbin.org', 'X-Amzn-Trace-Id': 'Root=1-5e6aef78-144788c0f5f80000a65c5f80', 'User-Agent': 'python-requests/2.22.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*'}, 'origin': '181.42.12.92', 'id': 0}
{'url': 'http://httpbin.org/stream/20', 'args': {}, 'headers': {'Host': 'httpbin.org', 'X-Amzn-Trace-Id': 'Root=1-5e6aef78-144788c0f5f80000a65c5f80', 'User-Agent': 'python-requests/2.22.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*'}, 'origin': '181.42.12.92', 'id': 1}
{'url': 'http://httpbin.org/stream/20', 'args': {}, 'headers': {'Host': 'httpbin.org', 'X-Amzn-Trace

# Urllib3

In [3]:
import urllib3
from bs4 import BeautifulSoup

def get_upcoming_events(url):
    """Fetch ``url`` with urllib3 and print one dict per listed event.

    The response body is parsed with BeautifulSoup (``html.parser``). Every
    ``<li>`` inside the ``<ul class="list-recent-events">`` element is turned
    into a dict with the keys ``name``, ``location`` and ``time``, which is
    then printed.

    Args:
        url: Address of the events page to scrape.

    Raises:
        urllib3.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
        AttributeError: If the expected elements are missing from the page.
    """
    req = urllib3.PoolManager()
    res = req.request('GET', url)
    # urllib3 does not raise on HTTP error statuses by itself, so check
    # explicitly rather than parsing an error page.
    if res.status >= 400:
        raise urllib3.exceptions.HTTPError(
            'GET {} returned HTTP {}'.format(url, res.status)
        )

    soup = BeautifulSoup(res.data, 'html.parser')

    events = soup.find('ul', {'class': 'list-recent-events'}).find_all('li')

    for event in events:
        event_details = {
            'name': event.find('h3').find('a').text,
            # attrs is a dict mapping attribute name -> value (not a set).
            'location': event.find('span', {'class': 'event-location'}).text,
            'time': event.find('time').text,
        }
        print(event_details)

# Scrape the python.org events page; the function prints one dict per event.
get_upcoming_events('https://www.python.org/events/python-events/')



{'name': 'Python for Signal Processing Algorithms Implementation (Tamilnadu)', 'location': 'ECE Department Seminar Hall, IRTT, Erode, Tamilnadu, INDIA', 'time': '22 March 2020 3:30am UTC – 11:30am UTC'}
{'name': 'PyCon SK 2020', 'location': 'Bratislava, Slovakia', 'time': '27 March – 29 March  2020'}
{'name': 'MoscowPythonConf++', 'location': 'Moscow, Russia', 'time': '27 March 2020'}
{'name': 'PyCon US 2020', 'location': 'Pittsburgh, PA, USA', 'time': '15 April – 23 April  2020'}
{'name': 'Django Day Copenhagen', 'location': 'Copenhagen, Denmark', 'time': '17 April 2020'}
{'name': 'DragonPy 2020', 'location': 'Ljubljana, Slovenia', 'time': '18 April – 19 April  2020'}


# Scrapy

In [2]:
import scrapy
from scrapy.crawler import CrawlerProcess

class PythonEventsSpider(scrapy.Spider):
    """Spider that collects the events listed on the python.org events page.

    Each scraped event is appended to ``found_events`` on the spider instance
    as a dict with the keys ``name``, ``location`` and ``time``.
    """

    name = 'pythoneventsspider'

    start_urls = ['https://www.python.org/events/python-events/',]

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``scrapy.Spider`` and create the result list."""
        super().__init__(*args, **kwargs)
        # Per-instance list: a class-level list would be shared by every
        # instance of the spider and accumulate results across them.
        self.found_events = []

    def parse(self, response):
        """Extract every event ``<li>`` from ``response`` into ``found_events``."""
        for event in response.xpath('//ul[contains(@class, "list-recent-events")]/li'):
            event_details = dict()
            event_details['name'] = event.xpath('h3[@class="event-title"]/a/text()').extract_first()
            event_details['location'] = event.xpath('p/span[@class="event-location"]/text()').extract_first()
            event_details['time'] = event.xpath('p/time/text()').extract_first()
            self.found_events.append(event_details)

if __name__ == "__main__":
    # Configure the crawler process with LOG_LEVEL set to ERROR.
    process = CrawlerProcess(dict(LOG_LEVEL='ERROR'))
    process.crawl(PythonEventsSpider)

    # Take the spider object from the first registered crawler, then start
    # the process and read the collected events off that spider afterwards.
    first_crawler = next(iter(process.crawlers))
    spider = first_crawler.spider
    process.start()

    for found in spider.found_events:
        print(found)

{'name': 'Python for Signal Processing Algorithms Implementation (Tamilnadu)', 'location': 'ECE Department Seminar Hall, IRTT, Erode, Tamilnadu, INDIA', 'time': '22 March'}
{'name': 'PyCon SK 2020', 'location': 'Bratislava, Slovakia', 'time': '27 March – 29 March '}
{'name': 'MoscowPythonConf++', 'location': 'Moscow, Russia', 'time': '27 March'}
{'name': 'PyCon US 2020', 'location': 'Pittsburgh, PA, USA', 'time': '15 April – 23 April '}
{'name': 'Django Day Copenhagen', 'location': 'Copenhagen, Denmark', 'time': '17 April'}
{'name': 'DragonPy 2020', 'location': 'Ljubljana, Slovenia', 'time': '18 April – 19 April '}
{'name': 'HackBVICAM National Student’s Convention 2k20', 'location': 'New Delhi, India', 'time': '13 March'}
{'name': 'PyCon Belarus 2020', 'location': 'Minsk, Belarus', 'time': '21 Feb. – 22 Feb. '}


# Selenium

In [4]:
from selenium import webdriver

def get_upcoming_events(url):
    """Load ``url`` in headless Firefox and print one dict per listed event.

    Every ``<li>`` under the ``<ul>`` whose class contains
    ``list-recent-events`` is turned into a dict with the keys ``name``,
    ``location`` and ``time``, which is then printed.

    Args:
        url: Address of the events page to scrape.
    """
    # Local import so the cell only depends on its existing top-level import.
    from selenium.webdriver.common.by import By

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')
    # ``options=`` replaces the deprecated ``firefox_options=`` keyword.
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)

        events = driver.find_elements(By.XPATH, '//ul[contains(@class, "list-recent-events")]/li')

        for event in events:
            event_details = {
                'name': event.find_element(By.XPATH, 'h3[@class="event-title"]/a').text,
                'location': event.find_element(By.XPATH, 'p/span[@class="event-location"]').text,
                'time': event.find_element(By.XPATH, 'p/time').text,
            }
            print(event_details)
    finally:
        # quit() ends the whole browser session even when scraping fails;
        # close() only closes the current window.
        driver.quit()

# Scrape the python.org events page; the function prints one dict per event.
get_upcoming_events('https://www.python.org/events/python-events/')

{'name': 'Python for Signal Processing Algorithms Implementation (Tamilnadu)', 'location': 'ECE Department Seminar Hall, IRTT, Erode, Tamilnadu, INDIA', 'time': '22 March 3:30am UTC – 11:30am UTC'}
{'name': 'PyCon SK 2020', 'location': 'Bratislava, Slovakia', 'time': '27 March – 29 March'}
{'name': 'MoscowPythonConf++', 'location': 'Moscow, Russia', 'time': '27 March'}
{'name': 'PyCon US 2020', 'location': 'Pittsburgh, PA, USA', 'time': '15 April – 23 April'}
{'name': 'Django Day Copenhagen', 'location': 'Copenhagen, Denmark', 'time': '17 April'}
{'name': 'DragonPy 2020', 'location': 'Ljubljana, Slovenia', 'time': '18 April – 19 April'}
{'name': 'HackBVICAM National Student’s Convention 2k20', 'location': 'New Delhi, India', 'time': '13 March'}
{'name': 'PyCon Belarus 2020', 'location': 'Minsk, Belarus', 'time': '21 Feb. – 22 Feb.'}


# PhantomJS

In [1]:
from selenium import webdriver

def get_upcoming_events(url):
    """Load ``url`` in PhantomJS and print one dict per listed event.

    Every ``<li>`` under the ``<ul>`` whose class contains
    ``list-recent-events`` is turned into a dict with the keys ``name``,
    ``location`` and ``time``, which is then printed.

    Args:
        url: Address of the events page to scrape.
    """
    # Local import so the cell only depends on its existing top-level import.
    from selenium.webdriver.common.by import By

    # NOTE(review): the PhantomJS driver is deprecated in Selenium and absent
    # from Selenium 4 - confirm the installed version still provides it.
    driver = webdriver.PhantomJS('phantomjs')
    try:
        driver.get(url)

        events = driver.find_elements(By.XPATH, '//ul[contains(@class, "list-recent-events")]/li')

        for event in events:
            event_details = {
                'name': event.find_element(By.XPATH, 'h3[@class="event-title"]/a').text,
                'location': event.find_element(By.XPATH, 'p/span[@class="event-location"]').text,
                'time': event.find_element(By.XPATH, 'p/time').text,
            }
            print(event_details)
    finally:
        # quit() ends the whole driver session even when scraping fails;
        # close() only closes the current window.
        driver.quit()

# Scrape the python.org events page; the function prints one dict per event.
get_upcoming_events('https://www.python.org/events/python-events/')



{'name': 'Python for Signal Processing Algorithms Implementation (Tamilnadu)', 'location': 'ECE Department Seminar Hall, IRTT, Erode, Tamilnadu, INDIA', 'time': '22 March 3:30am UTC – 11:30am UTC'}
{'name': 'PyCon SK 2020', 'location': 'Bratislava, Slovakia', 'time': '27 March – 29 March'}
{'name': 'MoscowPythonConf++', 'location': 'Moscow, Russia', 'time': '27 March'}
{'name': 'PyCon US 2020', 'location': 'Pittsburgh, PA, USA', 'time': '15 April – 23 April'}
{'name': 'Django Day Copenhagen', 'location': 'Copenhagen, Denmark', 'time': '17 April'}
{'name': 'DragonPy 2020', 'location': 'Ljubljana, Slovenia', 'time': '18 April – 19 April'}
{'name': 'HackBVICAM National Student’s Convention 2k20', 'location': 'New Delhi, India', 'time': '13 March'}
{'name': 'PyCon Belarus 2020', 'location': 'Minsk, Belarus', 'time': '21 Feb. – 22 Feb.'}
