In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
class Article:
    """Plain container for the pieces scraped from a single article page."""

    def __init__(self, title, body_text, tags, image):
        """Store the four scraped fields on the instance, unchanged."""
        self.title, self.body_text = title, body_text
        self.tags, self.image = tags, image

    def __str__(self):
        """Return a labelled rendering of all four fields, separated by blank lines."""
        template = "Article -- (Title: {},\n\nBody Text: {},\n\nTags: {},\n\nImage: {})"
        fields = (self.title, self.body_text, self.tags, self.image)
        return template.format(*fields)

In [3]:
class ArticleScrape:
    """Download an article page and extract its title, paragraphs, tags and image.

    The markup hooks used below (``span.cb-element`` for tags, ``.wp-caption``
    for the image block) are hard-coded.
    NOTE(review): presumably these match the theme of the site this notebook
    targets -- verify before pointing the scraper at a different site.
    """

    # How many <p> elements at the end of the page are discarded.
    # NOTE(review): presumably trailing site boilerplate -- confirm against a live page.
    TRAILING_PARAGRAPHS_TO_SKIP = 3

    def __init__(self, timeout=30):
        """Build the request headers used for every download.

        Args:
            timeout: Seconds to wait for the server before ``requests`` gives
                up and raises; prevents a stalled connection from hanging
                the notebook cell indefinitely.
        """
        self.timeout = timeout
        self.headers = requests.utils.default_headers()
        # Present a browser User-Agent instead of the default python-requests one.
        self.headers.update({
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
        })

    def scrape_article(self, url):
        """Fetch ``url`` and return the scraped content as an ``Article``.

        Args:
            url: Address of the article page to download.

        Returns:
            Article: ``title`` is the page ``<title>`` text (or ``None``),
            ``body_text`` is a list of ``<p>`` elements (markup included),
            ``tags`` is a list of ``span.cb-element`` elements and ``image``
            is a dict with the keys ``link``, ``alt`` and ``text``.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within ``self.timeout``.
        """
        # ``headers`` has to be passed by keyword: the second positional
        # parameter of ``requests.get`` is ``params``, so passing the dict
        # positionally would append the headers to the URL as a query string
        # and never send the custom User-Agent.
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        soup_html = BeautifulSoup(response.content, 'html.parser')

        return Article(
            self.__retrieve_article_title(soup_html),
            self.__retrieve_p_elements(soup_html),
            self.__retrieve_article_tags(soup_html),
            self.__retrieve_article_image(soup_html),
        )

    def __retrieve_article_title(self, soup_html):
        """Return the text of the page's ``<title>``, or ``None`` if it has none."""
        title_tag = soup_html.find('title')
        return title_tag.text if title_tag is not None else None

    def __retrieve_p_elements(self, soup_html, get_text=False):
        """Return the page's ``<p>`` elements minus the trailing ones.

        Args:
            soup_html: Parsed document to search.
            get_text: When true, return each paragraph's text instead of the
                element itself.

        Returns:
            list: Paragraph elements (or their text). Empty when the page has
            no more than ``TRAILING_PARAGRAPHS_TO_SKIP`` paragraphs.
        """
        paragraphs = soup_html.find_all('p')
        # Clamp at zero so short pages yield an empty list, exactly like a
        # negative-stop slice would.
        keep = max(len(paragraphs) - self.TRAILING_PARAGRAPHS_TO_SKIP, 0)
        paragraphs = paragraphs[:keep]

        if get_text:
            return [p.text for p in paragraphs]
        return paragraphs

    def __retrieve_article_tags(self, soup_html):
        """Return every ``<span class="cb-element">`` element in the document."""
        return soup_html.find_all("span", class_="cb-element")

    def __retrieve_article_image(self, soup_html):
        """Describe the first ``wp-caption`` block of the document.

        Returns:
            dict: ``link`` (``href`` of the block's first ``<a>``), ``alt``
            (``alt`` of its first ``<img>``) and ``text`` (the block's text).
            Any piece that is absent from the page is ``None``; all three
            keys are always present.
        """
        img_tag = soup_html.find(class_='wp-caption')
        if img_tag is None:
            # Page has no captioned image at all.
            return {'link': None, 'alt': None, 'text': None}

        anchor = img_tag.a
        image = img_tag.img

        return {
            # ``.get`` tolerates a missing attribute where ``[...]`` would raise.
            'link': anchor.get('href') if anchor is not None else None,
            'alt': image.get('alt') if image is not None else None,
            'text': img_tag.text,
        }

In [4]:
# Address of the single article page scraped in the cells below.
url = "https://neurosciencenews.com/moving-object-background-14424/"

In [5]:
# Create the scraper; construction only prepares request headers, no network I/O yet.
a_scraper = ArticleScrape()

In [6]:
# Performs a live HTTP GET of `url` and parses the response into an Article.
# NOTE(review): re-running needs network access, and the result presumably
# changes if the site's markup changes -- consider caching the HTML.
article = a_scraper.scrape_article(url)

In [7]:
# Text of the page's <title> element.
article.title

'Why are we able to see moving objects against moving backgrounds? - Neuroscience News'

In [8]:
# List of parsed <p> elements (markup included, not plain strings), with the
# last three paragraphs of the page already dropped by the scraper.
article.body_text

[<p><em><strong>Summary: </strong>The human brain can desensitize background motion and focus on smaller moving objects in the foreground as a result of activity in the middle temporal visual area. However, our ability to pick out smaller objects changes over time. Younger people are better at picking out foreground objects moving, while those over 65 have heightened awareness of objects moving in the background.</em></p>,
 <p><strong>Source: </strong>University of Rochester</p>,
 <p><strong>Visual motion is an important source of information for separating objects from their backgrounds.</strong></p>,
 <p>A spider camouflaged against a branch, for instance, immediately loses its invisibility once it starts moving. A friend you’re trying to spot in a crowded airport terminal is more distinguishable once she begins waving her hands.</p>,
 <p>While the process of separating an object from a background is seemingly effortless, researchers don’t know how our visual system manages to rapidl