In [2]:
import scrapy
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import logging

class SeekingSpider(scrapy.Spider):
    
    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='./logs/scrapy_seeking_log.txt',
        format='%(levelname)s: %(message)s',
        level=logging.INFO
    )

    name = "seeking"
    start_urls = [
        'https://seekingalpha.com/latest-articles',
    ]

    def parse(self, response):
        for article in response.xpath("//ul[@class='articles-list']/li"):
            yield {
                'article_name': article.xpath(".//div[@class='media-body']/a[@class='a-title']/text()").extract_first(),
                'article_link': article.xpath(".//div[@class='media-body']/a[@class='a-title']/@href").extract_first()
            }
        
        next_page=response.xpath("//li[@class='next']/a/@href").extract_first()
        if next_page is not None:
            next_page_link= response.urljoin(next_page)
            yield scrapy.Request(url=next_page_link, callback=self.parse)


In [3]:
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess

settings = get_project_settings()
settings.overrides['FEED_FORMAT'] = 'json'
settings.overrides['FEED_URI'] = './results/result.json'

process = CrawlerProcess(get_project_settings())

process.crawl(SeekingSpider)
process.start() 

2018-12-28 08:51:19 [scrapy.utils.log] INFO: Scrapy 1.5.1 started (bot: scrapybot)
2018-12-28 08:51:19 [scrapy.utils.log] INFO: Versions: lxml 3.8.0.0, libxml2 2.9.8, cssselect 1.0.3, parsel 1.5.1, w3lib 1.19.0, Twisted 18.9.0, Python 3.6.7 |Anaconda, Inc.| (default, Dec 10 2018, 20:35:02) [MSC v.1915 64 bit (AMD64)], pyOpenSSL 18.0.0 (OpenSSL 1.1.1a  20 Nov 2018), cryptography 2.4.2, Platform Windows-10-10.0.17134-SP0
2018-12-28 08:51:19 [scrapy.crawler] INFO: Overridden settings: {}
2018-12-28 08:51:19 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.logstats.LogStats']
2018-12-28 08:51:19 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.u

2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'Weekly Review: Master Limited Partnership CEFs - The Highest Discount In The Sector Is -5.25%', 'article_link': '/article/4230440-weekly-review-master-limited-partnership-cefs-highest-discount-sector-minus-5_25-percent'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': "Macy's: Grab 5% Yield While You Can", 'article_link': '/article/4230439-macys-grab-5-percent-yield-can'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'General Electric Healthcare IPO Is Too Risky In This Environment', 'article_link': '/article/4230438-general-electric-healthcare-ipo-risky-environment'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'How Liquidity

2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'Vanadium Miners News For The Month Of December 2018', 'article_link': '/article/4230406-vanadium-miners-news-month-december-2018'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'Apple: A Buying Opportunity Has Arrived', 'article_link': '/article/4230405-apple-buying-opportunity-arrived'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': 'SBM Offshore Is Making Adjustments To Find Growth', 'article_link': '/article/4230404-sbm-offshore-making-adjustments-find-growth'}
2018-12-28 08:51:19 [scrapy.core.scraper] DEBUG: Scraped from <200 https://seekingalpha.com/latest-articles>
{'article_name': "AquaVenture's Quench Acquires Pure Health Solutions For Market Expansion", 'article_link': '/article/4230403-aquaventu