Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
35 lines (24 sloc) 952 Bytes
#!/usr/bin/env python
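"""
Crawl the first two Hacker News listing pages and append each story
title to ./hacker_news.txt.

Target: https://news.ycombinator.com/
Requirements: pip install ruia aiofiles
"""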
import aiofiles
from ruia import AttrField, TextField, Item, Spider
class HackerNewsItem(Item):
    """Fields extracted for one story on a Hacker News listing page.

    Each field is declared with a CSS selector; ``title`` and ``url`` both
    read from the ``a.storylink`` anchor, ``url`` taking its ``href``.
    """

    # NOTE(review): in ruia, ``target_item`` presumably marks the repeated
    # element (one ``tr.athing`` row per story) that ``get_items`` iterates
    # over -- confirm against the ruia Item documentation.
    target_item = TextField(css_select='tr.athing')
    title = TextField(css_select='a.storylink')
    url = AttrField(css_select='a.storylink', attr='href')

    async def clean_title(self, value):
        """Return ``value`` with leading and trailing whitespace removed.

        NOTE(review): presumably invoked by ruia as the ``clean_<field>``
        hook for ``title`` -- confirm against the ruia version in use.
        """
        stripped = value.strip()
        return stripped
class HackerNewsSpider(Spider):
    """Spider that collects story titles from two Hacker News listing pages.

    ``parse`` yields one ``HackerNewsItem`` per story found in a response,
    and ``process_item`` appends that item's title to ``./hacker_news.txt``.
    """

    start_urls = [
        'https://news.ycombinator.com/news?p=1',
        'https://news.ycombinator.com/news?p=2',
    ]
    concurrency = 10

    async def parse(self, response):
        """Yield every ``HackerNewsItem`` extracted from ``response.html``."""
        async for item in HackerNewsItem.get_items(html=response.html):
            yield item

    async def process_item(self, item: HackerNewsItem):
        """Append the item's title, followed by a newline, to the output file.

        The file is opened in append mode on every call, so titles from
        earlier runs are preserved.
        """
        # Explicit UTF-8: without it the platform's locale encoding is used,
        # and a title containing characters outside that encoding would
        # raise UnicodeEncodeError (e.g. cp1252 on Windows).
        async with aiofiles.open(
            './hacker_news.txt', 'a', encoding='utf-8'
        ) as f:
            await f.write(str(item.title) + '\n')
if __name__ == '__main__':
    # Script entry point: start the crawl only when this file is executed
    # directly, not when it is imported. No middleware is passed.
    HackerNewsSpider.start(middleware=None)
You can’t perform that action at this time.