From 056c1c321acb48301fda9c73a991fe2914f20ac5 Mon Sep 17 00:00:00 2001 From: Arthur Moreira Date: Thu, 3 Oct 2019 03:27:17 -0300 Subject: [PATCH] Adding kwargs filters --- search_engine_parser/core/base.py | 16 +++++++++------- search_engine_parser/core/engines/github.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/search_engine_parser/core/base.py b/search_engine_parser/core/base.py index a025f4d..7d17dd1 100644 --- a/search_engine_parser/core/base.py +++ b/search_engine_parser/core/base.py @@ -76,7 +76,7 @@ def parse_query(query): :type query: str :rtype: str """ - return query.replace(" ", "%20") + return query.replace(" ", "%20").replace(":", "%3A") @staticmethod async def get_source(url): @@ -123,13 +123,17 @@ async def get_soup(self, url): html = await self.get_source(url) return BeautifulSoup(html, 'lxml') - def get_search_url(self, query=None, page=None): + def get_search_url(self, query=None, page=None, **kwargs): """ Return a formatted search url """ # Some URLs use offsets offset = (page * 10) - 9 - return self.search_url.format(query=query, page=page, offset=offset) + for key, value in kwargs.items(): + query += f" {key}:{value}" + parsed_query = self.parse_query(query) + print(self.search_url.format(query=parsed_query, page=page, offset=offset)) + return self.search_url.format(query=parsed_query, page=page, offset=offset) def get_results(self, soup): """ Get results from soup""" @@ -144,7 +148,7 @@ def get_results(self, soup): search_results = self.parse_result(results) return search_results - def search(self, query=None, page=None): + def search(self, query=None, page=None, **kwargs): """ Query the search engine @@ -154,15 +158,13 @@ def search(self, query=None, page=None): :type page: int :return: dictionary. Containing titles, links, netlocs and descriptions. """ - parsed_query = self.parse_query(query) # Get search Page Results loop = asyncio.get_event_loop() soup = loop.run_until_complete( self.get_soup( self.get_search_url( - parsed_query, page))) - + query, page, **kwargs))) return self.get_results(soup) async def async_search(self, query=None, page=None, callback=None): diff --git a/search_engine_parser/core/engines/github.py b/search_engine_parser/core/engines/github.py index 14ba7a7..671db67 100644 --- a/search_engine_parser/core/engines/github.py +++ b/search_engine_parser/core/engines/github.py @@ -48,7 +48,7 @@ def parse_single_result(self, single_result): lang = stars_and_lang_div.find( 'span', itemprop="programmingLanguage").text stars = stars_and_lang_div.find('a', class_='muted-link').text.strip() - + desc = desc.text title = "{title}\t {lang}\t Stars-{stars}".format( title=title, lang=lang, stars=stars)