Skip to content

Commit

Permalink
Fix yahoo search implementation (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhishek1995s authored and niranjan94 committed Mar 20, 2017
1 parent ec936ba commit 14936fe
Showing 1 changed file with 12 additions and 38 deletions.
50 changes: 12 additions & 38 deletions src/rss-generator.py
Expand Up @@ -143,45 +143,19 @@ def yahoo_search(query):
[[Tile1,url1], [Title2, url2],..]
'''
urls = []
response = get_google_page(query)
soup = BeautifulSoup(response.text, 'html.parser')
# Search for all relevant 'h2' tags
for h in soup.findAll('h2'):

links = h.find('li')

urls.append([links.getText(),links.get('href')])

return urls
def get_yahoo_page(query, timeout=10):
    ''' Fetch the yahoo search results

    Args:
        query: Search terms, sent as the 'q' query-string parameter.
        timeout: Seconds to wait on the server before requests gives up
            and raises a timeout error. Defaults to 10.

    Returns : Results Page (the response object from requests.get)
    '''
    # Browser-like User-Agent; presumably needed so Yahoo serves the
    # regular HTML results page -- TODO confirm.
    header = {'User-Agent':
              "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36"
              }
    payload = {'q': query}

    # Without a timeout a stalled connection would block the caller forever.
    return requests.get('https://search.yahoo.com/search',
                        headers=header, params=payload, timeout=timeout)

def yahoo_search(query):
    ''' Gives search query to yahoo and returns the urls

    Args:
        query: Search terms, passed unchanged to get_yahoo_page().

    Returns: urls (list)
        [[Title1, url1], [Title2, url2], ..]
        Each title is the anchor text. Each url is the part of the
        anchor's href after the last "RU=" and before "/RK=", with
        percent-escapes decoded.
    '''
    # Function-scope import keeps this block self-contained and works on
    # both Python 3 (urllib.parse) and Python 2 (urllib).
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    urls = []
    response = get_yahoo_page(query)
    soup = BeautifulSoup(response.content, "lxml")

    for heading in soup.findAll('h3', attrs={"class": "title"}):
        # The class string (including its leading space) is kept exactly
        # as written. NOTE(review): confirm it still matches Yahoo's markup.
        anchors = heading.findAll(
            'a', attrs={"class": " ac-algo fz-l ac-21th lh-24"})
        for anchor in anchors:
            href = anchor.get('href')
            if not href:
                # An anchor without an href has no link to report.
                continue
            # Keep only the text between "RU=" and the trailing "/RK=..."
            # suffix; if "RU=" is absent the whole href is used.
            target = href.split("RU=")[-1].split("/RK=")[0]
            # unquote decodes every percent-escape, upper or lower case,
            # instead of a hand-maintained (and partly wrong) replace list.
            urls.append([anchor.getText(), unquote(target)])
    return urls

def read_in():
lines = sys.stdin.readlines()
Expand Down

0 comments on commit 14936fe

Please sign in to comment.