diff --git a/src/rss-generator.py b/src/rss-generator.py index beb830cc..a7d89a58 100644 --- a/src/rss-generator.py +++ b/src/rss-generator.py @@ -143,45 +143,19 @@ def yahoo_search(query): [[Tile1,url1], [Title2, url2],..] ''' urls = [] - response = get_google_page(query) - soup = BeautifulSoup(response.text, 'html.parser') - # Search for all relevant 'h2' tags - for h in soup.findAll('h2'): - - links = h.find('li') - - urls.append([links.getText(),links.get('href')]) - - return urls -def get_yahoo_page(query): - ''' Fetch the yahoo search results - Returns : Results Page - ''' - header = {'User-Agent': - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36" - } - - payload = {'q' : query} - response = requests.get('https://search.yahoo.com/search', headers=header, params=payload) - - return response - -def yahoo_search(query): - ''' Gives search query to yahoo and returns the urls - Returns: urls (list) - [[Tile1,url1], [Title2, url2],..] - ''' - urls = [] - response = get_google_page(query) - soup = BeautifulSoup(response.text, 'html.parser') - # Search for all relevant 'h2' tags - for h in soup.findAll('h2'): + response = get_yahoo_page(query) + soup = BeautifulSoup(response.content,"lxml") + + for h in soup.findAll('h3',attrs={"class" : "title"}): + t=h.findAll('a',attrs={"class" : " ac-algo fz-l ac-21th lh-24"}) + for y in t: + r=y.get('href') + f=r.split("RU=") + e=f[-1].split("/RK=0") + u=e[0].replace("%3a",":").replace("%2f","/").replace("%28","(").replace("%29",")").replace("%3f","?").replace("%3d","=").replace("%26","&").replace("%29",")").replace("%26","'").replace("%21","!").replace("%23","$").replace("%40","[").replace("%5b","]") + urls.append([y.getText(),u]) + return urls - links = h.find('li') - - urls.append([links.getText(),links.get('href')]) - - return urls def read_in(): lines = sys.stdin.readlines()