In [2]:
import requests
from bs4 import BeautifulSoup
import time
import random
import pandas as pd

def extract_search_results(html_content):
    """Parse a Bing results page and return one record per organic result.

    Args:
        html_content: HTML text of a Bing search results page.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``description`` and
        ``website_title`` (in that order). Only items that have both a title
        and a link are included; ``description`` and ``website_title`` are
        ``None`` when the corresponding element is absent or empty.

    An item that raises while being processed is reported on stdout and
    skipped; it never aborts the remaining items.
    """
    def _text_of(tag):
        # Every field follows the same rule: text when the tag was found, else None.
        return tag.get_text() if tag else None

    def _stripped(text):
        # Empty/missing text collapses to None rather than an empty string.
        return text.strip() if text else None

    parsed_page = BeautifulSoup(html_content, 'html.parser')
    records = []

    # Each result is an <li class="b_algo"> element.
    for entry in parsed_page.find_all('li', class_='b_algo'):
        try:
            # The <h2> carries the title text and wraps the result's anchor.
            heading = entry.find('h2')
            heading_text = _text_of(heading)
            anchor = heading.find('a') if heading else None
            url = anchor['href'] if anchor else None

            # Snippet text comes from the caption container.
            caption_text = _text_of(entry.find('div', class_='b_caption'))

            # The <cite> element holds the displayed site path.
            cite_text = _text_of(entry.find('cite'))

            # Without both a title and a link the record is not kept.
            if not heading_text or not url:
                continue

            records.append({
                'title': heading_text.strip(),
                'link': url,
                'description': _stripped(caption_text),
                'website_title': _stripped(cite_text)
            })
        except Exception as e:
            print(f"Error processing result: {e}")
            continue

    return records

def scrape_bing_search(query, pages=1, timeout=10, delay_range=(1, 3)):
    """Scrape Bing web search results for one query across several pages.

    Args:
        query: Search string sent as the ``q`` parameter.
        pages: Number of result pages to request. Zero or a negative value
            yields an empty list without any request being made.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; a
            stalled connection then fails that page instead of hanging the
            whole run.
        delay_range: ``(low, high)`` bounds, in seconds, of the random pause
            inserted between consecutive page requests. A falsy value
            disables the pause.

    Returns:
        A flat list of the records produced by ``extract_search_results`` for
        every page that was fetched successfully, in page order.

    A page that fails (network error, HTTP error status, parse failure) is
    reported on stdout and skipped; remaining pages are still attempted.
    """
    base_url = "https://www.bing.com/search"

    # Headers do not vary per page, so they are built once.
    # NOTE(review): the cookie looks like a session captured from a browser;
    # consider moving it to configuration rather than keeping it in the notebook.
    headers = {
        "cookie": "MUIDB=39246286B8EC6CF00FE271B7B92C6DB7; _SS=SID%3D2BC6754FF79E604306AF6088F6A561DF%26PC%3DU531%26R%3D12%26RB%3D0%26GB%3D0%26RG%3D200%26RP%3D9; SRCHS=PC%3DU531",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "en-GB,en;q=0.9,zh-HK;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5,en-US;q=0.4",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0",
    }

    all_results = []

    for page in range(pages):
        # Pause *between* requests: this also applies after a failed page and
        # avoids a pointless wait once the last page has been fetched.
        if page > 0 and delay_range:
            time.sleep(random.uniform(*delay_range))

        try:
            # Bing's `first` parameter is the 1-based rank of the first result
            # on the page (1, 11, 21, ...). Using a 0-based offset would make
            # consecutive pages overlap by one result.
            first_result = page * 10 + 1

            querystring = {
                "q": query,
                "form": "ANNH02",
                "refig": "b102c7ceb43f4930bbff6199cd48cead",
                "pc": "U531",
                "first": first_result
            }

            print(f"Scraping page {page + 1}...")
            response = requests.get(
                base_url,
                headers=headers,
                params=querystring,
                timeout=timeout,
            )
            response.raise_for_status()

            all_results.extend(extract_search_results(response.text))

        except Exception as e:
            # Best-effort scraping: report the failure and move on to the next page.
            print(f"Error scraping page {page + 1}: {e}")
            continue

    return all_results

if __name__ == "__main__":
    # Collect ten pages of results for the query "climate change".
    search_results = scrape_bing_search("climate change", pages=10)

    # Dump every record as a small block: a blank line, a numbered header,
    # then one line per field.
    for i, result in enumerate(search_results, start=1):
        print(
            f"\nResult {i}:",
            f"Title: {result['title']}",
            f"Link: {result['link']}",
            f"Website: {result['website_title']}",
            f"Description: {result['description']}",
            sep="\n",
        )

    # Closing summary line with the number of records gathered.
    print(f"\nTotal results collected: {len(search_results)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...

Result 1:
Title: What Is Climate Change? - NASA Science
Link: https://www.bing.com/ck/a?!&&p=155d34f4a0c7793209388ea32b399f9eebaf37a586ec02ec9e697c36bc22ca1bJmltdHM9MTc0Mzk4NDAwMA&ptn=3&ver=2&hsh=4&fclid=39246286-b8ec-6cf0-0fe2-71b7b92c6db7&psq=climate+change&u=a1aHR0cHM6Ly9zY2llbmNlLm5hc2EuZ292L2NsaW1hdGUtY2hhbmdlL3doYXQtaXMtY2xpbWF0ZS1jaGFuZ2Uv&ntb=1
Website: https://science.nasa.gov › climate-change › what-is-climate-change
Description: 21 Oct 2024 · Climate change is a long-term change in the average weather patterns that have come to define Earth’s local, regional and global climates. These changes have a broad range …

Result 2:
Title: What is climate change? A really simple guide - BBC News
Link: https://www.bing.com/ck/a?!&&p=7f9146e0c4cb8e936f45a24d9fb40b701923d37afa2030c3fcfa25888d2f5761

In [3]:
# Tabulate the scraped records: one row per result, one column per dict key.
df = pd.DataFrame(data=search_results)

# Write the table to CSV without the index column. The 'utf-8-sig' codec
# prefixes the file with a UTF-8 byte-order mark.
df.to_csv('bing_search_results.csv', index=False, encoding='utf-8-sig')