In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def scrape_data(url, timeout=10):
    """Fetch a page and extract its <title> text and meta description.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on an
            unresponsive host and stall the whole notebook run.

    Returns:
        A dict ``{'title': str, 'description': str}``. Either value is an
        empty string when the page lacks that element. Returns ``None`` when
        the request or parsing fails; the error is printed, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # treat 4xx/5xx responses as failures
        soup = BeautifulSoup(response.content, 'html.parser')

        # A page without <title> must still yield its description instead of
        # being discarded through an AttributeError on None.
        title_tag = soup.find('title')
        title = title_tag.get_text() if title_tag is not None else ''

        description_tag = soup.find('meta', {'name': 'description'})
        # The tag may exist without a `content` attribute; normalise that
        # case to '' so the value is always a string.
        if description_tag is not None:
            description = description_tag.get('content') or ''
        else:
            description = ''

        return {'title': title, 'description': description}
    except Exception as e:
        # Deliberate best-effort: one unreachable or blocked URL must not
        # abort scraping of the remaining ones.
        print(f"Error scraping data from {url}: {e}")
        return None

In [3]:
# Maps each research question (key) to the list of source URLs that are
# scraped for it. The keys are also lower-cased and used as CSV file names
# by the export cell, so changing a key changes the output file name.
query = {
    "Identify the industry in which Canoo operates": [
        "https://www.statista.com/outlook/mmo/electric-vehicles/worldwide",
        "https://www.mckinsey.com/features/mckinsey-center-for-future-mobility/our-insights/electric-vehicles-whats-ahead",
        "https://www.ibisworld.com/united-states/market-research-reports/hybrid-electric-vehicle-manufacturing-industry/"
    ],
    "Analyze Canoo's main competitors": [
        "https://www.marketbeat.com/stocks/NYSE/GOEV/competitors-and-alternatives/",
        "https://www.cbinsights.com/company/evelozcity/alternatives-competitors",
        "https://rivian.com/newsroom/media"
    ],
    "Identify key trends in the market": [
        "https://www.bloomberg.com/news/newsletters/2024-01-09/electric-vehicle-market-looks-headed-for-22-growth-this-year",
        "https://www.grandviewresearch.com/industry-analysis/electric-vehicles-ev-market",
        "https://www.osti.gov/biblio/1974577"
    ],
    "Gather information on Canoo's financial performance": [
        "https://investors.canoo.com/",
        # NOTE(review): this URL carries guccounter/guce_referrer* query
        # parameters that look like consent-redirect tracking; presumably the
        # bare quote URL would serve the same page — confirm before trimming.
        "https://finance.yahoo.com/quote/GOEV/?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAAGuYyay_qFBxwVtiZZT_v-Y1r6smaqPM69Y8sbhNBu8mh-pnM-VOe7Vly-Y44znLlpCd--qp2hL5Cy9Bn0g9fN5LIDHrmFLRFQws0FcVHN9CKk6IDXT8so8LH-uMMpna6wAGJO49vLRly3nOvMGVXeh1J2fsFTL2leuXV-HTy_h2",
        "https://www.marketwatch.com/investing/stock/goev"
    ]
}

In [4]:
def scrape_query_data(query, urls):
    """Scrape every URL in ``urls`` and collect the successful results.

    Args:
        query: The research question the URLs belong to. Accepted for the
            caller's convenience; it is not used inside this function.
        urls: Iterable of page addresses, scraped in order.

    Returns:
        A list of the truthy results returned by ``scrape_data``, in the
        same order as ``urls``. Falsy results (e.g. ``None``) are dropped.
    """
    # Lazy generator: each URL is fetched only as the comprehension pulls it.
    outcomes = (scrape_data(link) for link in urls)
    return [record for record in outcomes if record]

In [7]:
# Scrape every URL listed for each research question.
# A comprehension-scoped name is used for the key so that the module-level
# `query` dict is not rebound to a string; rebinding it would make this cell
# fail with AttributeError ('str' has no attribute 'items') when re-run.
query_data = {
    question: scrape_query_data(question, urls)
    for question, urls in query.items()
}

In [8]:
# Write one CSV per research question, named after the lower-cased question
# with spaces replaced by underscores.
# The loop variable is deliberately not called `query`, so the `query` dict
# of URLs stays intact and earlier cells remain re-runnable.
for question, records in query_data.items():
    # Pin the columns so the file still gets a header row when every URL for
    # a question failed to scrape and `records` is empty.
    df = pd.DataFrame(records, columns=['title', 'description'])
    filename = question.lower().replace(' ', '_') + '.csv'
    df.to_csv(filename, index=False)