In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

API_KEY = open('API_KEY').read()
SEARCH_ENGINE_ID = open("SEARCH_ENGINE_ID").read()

search_query = 'Canoo'

url = 'https://www.googleapis.com/customsearch/v1'
params = {
    'q': search_query,
    'key': API_KEY,
    'cx': SEARCH_ENGINE_ID
}

response = requests.get(url, params = params)
result = response.json()
if 'items' in result:
    print(result['items'][0]['link'])
    print(result['items'][2]['link'])
    print(result['items'][3]['link'])
    print(result['items'][4]['link'])
    print(result['items'][6]['link'])
    

https://www.canoo.com/
https://www.canoo.com/pickup/
https://investors.canoo.com/
https://www.canoo.com/canoo/
https://finance.yahoo.com/quote/GOEV/


In [19]:
def google_search(query):
    url = f"https://www.googleapis.com/customsearch/v1?key={API_KEY}&cx={SEARCH_ENGINE_ID}&q={query}&num=10"
    response = requests.get(url)
    data = response.json()
    return data.get('items', [])

def extract_data(word):
    results = google_search(word)
    extracted_data = []
    for result in results:
        item = {
            'title': result.get('title', ''),
            'link': result.get('link', ''),
            'snippet': result.get('snippet', ''),
            'source': result.get('displayLink', '')
        }
        extracted_data.append(item)
    return extracted_data

def save_to_csv(data, filename):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)

if __name__ == "__main__":
    word = "Canoo"
    data = extract_data(word)
    save_to_csv(data, f"{word}_data.csv")

In [20]:
url = 'https://investors.canoo.com/financial-information/income-statement'
page = requests.get(url)
page.content
soup = BeautifulSoup(page.content, 'html.parser')



In [23]:
income_statement = soup.find('table', {'class' :'report'})

print(income_statement.prettify())
rows = income_statement.find_all('tr')

for row in rows:
    cells = row.find_all(['th', 'td'])
    for cell in cells:
        print(cell.text.strip())

income_statement1= soup.find('table', {'class': 'report'})

headers = []
for th in income_statement1.find_all('th'):
    headers.append(th.text.strip())

rows = []
for tr in income_statement1.find_all('tr'):
    row = [td.text.strip() for td in tr.find_all(['td', 'th'])]
    if row:
        rows.append(row)

print(rows)
df = pd.DataFrame(rows, columns = ['Consolidated Statements in(USD Thousands)', 'Sep. 30, 2023', 'Sep. 30, 2022', 'Sep. 30, 2023', 'Sep. 30, 2022'])
df.to_csv('income_data.csv')

<table border="0" cellspacing="2" class="report" id="idm140166111035248">
 <tr>
  <th class="tl" colspan="1" rowspan="2">
   <div style="width: 200px;">
    <strong>
     Condensed Consolidated Statements of Operations - USD ($)
     <br/>
     shares in Thousands, $ in Thousands
    </strong>
   </div>
  </th>
  <th class="th" colspan="2">
   3 Months Ended
  </th>
  <th class="th" colspan="2">
   9 Months Ended
  </th>
 </tr>
 <tr>
  <th class="th">
   <div>
    Sep. 30, 2023
   </div>
  </th>
  <th class="th">
   <div>
    Sep. 30, 2022
   </div>
  </th>
  <th class="th">
   <div>
    Sep. 30, 2023
   </div>
  </th>
  <th class="th">
   <div>
    Sep. 30, 2022
   </div>
  </th>
 </tr>
 <tr class="re">
  <td class="pl" style="border-bottom: 0px;" valign="top">
   <a class="a" href="javascript:void(0);" onclick="Show.showAR( this, 'defref_us-gaap_IncomeStatementAbstract', window );">
    <strong>
     Income Statement [Abstract]
    </strong>
   </a>
  </td>
  <td class="text">
   <sp

In [24]:
response = requests.get("https://investors.canoo.com/financial-information/balance-sheet")
response.status_code



data = BeautifulSoup(response.content, 'html.parser')
print(data.prettify())




balance_sheet = data.find('table', {'class': 'report'})

headers = []
for th in balance_sheet.find_all('th'):
    headers.append(th.text.strip())
    
rows = []
for tr in balance_sheet.find_all('tr'):
    row = [td.text.strip() for td in tr.find_all(['td', 'th'])]
    rows.append(row)
print(rows)

df = pd.DataFrame(rows, columns = ['Current assets', 'Sep. 30, 2023', 'Dec. 31, 2022'])
df.to_csv('balance_data.csv')

<!DOCTYPE html>
<html lang="en">
 <head>
  <!-- OneTrust Cookies Consent -->
  <!-- OneTrust Production Script -->
  <script charset="UTF-8" data-domain-script="2685d8a8-9e2d-41c4-9cbb-b4c498f5ad6e" src="https://cdn.cookielaw.org/scripttemplates/otSDKStub.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   function OptanonWrapper() { }
  </script>
  <!-- End OneTrust Cookies Consent  -->
  <meta charset="utf-8"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <base href="https://investors.canoo.com"/>
  <link href="https://investors.canoo.com/news-presentations/press-releases/rss" rel="alternate" title="Canoo Inc. - Recent News" type="application/rss+xml"/>
  <title>
   Balance Sheet :: Canoo Inc. (GOEV)
  </title>
  <link href="https://investors.canoo.com/financial-information/balance-sheet" rel="canonical"/>
  <meta content="https://investors.canoo.com/financial-information/balance-sheet" property="og:url"/>
  <meta content="origin" name="referrer

In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def website(url):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup
    else:
        print("Failed to fetch data from the URL:", url)
        return None
        
def extract_text(soup):
    if soup:
        return soup.get_text(separator=' ')
    else:
        return ""

def retrieve_documents(query, documents):
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform([query] + documents)
    similarity_matrix = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix)[0]
    most_similar_indices = similarity_matrix.argsort()[:-len(documents)-1:-1][1:]
    return most_similar_indices

def extract_industry_info(soup):
    industry = extract_text(soup)
    return industry

def extract_competitors_info(soup):
    competitors = ["Tesla", "NIO", "Rivian", "Lucid Motors"]
    return competitors

def extract_market_trends(soup):
    trends = ["Increasing demand for electric vehicles", "Advancements in battery technology", "Competition intensifying in the EV market"]
    return trends

def extract_financial_performance(soup):
    financial_metrics = {
        "Revenue": "$123 million",
        "Profit Margins": "N/A",
        "Return on Investment": "N/A",
        "Expense Structure": "Detailed financial statements required"
    }
    return financial_metrics

def main():
    canoo_website_url = "https://www.canoo.com/"
    canoo_soup = scrape_website(canoo_website_url)

    if canoo_soup:
        industry_info = extract_industry_info(canoo_soup)
        competitors_info = extract_competitors_info(canoo_soup)
        market_trends = extract_market_trends(canoo_soup)
        financial_performance = extract_financial_performance(canoo_soup)
        query = "Canoo electric vehicle industry"
        documents = [industry_info] + competitors_info + market_trends + [financial_performance["Expense Structure"]]
        most_similar_indices = retrieve_documents(query, documents)
        print("Most relevant documents:")
        for idx in most_similar_indices:
            print(documents)

        df = pd.DataFrame({
            "Category": ["Industry", "Competitors", "Market Trends", "Financial Performance"],
            "Information": [industry_info, competitors_info, market_trends, financial_performance]
        })
        df.to_csv("canoo_info.csv", index=False)

if __name__ == "__main__":
    main()

Most relevant documents:
['', 'Tesla', 'NIO', 'Rivian', 'Lucid Motors', 'Increasing demand for electric vehicles', 'Advancements in battery technology', 'Competition intensifying in the EV market', 'Detailed financial statements required']
['', 'Tesla', 'NIO', 'Rivian', 'Lucid Motors', 'Increasing demand for electric vehicles', 'Advancements in battery technology', 'Competition intensifying in the EV market', 'Detailed financial statements required']
['', 'Tesla', 'NIO', 'Rivian', 'Lucid Motors', 'Increasing demand for electric vehicles', 'Advancements in battery technology', 'Competition intensifying in the EV market', 'Detailed financial statements required']
['', 'Tesla', 'NIO', 'Rivian', 'Lucid Motors', 'Increasing demand for electric vehicles', 'Advancements in battery technology', 'Competition intensifying in the EV market', 'Detailed financial statements required']
['', 'Tesla', 'NIO', 'Rivian', 'Lucid Motors', 'Increasing demand for electric vehicles', 'Advancements in battery 

In [None]:
url = f"https://finance.yahoo.com/quote/GOEV/?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAAEzfFvGMcz4wylBe-_wG9ddbTi1qEd3dRKt7q3Q-NlOGl55thuhzXJiL9sUx_5vh52ISaLzeVZA774HcR8b2ofSLBOjRhDioBfhjzWWizuKEwTHFndtr4xHyZREWXSasFrb3eCy3cEQXu9GTjsOKHc0xGP6KcYvlloi7dBMenzqv"

response = requests.get(url)

content = response.content
parsed_content = BeautifulSoup(content,'html.parser')

obj = {}
try:
  obj["name"] = parsed_content.find("h1").text.strip()
except:
  obj["name"] = None
try:
  obj["stock_currentprice"] = parsed_content.find("span", {"class": "e3b14781 e59c8479"}).text.strip()
except:
  obj["stock_currentprice"] = None

tables = parsed_content.find("table").find_all("tbody")

extracted_text = []
for row in parsed_content.find_all('tr'):
    row_text = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
    extracted_text.append(row_text)
for row in extracted_text:
    print(row)
print(obj)

df = pd.DataFrame(extracted_text)
obj_df = pd.DataFrame([obj])

combined_df = pd.concat([obj_df, df], ignore_index=True)

combined_df.to_csv('combined_data.csv', index=False)