# Yahoo Finance web scraping with Python

We will extract the key financial data of the major players in the automotive market. 
**************************************************************************************************************************
BMW.DE = Bayerische Motoren Werke Aktiengesellschaft

VLKAF = Volkswagen AG

TM = Toyota Motor Corporation

F = Ford Motor Company

GM = General Motors Company
**************************************************************************************************************************

### Let's extract the data for BMW first

In [1]:
#Packages
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
#Initialisation
# Browser-style User-Agent string sent along with the request.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
headers = {'User-Agent': user_agent}

# Key-statistics page for the BMW.DE symbol on fr.finance.yahoo.com.
url = 'https://fr.finance.yahoo.com/quote/BMW.DE/key-statistics'

In [3]:
#Request
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, headers=headers, timeout=10)

#Soup
# Fail loudly on anything other than HTTP 200: silently skipping the parse would
# leave `soup` undefined (or stale from an earlier run), and the cells that use
# it would break later with a much less helpful error.
if response.status_code != 200:
    raise RuntimeError(
        f"Error retrieving the page {url} (HTTP status {response.status_code})"
    )
soup = BeautifulSoup(response.text, 'html.parser')

In [4]:
#Deep dive in the HTML
# Collect every <table> element of the parsed page ...
tables = soup.find_all('table')

# ... and print each one, indented, so the table of interest can be spotted by eye.
for table in tables:
    pretty_html = table.prettify()
    print(pretty_html)

<table class="W(100%) Bdcl(c)">
 <tbody>
  <tr class="Bxz(bb) H(36px) BdY Bdc($seperatorColor) fi-row Bgc($hoverBgColor):h">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px) Miw(140px)">
    <span>
     Cap. boursière (intrajournalière)
    </span>
    <sup aria-label="">
    </sup>
   </td>
   <td class="Fw(500) Ta(end) Pstart(10px) Miw(60px)">
    56.23B
   </td>
  </tr>
  <tr class="Bxz(bb) H(36px) BdB Bdbc($seperatorColor) fi-row Bgc($hoverBgColor):h">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px)">
    <span>
     Valeur de l’entreprise
    </span>
    <sup aria-label="">
    </sup>
   </td>
   <td class="Fw(500) Ta(end) Pstart(10px) Miw(60px)">
    106.32B
   </td>
  </tr>
  <tr class="Bxz(bb) H(36px) BdB Bdbc($seperatorColor) fi-row Bgc($hoverBgColor):h">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px)">
    <span>
     P/E précédent
    </span>
    <sup aria-lab

In [5]:
#Isolate the right table
# Zero-based position of the wanted table in `tables` (i.e. the eighth table).
financial_table_index = 7
financial_table = tables[financial_table_index]
print(financial_table.prettify())

<table class="W(100%) Bdcl(c)">
 <tbody>
  <tr class="Bxz(bb) H(36px) BdY Bdc($seperatorColor)">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px) Miw(140px)">
    <span>
     Chiffre d'affaires
    </span>
    <!-- -->
    (ttm)
    <sup aria-label="">
    </sup>
   </td>
   <td class="Fw(500) Ta(end) Pstart(10px) Miw(60px)">
    155,26B
   </td>
  </tr>
  <tr class="Bxz(bb) H(36px) BdB Bdbc($seperatorColor)">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px)">
    <span>
     Revenu par action
    </span>
    <!-- -->
    (ttm)
    <sup aria-label="">
    </sup>
   </td>
   <td class="Fw(500) Ta(end) Pstart(10px) Miw(60px)">
    243,45
   </td>
  </tr>
  <tr class="Bxz(bb) H(36px) BdB Bdbc($seperatorColor)">
   <td class="Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px)">
    <span>
     Croissance trimestrielle du chiffre d’affaires
    </span>
    <!-- -->
    (Sur 12 mois)
    <sup ar

In [6]:
#Extract the rows

# One list of stripped cell texts per <tr>; each list is stored and echoed.
rows = financial_table.find_all('tr')
data = []
for row in rows:
    cols = [cell.text.strip() for cell in row.find_all('td')]
    data.append(cols)
    print(cols)

["Chiffre d'affaires (ttm)", '155,26B']
['Revenu par action (ttm)', '243,45']
['Croissance trimestrielle du chiffre d’affaires (Sur 12\xa0mois)', '-0,60 %']
['Bénéfice brut (ttm)', 'S.O.']
['EBITDA', '22,74B']
['Bénéfice net disponible distribuable (ttm)', '10,66B']
['BPA dilué (ttm)', '16,78']
['Croissance trimestrielle des bénéfices (Sur 12\xa0mois)', '-18,40 %']


### Now we want to create a function that will gather all the information we need for each symbol.

In [7]:
# The function:

def extract_financial_data(symbol, *, timeout=10):
    """Scrape one table of a symbol's key-statistics page into a DataFrame.

    Downloads ``https://fr.finance.yahoo.com/quote/{symbol}/key-statistics``,
    picks the table at index 7 of the page (i.e. the eighth ``<table>``) and
    turns every row that contains ``<td>`` cells into one DataFrame row,
    prefixed with the symbol.

    Args:
        symbol: Ticker as it appears in the page URL, e.g. ``'BMW.DE'``.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        A DataFrame whose columns are ``Symbol`` followed by ``Col1``,
        ``Col2``, ... (one per cell of the first extracted row), or ``None``
        (after printing a message) when the page does not answer with
        HTTP 200, when it holds fewer than 8 tables, or when the selected
        table has no data rows.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    url = f'https://fr.finance.yahoo.com/quote/{symbol}/key-statistics'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    # Without a timeout a stalled connection would block the call forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Error retrieving the page {symbol}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table')

    if len(tables) < 8:
        print(f"Less than 8 tables found for {symbol}")
        return None

    # Select the eighth table (zero-based index 7)
    financial_table = tables[7]

    # Extract the rows, keeping only those that actually contain <td> cells
    data = []
    for row in financial_table.find_all('tr'):
        cols = [col.text.strip() for col in row.find_all('td')]
        if cols:
            data.append([symbol] + cols)

    # Guard against an empty table: data[0] below would raise IndexError.
    if not data:
        print(f"No data rows found for {symbol}")
        return None

    # DataFrame: column names are derived from the width of the first row
    columns = ['Symbol'] + [f'Col{i}' for i in range(1, len(data[0]))]
    return pd.DataFrame(data, columns=columns)

In [8]:
#Pull the data
symbol = 'F'
df1 = extract_financial_data(symbol)
symbol = 'VLKAF'
df2 = extract_financial_data(symbol)
symbol = 'GM'
df3 = extract_financial_data(symbol)
symbol = 'TM'
df4 = extract_financial_data(symbol)
symbol = 'BMW.DE'
df5 = extract_financial_data(symbol)

# extract_financial_data returns None when a page could not be scraped;
# leave those symbols out instead of crashing while combining.
frames = [df for df in (df1, df2, df3, df4, df5) if df is not None]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way (each frame keeps its own row index). df_append stays None when
# nothing could be extracted at all.
df_append = pd.concat(frames) if frames else None
print(df_append)

   Symbol                                               Col1      Col2
0       F                           Chiffre d'affaires (ttm)   177,49B
1       F                            Revenu par action (ttm)     44,43
2       F  Croissance trimestrielle du chiffre d’affaires...    3,10 %
3       F                                Bénéfice brut (ttm)      S.O.
4       F                                             EBITDA    11,08B
5       F         Bénéfice net disponible distribuable (ttm)     3,92B
6       F                                    BPA dilué (ttm)      0,97
7       F  Croissance trimestrielle des bénéfices (Sur 12...  -24,20 %
0   VLKAF                           Chiffre d'affaires (ttm)   321,55B
1   VLKAF                            Revenu par action (ttm)    641,43
2   VLKAF  Croissance trimestrielle du chiffre d’affaires...   -1,00 %
3   VLKAF                                Bénéfice brut (ttm)      S.O.
4   VLKAF                                             EBITDA    31,79B
5   VL

In [9]:
# Export the data to CSV
# Nothing is written when there is no combined DataFrame to export.
csv_path = 'financial_data.csv'
if df_append is not None:
    df_append.to_csv(csv_path, index=False)

### Your data is now ready to be cleaned up and analyzed!