In [14]:
# Import Packages
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [15]:
# Download and parse the HTML
start_url = 'https://en.wikipedia.org/wiki/Tesla,_Inc.'

# Download the HTML from start_url.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of silently
# parsing an error page.
downloaded_html = requests.get(start_url, timeout=30)
downloaded_html.raise_for_status()

# Parse the HTML with BeautifulSoup and create a soup object.
# The parser is named explicitly so the resulting tree does not depend on which
# optional parsers happen to be installed (and to avoid GuessedAtParserWarning).
soup = BeautifulSoup(downloaded_html.text, 'html.parser')

# Save a local copy.
# UTF-8 is requested explicitly because the page contains non-ASCII characters
# that the platform default encoding may not be able to write.
with open('downloaded.html', 'w', encoding='utf-8') as file:
    file.write(soup.prettify())

In [16]:
# Collect the <tbody> of every table.wikitable on the page and keep the first
# one; the following cells read their headings and rows from it.
wikitable_bodies = soup.select('table.wikitable tbody')
full_table = wikitable_bodies[0]
print(full_table)

<tbody><tr style="text-align:center;">
<th>Quarter</th>
<th>Cumulative<br/>production</th>
<th>Total<br/>production</th>
<th>Model S<br/>sales</th>
<th>Model X<br/>sales</th>
<th>Model 3<br/>sales
</th>
<th>Model Y<br/>sales<sup class="reference" id="cite_ref-102"><a href="#cite_note-102">[b]</a></sup></th>
<th>Total<br/>sales<sup class="reference" id="cite_ref-103"><a href="#cite_note-103">[c]</a></sup></th>
<th>In transit<sup class="reference" id="cite_ref-104"><a href="#cite_note-104">[d]</a></sup></th>
<th>Source
</th></tr>
<tr style="text-align:center;">
<td>Q1 2013</td>
<td>N/A</td>
<td>5,000+</td>
<td>4,900</td>
<td style="background:#f1f5fa;"></td>
<td style="background:#f1f5fa;"></td>
<td style="background:#f1f5fa;"></td>
<td>4,900</td>
<td></td>
<td><sup class="reference" id="cite_ref-105"><a href="#cite_note-105">[101]</a></sup>
</td></tr>
<tr style="text-align:center;">
<td>Q2 2013</td>
<td>N/A</td>
<td>N/A</td>
<td>5,150</td>
<td style="background:#f1f5fa;"></td>
<td style

In [18]:
# Extract the table column headings
# End result: A list with all the column headings

table_head = full_table.select('tr th')

# Matches a footnote marker such as "_[b]" left over from <sup> references.
# Written as a raw string so that "\[" and "\w" reach the regex engine as
# regex escapes instead of being treated as (invalid) Python string escapes.
regex = re.compile(r'_\[\w\]')
table_columns = []

for element in table_head:
    # separator=" " turns <br/> line breaks inside a heading into spaces
    column_label = element.get_text(separator=" ", strip=True)
    # Use underscores so the labels are convenient as DataFrame column names
    column_label = column_label.replace(' ','_')
    # Drop the trailing footnote marker, if any
    column_label = regex.sub('',column_label)
    table_columns.append(column_label)
    print(column_label)

print('------------------')
print(table_columns)

Quarter
Cumulative_production
Total_production
Model_S_sales
Model_X_sales
Model_3_sales
Model_Y_sales
Total_sales
In_transit
Source
------------------
['Quarter', 'Cumulative_production', 'Total_production', 'Model_S_sales', 'Model_X_sales', 'Model_3_sales', 'Model_Y_sales', 'Total_sales', 'In_transit', 'Source']


In [20]:
# Extract the table data (rows)
# End result: A multi-dimensional list containing one list per table row

table_rows = full_table.select('tr')

# The first <tr> holds the column headings, so the slice skips it; every
# remaining row becomes a list with the stripped text of each of its <td> cells.
table_data = [
    [cell.text.strip() for cell in row.select('td')]
    for row in table_rows[1:]
]

print(table_data)

[['Q1 2013', 'N/A', '5,000+', '4,900', '', '', '', '4,900', '', '[101]'], ['Q2 2013', 'N/A', 'N/A', '5,150', '', '', '', '5,150', '', '[102]'], ['Q3 2013', 'N/A', 'N/A', '5,500+', '', '', '', '5,500+', '', '[103]'], ['Q4 2013', '~34,851', '6,587', '6,892', '', '', '', '6,892', '', '[104]'], ['Q1 2014', '~41,438', '7,535', '6,457', '', '', '', '6,457', '', '[105]'], ['Q2 2014', '~48,973', '8,763', '7,579', '', '', '', '7,579', '', '[106]'], ['Q3 2014', '~57,736', '~7,075', '7,785', '', '', '', '7,785', '', '[107]'], ['Q4 2014', '64,811', '11,627', '9,834', '', '', '', '9,834', '', '[108]'], ['Q1 2015', '76,438', '11,160', '10,045', '', '', '', '10,045', '', '[109]'], ['Q2 2015', '89,245', '12,807', '11,532', '', '', '', '11,532', '', '[110]'], ['Q3 2015', '102,336', '13,091', '11,597', '6', '', '', '11,603', '', '[111]'], ['Q4 2015', '116,373', '14,037', '17,272', '206', '', '', '17,478', '', '[112]'], ['Q1 2016', '131,883', '15,510', '12,420', '2,400', '', '', '14,820', '2,615', '[113]

In [21]:
# Build a pandas DataFrame from the scraped rows, labelling the columns with
# the headings extracted earlier. All values are still strings at this point.

df = pd.DataFrame(data=table_data, columns=table_columns)
df

Unnamed: 0,Quarter,Cumulative_production,Total_production,Model_S_sales,Model_X_sales,Model_3_sales,Model_Y_sales,Total_sales,In_transit,Source
0,Q1 2013,,"5,000+",4900,,,,4900,,[101]
1,Q2 2013,,,5150,,,,5150,,[102]
2,Q3 2013,,,"5,500+",,,,"5,500+",,[103]
3,Q4 2013,"~34,851",6587,6892,,,,6892,,[104]
4,Q1 2014,"~41,438",7535,6457,,,,6457,,[105]
5,Q2 2014,"~48,973",8763,7579,,,,7579,,[106]
6,Q3 2014,"~57,736","~7,075",7785,,,,7785,,[107]
7,Q4 2014,64811,11627,9834,,,,9834,,[108]
8,Q1 2015,76438,11160,10045,,,,10045,,[109]
9,Q2 2015,89245,12807,11532,,,,11532,,[110]
