In [201]:
from bs4 import BeautifulSoup
import re
import csv

## First let's import a file's contents:

In [9]:
def import_html_file(filename):
    """import an entire html file using BeautifulSoup"""
    with open(filename) as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')
        return soup

soup = import_html_file('superbowl.html')

## Next let's see if we can extract exactly the content we want

In [12]:
def get_table(html):
    return html.find_all('table', 'wikitable sortable')[1]

table = get_table(soup)

print(table)

<table class="wikitable sortable">
<tr>
<th style="width: 3%;">Game</th>
<th style="width: 9%;">Date</th>
<th style="width: 14%;">Winning team</th>
<th style="width: 2%;">Score</th>
<th style="width: 14%;">Losing team</th>
<th style="width: 16%;">Venue</th>
<th style="width: 15%;">City</th>
<th style="width: 2%;">Attendance</th>
<th class="unsortable" style="width: 1%;">Ref</th>
</tr>
<tr>
<td style="text-align: center;"><span class="sortkey" style="display:none;">01 !</span><span class="sorttext"><a href="/wiki/Super_Bowl_I" title="Super Bowl I">I</a></span></td>
<td><span class="sortkey" style="display:none;speak:none">000000001967-01-15-0000</span><span style="white-space:nowrap">January 15, 1967</span></td>
<td style="background:#d0e7ff;"><span class="sortkey" style="display:none;">Green Bay Packers 01 !</span><span class="sorttext"><a href="/wiki/1966_Green_Bay_Packers_season" title="1966 Green Bay Packers season">Green Bay Packers</a><sup>‡</sup><br/>
<small>(1, 1–0)</small></spa

## Now let's grab the header for our csv

In [21]:
def get_header(table):
    th = table.find_all("th")
    return [th[0].get_text(), th[1].get_text(), th[2].get_text(), 
            th[3].get_text(), th[4].get_text(), th[5].get_text()]

header = get_header(table)
print(header)

['Game', 'Date', 'Winning team', 'Score', 'Losing team', 'Venue']


## Next let's grab the relevant data for the body of the csv

In [200]:
def get_data(table):
    data = []
    for row in table.find_all("tr"):
        cells = row.find_all("td")
        row_data = []
#         print(cells)
        
        if cells:
            # Game
            row_data.append(cells[0].a.get_text())
            
            # Date
            row_data.append(cells[1].find_all("span")[1].get_text().split(", ")[1])
            
            # Winning team
            prog = re.compile(r'[a-zA-Z ]*')
            row_data.append(prog.match(cells[2].span.get_text()).group(0).rstrip())
            
            # Score
            row_data.append(cells[3].find_all("span")[1].get_text())
            
            # Losing team
            row_data.append(prog.match(cells[4].span.get_text()).group(0).rstrip())
            
            # Venue
            row_data.append(cells[5].span.get_text().rstrip(' !'))
        
            data.append(row_data)
    
        
    # remove irrelevant data
    data = data[:-2]
    
    return data

data = get_data(table)
print(data)

[['I', '1967', 'Green Bay Packers', '35–10', 'Kansas City Chiefs', 'Los Angeles Memorial Coliseum 01'], ['II', '1968', 'Green Bay Packers', '33–14', 'Oakland Raiders', 'Miami Orange Bowl 01'], ['III', '1969', 'New York Jets', '16–7\xa0', 'Indianapolis Colts', 'Miami Orange Bowl 02'], ['IV', '1970', 'Kansas City Chiefs', '23–7\xa0', 'Minnesota Vikings', 'Tulane Stadium 01'], ['V', '1971', 'Indianapolis Colts', '16–13\xa0', 'Dallas Cowboys', 'Miami Orange Bowl 03'], ['VI', '1972', 'Dallas Cowboys', '24–3\xa0', 'Miami Dolphins', 'Tulane Stadium 02'], ['VII', '1973', 'Miami Dolphins', '14–7\xa0', 'Washington Redskins', 'Los Angeles Memorial Coliseum 02'], ['VIII', '1974', 'Miami Dolphins', '24–7\xa0', 'Minnesota Vikings', 'Rice Stadium 01'], ['IX', '1975', 'Pittsburgh Steelers', '16–6\xa0', 'Minnesota Vikings', 'Tulane Stadium 03'], ['X', '1976', 'Pittsburgh Steelers', '21–17', 'Dallas Cowboys', 'Miami Orange Bowl 04'], ['XI', '1977', 'Oakland Raiders', '32–14', 'Minnesota Vikings', 'Rose 

## Now that we have the data, it's time to write that data to the csv

In [223]:
def write_data_to_csv(header, data):
    
    with open("./result.csv", 'w') as csvfile:
        result = csv.writer(csvfile, delimiter = ',')
        
        # write the header of the csv first
        result.writerow([', '.join(header)])
        
        # now write the data
        for row in data:
            print(row)
            result.writerow([', '.join(row)])

write_data_to_csv(header, data)

Error: need to escape, but no escapechar set