# Wikipedia Revenue Table Scrape — Pandas + Export

This section converts scraped table headers into clean column names, iterates through table rows, loads them into a Pandas DataFrame, and exports the result to CSV.


In [None]:
# Turn the scraped header elements into stripped, plain-text column labels
# Why: pandas needs ordinary strings for column names, not parsed tag objects
world_table_titles = [
    header_cell.text.strip()
    for header_cell in world_titles
]
print(world_table_titles)


In [None]:
# Import pandas for tabular storage and CSV export
# Why: pandas provides DataFrame construction, row insertion, and file output
import pandas as pd


In [None]:
# Start with a header-only DataFrame whose columns are the scraped titles
# Why: Fixing the schema up front lets the scraped records be added against known columns
df = pd.DataFrame(data=None, columns=world_table_titles)
df


In [None]:
# Extract all table rows
# Why: Each <tr> represents either a header row or a data row
# NOTE(review): `table` is defined outside this section — presumably the parsed
# element for the target table; confirm against the earlier scraping cells
column_data = table.find_all('tr')
# Bare expression: shown as the cell output in a notebook for a quick visual check
column_data


In [None]:
# Iterate through rows (skipping the first row, which is typically the header)
# Why: The first <tr> often contains <th> header cells, not <td> data cells
expected_width = len(df.columns)
scraped_rows = []
for row in column_data[1:]:
    individual_row_data = [data.text.strip() for data in row.find_all('td')]

    # Skip rows that contain no <td> cells (e.g. repeated header or spacer rows)
    # Why: An empty record cannot be aligned with the columns and would abort the scrape
    if not individual_row_data:
        continue
    print(individual_row_data)

    # Fail loudly on a genuinely malformed row
    # Why: A record with the wrong number of cells would misalign values and columns
    if len(individual_row_data) != expected_width:
        raise ValueError(
            f"Expected {expected_width} cells but found "
            f"{len(individual_row_data)}: {individual_row_data}"
        )
    scraped_rows.append(individual_row_data)

# Build the DataFrame once from the collected records
# Why: Avoids growing the frame one row at a time, and rebuilding (rather than
#      appending) means re-running this cell does not duplicate rows
df = pd.DataFrame(scraped_rows, columns=df.columns)


In [None]:
# Inspect the final DataFrame
# Why: Confirms row counts, column alignment, and extracted values before export
# Bare expression: rendered as the cell output when run in a notebook
df


In [None]:
# Write the scraped table to disk as a CSV file
# Why: CSV is a portable format that Excel, Tableau, and SQL loaders can ingest
# The path is a placeholder; point it at your own project output folder
output_path = r"C:\path\to\your\project\output\companies_1.csv"
# index=False leaves the DataFrame's row index out of the file
df.to_csv(path_or_buf=output_path, index=False)
