STEP 1 — Install & Import Libraries

In [19]:
!pip install requests beautifulsoup4 pandas


In [20]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


STEP 2 — Send HTTP Request to Website

In [21]:
url = "https://en.wikipedia.org/wiki/List_of_Falcon_9_and_Falcon_Heavy_launches"

# User-Agent isliay daal rahe hain ke website ko lage browser se aa raha hai
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

response = requests.get(url, headers=headers)

print("Status code:", response.status_code)


Status code: 200


In [22]:
# YAHAN BAS YEHI PARSER USE KARNA HAI
soup = BeautifulSoup(response.text, "html.parser")

print("Page title:", soup.title.string)


Page title: List of Falcon 9 and Falcon Heavy launches - Wikipedia


In [23]:
tables = soup.find_all("table", class_="wikitable")
print("Total tables found:", len(tables))


Total tables found: 5


In [24]:
launch_table = tables[0]  # agar teacher ne specific table batayi ho to index change kar sakte ho

type(launch_table)


bs4.element.Tag

In [25]:
header_row = launch_table.find("tr")
headers = [th.get_text(strip=True) for th in header_row.find_all("th")]

print("Number of headers:", len(headers))
print(headers)


Number of headers: 10
['Flight No.', 'Date andtime (UTC)', 'Version,booster[j]', 'Launchsite', 'Payload[k]', 'Payload mass', 'Orbit', 'Customer', 'Launchoutcome', 'Boosterlanding']


In [26]:
rows = []

# pehle row mein sirf headers thay, is liye [1:] se start
for tr in launch_table.find_all("tr")[1:]:
    tds = tr.find_all("td")
    if not tds:
        continue  # skip empty rows

    row = [td.get_text(strip=True) for td in tds]

    # agar row choti ho headers se, to khaali string add kar do
    if len(row) < len(headers):
        row.extend([""] * (len(headers) - len(row)))
    # agar row zyada lambi ho, to cut kar do
    elif len(row) > len(headers):
        row = row[:len(headers)]

    rows.append(row)

print("Total data rows:", len(rows))


Total data rows: 272


In [27]:
df = pd.DataFrame(rows, columns=headers)
df.head()


Unnamed: 0,Flight No.,Date andtime (UTC),"Version,booster[j]",Launchsite,Payload[k],Payload mass,Orbit,Customer,Launchoutcome,Boosterlanding
0,"January 3, 202403:44[23]",F9B5B1082‑1,"Vandenberg,SLC‑4E",Starlink:Group 7-9(22 satellites),"~16,800 kg (37,000 lb)",LEO,SpaceX,Success,Success (OCISLY),
1,"Launch of 22 Starlink v2mini satellites, inclu...",,,,,,,,,
2,"January 3, 202423:04[24]",F9B5B1076‑10,"Cape Canaveral,SLC‑40",Ovzon-3,"1,800 kg (4,000 lb)",GTO,Ovzon,Success,Success (LZ‑1),
3,Broadband internet provider satellite.[25]Firs...,,,,,,,,,
4,"January 7, 202422:35[28]",F9B5B1067‑16,"Cape Canaveral,SLC‑40",Starlink:Group 6-35(23satellites),"~17,100 kg (37,700 lb)",LEO,SpaceX,Success,Success (ASOG),


In [28]:
print(df.columns)


Index(['Flight No.', 'Date andtime (UTC)', 'Version,booster[j]', 'Launchsite',
       'Payload[k]', 'Payload mass', 'Orbit', 'Customer', 'Launchoutcome',
       'Boosterlanding'],
      dtype='object')


In [29]:
# sirf pehle 6 columns rakh lete hain for safety
df_clean = df.iloc[:, :6]

df_clean.head()


Unnamed: 0,Flight No.,Date andtime (UTC),"Version,booster[j]",Launchsite,Payload[k],Payload mass
0,"January 3, 202403:44[23]",F9B5B1082‑1,"Vandenberg,SLC‑4E",Starlink:Group 7-9(22 satellites),"~16,800 kg (37,000 lb)",LEO
1,"Launch of 22 Starlink v2mini satellites, inclu...",,,,,
2,"January 3, 202423:04[24]",F9B5B1076‑10,"Cape Canaveral,SLC‑40",Ovzon-3,"1,800 kg (4,000 lb)",GTO
3,Broadband internet provider satellite.[25]Firs...,,,,,
4,"January 7, 202422:35[28]",F9B5B1067‑16,"Cape Canaveral,SLC‑40",Starlink:Group 6-35(23satellites),"~17,100 kg (37,700 lb)",LEO


In [30]:
df_clean.columns = [f"Column_{i+1}" for i in range(df_clean.shape[1])]
df_clean.head()


Unnamed: 0,Column_1,Column_2,Column_3,Column_4,Column_5,Column_6
0,"January 3, 202403:44[23]",F9B5B1082‑1,"Vandenberg,SLC‑4E",Starlink:Group 7-9(22 satellites),"~16,800 kg (37,000 lb)",LEO
1,"Launch of 22 Starlink v2mini satellites, inclu...",,,,,
2,"January 3, 202423:04[24]",F9B5B1076‑10,"Cape Canaveral,SLC‑40",Ovzon-3,"1,800 kg (4,000 lb)",GTO
3,Broadband internet provider satellite.[25]Firs...,,,,,
4,"January 7, 202422:35[28]",F9B5B1067‑16,"Cape Canaveral,SLC‑40",Starlink:Group 6-35(23satellites),"~17,100 kg (37,700 lb)",LEO


In [31]:
df_clean.to_csv("spacex_webscraped_launches.csv", index=False)


In this lab, I performed web scraping using the requests library to download an HTML page and BeautifulSoup to parse the HTML content. I extracted the main launch table from the Wikipedia page for Falcon 9 and Falcon Heavy launches by locating tables with the wikitable class. After identifying the appropriate table, I parsed the header row and data rows, handled inconsistent row lengths safely, and converted the extracted data into a structured pandas DataFrame. Finally, I saved a cleaned version of the table as a CSV file for further analysis and visualization.