In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# Download the Wikipedia page that contains the aircraft table.
url = "https://en.wikipedia.org/wiki/List_of_active_Indian_military_aircraft"
# A timeout keeps the cell from hanging forever on a stalled connection
# (requests waits indefinitely when no timeout is given).
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page below.
response.raise_for_status()
response

<Response [200]>

In [3]:
# Build a parse tree from the downloaded HTML using Python's built-in parser,
# then collect every <table> element whose CSS class is "wikitable".
soup = BeautifulSoup(response.text, features="html.parser")
table = soup.find_all("table", attrs={"class": "wikitable"})

In [4]:
# Select the first matching table.
# `table` is the list-like result of find_all() the first time this cell runs.
# The isinstance guard makes the cell safe to re-run: once `table` has been
# narrowed to a single element it is left untouched instead of being indexed
# again (which would fail).
if isinstance(table, list):
    if not table:
        # Same exception type as plain indexing, but with an actionable message.
        raise IndexError('no <table class="wikitable"> element found on the page')
    table = table[0]

In [5]:
# Extract rows: one list of cell strings per table row that has <td> cells.
def _cell_text(cell):
    """Return the display text for a single <td> cell.

    The text of the first usable hyperlink in the cell is preferred; if the
    cell has no usable hyperlink, the stripped text of the whole cell is
    returned instead.

    An anchor is skipped when it sits inside a <sup> element (on Wikipedia
    these are typically citation markers such as "[1]") or when it has no
    visible text (e.g. a link that only wraps an image). Previously the very
    first anchor was taken unconditionally, so such links could replace the
    real cell content.
    """
    for anchor in cell.find_all("a"):
        if anchor.find_parent("sup") is not None:
            continue
        label = anchor.text.strip()
        if label:
            return label
    return cell.text.strip()


data = []
for row in table.find_all("tr"):
    cells = row.find_all("td")
    if not cells:
        # Rows without any <td> (e.g. rows made only of header cells) carry no data.
        continue
    # NOTE(review): rows may have differing numbers of <td> cells (for example
    # when a cell above spans several rows) — the resulting lists are then of
    # unequal length; confirm how downstream cells should treat them.
    data.append([_cell_text(cell) for cell in cells])

In [7]:
# Turn the list of extracted rows into a pandas DataFrame
# (columns are labelled positionally: 0, 1, 2, ...).
df = pd.DataFrame(data=data)

In [8]:
# Discard every row that has at least one missing cell.
df = df.dropna(axis="index", how="any")

In [9]:
# Drop the last two irrelevant columns by keeping only the first four.
# Selecting a fixed number of leading columns (rather than "all but the last
# two") makes this cell safe to re-run: a second execution no longer strips
# two more columns from the already-trimmed frame.
df = df.iloc[:, :4]
df

Unnamed: 0,0,1,2,3
0,Dassault Rafale,France,Multirole,DH
2,HAL Tejas,India,Multirole,Mk.1
5,Sukhoi Su-30MKI,Russia,Multirole,Su-30MKI Flanker H
6,Mikoyan MiG-29,Soviet Union,Multirole,Fulcrum
8,Dassault Mirage 2000,France,Multirole,-2000H
10,SEPECAT Jaguar,United Kingdom,Attack,IB
13,Mikoyan-Gurevich MiG-21,Soviet Union,Interceptor,MiG-21 Bison
14,Embraer R-99,Brazil,AEW&C,Netra Mk1
15,Beriev A-50,Soviet Union,AEW&C,A-50EI
16,Boeing 707,United States,SIGINT,707-337C Phalcon


In [10]:
# Count the missing cells in each column.
missing = df.isna().sum()
missing

0    0
1    0
2    0
3    0
dtype: int64

In [11]:
# Replace the positional column labels with descriptive names.
headers = [
    "Aircraft",
    "Origin",
    "Type",
    "Variant",
]
df.columns = headers
df

Unnamed: 0,Aircraft,Origin,Type,Variant
0,Dassault Rafale,France,Multirole,DH
2,HAL Tejas,India,Multirole,Mk.1
5,Sukhoi Su-30MKI,Russia,Multirole,Su-30MKI Flanker H
6,Mikoyan MiG-29,Soviet Union,Multirole,Fulcrum
8,Dassault Mirage 2000,France,Multirole,-2000H
10,SEPECAT Jaguar,United Kingdom,Attack,IB
13,Mikoyan-Gurevich MiG-21,Soviet Union,Interceptor,MiG-21 Bison
14,Embraer R-99,Brazil,AEW&C,Netra Mk1
15,Beriev A-50,Soviet Union,AEW&C,A-50EI
16,Boeing 707,United States,SIGINT,707-337C Phalcon


In [12]:
# Write the cleaned table to a CSV file, leaving out the DataFrame index.
df.to_csv(path_or_buf="indian_aircraft.csv", index=False)