In [1]:
def get_wiki_page(url):
    import requests
    try:
        response = requests.get(url,timeout=10,verify=False)
        response.raise_for_status()
        
        return response.text
    except requests.RequestException as e:
        print(f"An error occured : {e}")

In [2]:
from bs4 import BeautifulSoup
html = get_wiki_page("https://en.wikipedia.org/wiki/List_of_association_football_stadiums_by_capacity")
soup = BeautifulSoup(html,features='html.parser')
table = soup.find_all("table",attrs={"class":"wikitable sortable sticky-header"})[0]

table_rows = table.find_all('tr')




In [38]:
class Stadium:
    def __init__(self, stadium=None, seating_cap=None, region=None, country=None, city=None, image_url=None, home_team=None, rank=None):
        self.stadium = stadium
        self.seating_cap = seating_cap
        self.region = region
        self.country = country
        self.city = city
        self.image_url = image_url
        self.home_team = home_team
        self.rank = rank

    def __repr__(self):
        return f"Stadium(stadium={self.stadium}, rank={self.rank})"

In [39]:
list_of_stadium=[] 
for i in range(1,len(table_rows)):
  cells = table_rows[i].find_all('td')
  stadium = Stadium(
  rank = i,
  stadium = cells[0].get_text().strip().replace('♦',''),
  seating_cap = cells[1].get_text().strip().split('[')[0].replace(',','') if '[' in cells[1].get_text() else cells[1].get_text().strip().replace(',',''),
  region = cells[2].get_text().strip(),
  country = cells[3].get_text().strip(),
  city = cells[4].get_text().strip(),
  image_url = cells[5].find('img')['src'] if cells[5].find('img') else 'No Image',
  home_team = cells[6].get_text().strip()
  )
  list_of_stadium.append(stadium)
list_of_stadium

[Stadium(stadium=Rungrado 1st of May Stadium , rank=1),
 Stadium(stadium=Michigan Stadium, rank=2),
 Stadium(stadium=Ohio Stadium, rank=3),
 Stadium(stadium=Melbourne Cricket Ground , rank=4),
 Stadium(stadium=Camp Nou , rank=5),
 Stadium(stadium=Estadio Azteca , rank=6),
 Stadium(stadium=FNB Stadium , rank=7),
 Stadium(stadium=New Administrative Capital Stadium , rank=8),
 Stadium(stadium=Rose Bowl Stadium, rank=9),
 Stadium(stadium=Cotton Bowl Stadium, rank=10),
 Stadium(stadium=Wembley Stadium , rank=11),
 Stadium(stadium=Lusail Stadium , rank=12),
 Stadium(stadium=Bukit Jalil National Stadium , rank=13),
 Stadium(stadium=Borg el-Arab Stadium, rank=14),
 Stadium(stadium=Estadio Santiago Bernabéu, rank=15),
 Stadium(stadium=Estadio Mâs Monumental , rank=16),
 Stadium(stadium=Stadium Australia, rank=17),
 Stadium(stadium=MetLife Stadium, rank=18),
 Stadium(stadium=Croke Park , rank=19),
 Stadium(stadium=Jakarta International Stadium , rank=20),
 Stadium(stadium=Lambeau Field, rank=21)

In [40]:
import pandas as pd
# Convert list of stadium objects to DataFrame
data = {
    'Rank': [s.rank for s in list_of_stadium],
    'Stadium': [s.stadium for s in list_of_stadium],
    'Seating Capacity': [s.seating_cap for s in list_of_stadium],
    'Region': [s.region for s in list_of_stadium],
    'Country': [s.country for s in list_of_stadium],
    'City': [s.city for s in list_of_stadium],
    'Image URL': [s.image_url for s in list_of_stadium],
    'Home Team': [s.home_team for s in list_of_stadium]
}
list_of_stadium_df = pd.DataFrame(data)


Unnamed: 0,Rank,Stadium,Seating Capacity,Region,Country,City,Image URL,Home Team
0,1,Rungrado 1st of May Stadium,114000,East Asia,North Korea,Pyongyang,//upload.wikimedia.org/wikipedia/commons/thumb...,"Korea DPR national football team, Korea DPR wo..."
1,2,Michigan Stadium,107600,North America,United States,"Ann Arbor, Michigan",//upload.wikimedia.org/wikipedia/commons/thumb...,Michigan Wolverines football
2,3,Ohio Stadium,102780,North America,United States,"Columbus, Ohio",//upload.wikimedia.org/wikipedia/commons/thumb...,Ohio State Buckeyes football
3,4,Melbourne Cricket Ground,100024,Oceania,Australia,"Melbourne, Victoria",//upload.wikimedia.org/wikipedia/commons/thumb...,"Australia national cricket team, Victoria cric..."
4,5,Camp Nou,99354,Europe,Spain,"Barcelona, Catalonia",//upload.wikimedia.org/wikipedia/commons/thumb...,FC Barcelona


In [42]:
list_of_stadium_df.to_csv('list_of_stadium.csv',index=False)


In [3]:
cells = table_rows[1].find_all('td')

In [6]:
stadium_name = cells[0].get_text().strip().replace('♦','')
stadium_name

'Rungrado 1st of May Stadium '

In [37]:
seating_cap = cells[1].get_text().strip().split('[')[0].replace(',','') if '[' in cells[1].get_text() else cells[1].get_text().strip().replace(',','') 
seating_cap

'40000'

In [12]:
region = cells[2].get_text().strip()
region

'East Asia'

In [13]:
country = cells[3].get_text().strip()
country

'North Korea'

In [14]:
city = cells[4].get_text().strip()
city

'Pyongyang'

In [20]:
image_url = cells[5].find('img')['src'] if cells[5] is not None else 'No Image'
image_url

'//upload.wikimedia.org/wikipedia/commons/thumb/8/81/Arirang_Mass_Games_12.JPG/150px-Arirang_Mass_Games_12.JPG'

In [21]:
home_team = cells[6].get_text().strip()
home_team

"Korea DPR national football team, Korea DPR women's national football team, April 25"