In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Define the URL of the Wikipedia page to be scraped
url = 'https://en.wikipedia.org/wiki/List_of_chess_grandmasters'

# Send a GET request to the URL and store the response in a variable
response = requests.get(url)

# Parse the HTML content of the response using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table containing the list of grand masters
table = soup.find('table', class_='wikitable')
rows = table.find_all('tr')
data = []

for row in rows[1:]:
    cells = row.find_all('td')
    name = cells[0].text.strip()
    country = cells[6].text.strip()
    year = cells[5].text.strip()
    born = cells[2].text.strip()
    data.append([name, country, year, born])

# Create a pandas dataframe from the data
df = pd.DataFrame(data, columns=['Name', 'Country', 'Title Year','Born'])

# Print the dataframe
df.sample(10)

Unnamed: 0,Name,Country,Title Year,Born
617,"Gumularz, Szymon",Poland,2021,2001-12-22
1983,Zaw Win Lay,Myanmar,2000,1963-10-22
946,"Kuczyński, Robert",Poland,1993,1966-04-17
1787,"Tin, Jingyao",Singapore,2022,2000
88,"Arribas Lopez, Angel",Spain,2014,1993-11-27
459,"Filip, Miroslav",Czecho­slovakia,1955,1928-10-27
453,"Fernandez Romero, Ernesto",Spain,2018,1983-01-23
1946,"Yankelevich, Lev",Germany,2022,1997-10-28
1170,"Miron, Lucian-Costin",Romania,2019,1987-09-14
1440,"Quesada Pérez, Yuniesky",Cuba,2005,1984-07-31


In [3]:
pattern = r'(\d{4})-\d{2}-\d{2}'
df['Born Year'] = df['Born'].str.extract(pattern)
df.drop(columns=['Born'],inplace= True)

In [4]:
df.head(10)

Unnamed: 0,Name,Country,Title Year,Born Year
0,"Aagaard, Jacob",Denmark,2007,1973
1,"Abasov, Nijat",Azerbaijan,2011,1995
2,"Abbasifar, Hasan",Iran,2013,1972
3,"Abbasov, Farid",Azerbaijan,2007,1979
4,"Abdumalik, Zhansaya",Kazakhstan,2021,2000
5,"Abdusattorov, Nodirbek",Uzbekistan,2018,2004
6,"Abergel, Thal",France,2008,1982
7,"Abramović, Boško",Yugoslavia,1984,1951
8,"Abreu Delgado, Aryam",Cuba,2008,1978
9,"Ács, Péter",Hungary,1998,1981


In [5]:
df.isnull().sum()

Name           0
Country        0
Title Year     0
Born Year     13
dtype: int64

In [6]:
df.dropna(inplace= True)

In [7]:
df['Title Year'] = df['Title Year'].astype('int')
df['Born Year'] = df['Born Year'].astype('int')
df['Title Age'] = df['Title Year'] - df['Born Year']

In [8]:
max_age = df['Title Age'].max()
max_age_rows = df.loc[df['Title Age'] == max_age]

In [10]:
max_age_rows

Unnamed: 0,Name,Country,Title Year,Born Year,Title Age
1329,"Paoli, Enrico",Italy,1996,1908,88


In [11]:
import seaborn as sns 


In [None]:
sns.he