In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Define the URL of the Wikipedia page to be scraped
url = 'https://en.wikipedia.org/wiki/List_of_chess_grandmasters'

# Send a GET request to the URL. The timeout keeps the cell from hanging
# indefinitely on a stalled connection, and raise_for_status() surfaces HTTP
# errors here instead of letting an error page be parsed as the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content of the response using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Find the first table with class "wikitable" (expected to hold the list)
table = soup.find('table', class_='wikitable')
if table is None:
    # Fail with a clear message rather than an AttributeError on None below
    raise ValueError(f"No table with class 'wikitable' found at {url}")
rows = table.find_all('tr')
data = []

# Positions of the <td> cells read from each table row
NAME_COL, BORN_COL, YEAR_COL, COUNTRY_COL = 0, 2, 5, 6

# rows[0] is skipped as the header row
for row in rows[1:]:
    cells = row.find_all('td')
    # Rows without enough <td> cells (e.g. header-only or malformed rows)
    # cannot be indexed safely, so skip them instead of raising IndexError
    if len(cells) <= max(NAME_COL, BORN_COL, YEAR_COL, COUNTRY_COL):
        continue
    name = cells[NAME_COL].text.strip()
    country = cells[COUNTRY_COL].text.strip()
    year = cells[YEAR_COL].text.strip()
    born = cells[BORN_COL].text.strip()
    data.append([name, country, year, born])

# Create a pandas dataframe from the data (all values are still strings here)
df = pd.DataFrame(data, columns=['Name', 'Country', 'Title Year','Born'])

# Display a random sample of 10 rows as a quick sanity check
df.sample(10)

Unnamed: 0,Name,Country,Title Year,Born
923,"Korotylev, Alexey",Russia,2000,1977-03-01
1235,"Moskalenko, Viktor",Ukraine,1992,1960-12-04
1303,"Nikitenko, Mihail",Belarus,2022,2000-03-28
16,"Adly, Ahmed",Egypt,2005,1987-02-19
1269,"Naroditsky, Daniel",United States,2013,1995-11-09
765,"Iskusnyh, Sergei",Russia,2000,1974-09-05
1871,"Tseshkovsky, Vitaly",Soviet Union,1975,1944-09-25
300,"Chibukhchian, Artur",Armenia,2009,1979-10-06
86,"Arkell, Keith C.",England,1995,1961-01-08
603,"Gorovykh, Eduard",Russia,2012,1990-02-07


In [3]:
# Match a YYYY-MM-DD date; the single capture group keeps only the year.
# Values of 'Born' that do not match the pattern become NaN in 'Born Year'.
pattern = r'(\d{4})-\d{2}-\d{2}'
df['Born Year'] = df['Born'].str.extract(pattern, expand=False)

# The full birth date is no longer needed once the year has been extracted
df.drop(labels='Born', axis='columns', inplace=True)

In [4]:
# Preview the first ten rows after replacing 'Born' with 'Born Year'
df.head(n=10)

Unnamed: 0,Name,Country,Title Year,Born Year
0,"Aagaard, Jacob",Denmark,2007,1973
1,"Abasov, Nijat",Azerbaijan,2011,1995
2,"Abbasifar, Hasan",Iran,2013,1972
3,"Abbasov, Farid",Azerbaijan,2007,1979
4,"Abdisalimov, Abdimalik",Uzbekistan,2023,2002
5,"Abdumalik, Zhansaya",Kazakhstan,2021,2000
6,"Abdusattorov, Nodirbek",Uzbekistan,2018,2004
7,"Abergel, Thal",France,2008,1982
8,"Abramović, Boško",Yugoslavia,1984,1951
9,"Abreu Delgado, Aryam",Cuba,2008,1978


In [5]:
# Count missing values in each column
df.isna().sum()

Name           0
Country        0
Title Year     0
Born Year     16
dtype: int64

In [6]:
# Remove, in place, every row that has a missing value in any column
df.dropna(axis='index', how='any', inplace=True)

In [7]:
# Both year columns hold text at this point; cast them to integers so they
# can be subtracted
for year_col in ('Title Year', 'Born Year'):
    df[year_col] = df[year_col].astype('int')

# Age at title, taken as the difference between the two calendar years
df['Title Age'] = df['Title Year'].sub(df['Born Year'])

In [8]:
# Largest and smallest title ages in the table
title_age = df['Title Age']
max_age, min_age = title_age.max(), title_age.min()

# Every row that ties for each extreme (there may be more than one)
max_age_rows = df.loc[title_age.eq(max_age)]
min_age_rows = df.loc[title_age.eq(min_age)]

In [9]:
# Display the row(s) whose 'Title Age' equals the maximum
max_age_rows

Unnamed: 0,Name,Country,Title Year,Born Year,Title Age
408,"Dückstein, Andreas",Austria,2024,1927,97


In [10]:
# Display the row(s) whose 'Title Age' equals the minimum
min_age_rows

Unnamed: 0,Name,Country,Title Year,Born Year,Title Age
1207,"Mishra, Abhimanyu",United States,2021,2009,12
