# **Extracting Personal Information from Wikipedia using Web Scraper and Regex in Python**


In [53]:

from bs4 import BeautifulSoup
import requests
import re

headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'} 
url = 'https://en.wikipedia.org/wiki/Elon_Musk'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page and
# reporting a misleading "table not found".
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
webpage = response.text

soup = BeautifulSoup(webpage, 'html.parser')

# Find the first table with a class matching ^infobox (e.g. "infobox vcard")
table = soup.find('table', class_=re.compile(r'^infobox'))

# Join the text of every <tr> row, one row per line. `all_text` is assigned in
# both branches so that later cells can always reference it.
if table:
    all_text = '\n'.join(tr.get_text(strip=True) for tr in table.find_all('tr'))
    print(all_text)
else:
    all_text = ''
    print("Table with class 'infobox vcard' not found.")



Elon MuskFRS
Musk in 2022

Senior Advisor to the President
In officeJanuary 20, 2025 – May 30, 2025Serving withMassad Boulos
PresidentDonald Trump
Preceded byTom Perez

Personal details
BornElon Reeve Musk(1971-06-28)June 28, 1971(age 54)Pretoria, South Africa
CitizenshipSouth Africa (since 1971)Canada (since 1989)United States (since 2002)
Political partyIndependent
Other politicalaffiliationsAmerica Party(2025–present)
SpousesJustine Wilson​​(m.2000;div.2008)​Talulah Riley​​(m.2010;div.2016)​
ChildrenAt least 14
ParentsErrol Musk(father)Maye Musk(mother)
RelativesMusk family
EducationUniversity of Pennsylvania(BA,BS)
OccupationCEO and product architect ofTeslaFounder, CEO, and chief engineer ofSpaceXFounder and CEO ofxAIFounder ofthe Boring CompanyandX Corp.Co-founder ofNeuralink,OpenAI,Zip2, andX.com(part ofPayPal)President of theMusk Foundation
AwardsFull list
Signature
Elon Musk's voiceElon Musk on his departure from theDepartment of Government EfficiencyRecorded May 30, 2025



In [54]:
def get_pattern_match(pattern, text, flags=0):
    """Return the first capture group of the first match, stripped of whitespace.

    Args:
        pattern: Regular expression containing at least one capture group.
        text: String to search.
        flags: Optional ``re`` flags forwarded to ``re.search``.

    Returns:
        The stripped text of group 1, or ``None`` when the pattern does not
        match or when group 1 did not participate in the match.
    """
    match = re.search(pattern, text, flags)
    if match is None:
        return None
    captured = match.group(1)
    # An optional group such as `(x)?` yields None when it did not take part
    # in the match; calling .strip() on it would raise AttributeError.
    return captured.strip() if captured is not None else None

In [55]:

def extract_personal_information(text):
    """Extract name, birth date, age and birth place from infobox text.

    The patterns are written for text in which each infobox row sits on its
    own line, e.g.
    ``BornElon Reeve Musk(1971-06-28)June 28, 1971(age 54)Pretoria, South Africa``.

    Args:
        text: Infobox text to search.

    Returns:
        dict with keys ``age`` (int, or None when not found) and ``name``,
        ``birth_date``, ``birth_place`` (str, empty when not found).
    """
    # Name: capitalised text after "Born", up to the first "(" or the end of
    # the line, so a row without a parenthesis cannot swallow later rows.
    full_name = get_pattern_match(r'Born\s*([A-Z][^\(\n]+)', text)
    # First ISO-formatted date enclosed in parentheses.
    birth_date = get_pattern_match(r'\((\d{4}-\d{2}-\d{2})\)', text)
    # \b keeps "age" from matching inside words such as "Page 12".
    age = get_pattern_match(r'\bage\s*(\d+)', text)
    # Birth place: the rest of the line after "(age NN)". [^\S\n]* skips
    # spaces but not the newline, so a missing place yields no match instead
    # of returning the next infobox row.
    birth_place = get_pattern_match(r'\bage\s*\d+\)?[^\S\n]*([A-Z][^\n]+)', text)

    return {
        'age': int(age) if age else None,
        'name': full_name or '',
        'birth_date': birth_date or '',
        'birth_place': birth_place or ''
    }

In [56]:
extract_personal_information(all_text)


{'age': 54,
 'name': 'Elon Reeve Musk',
 'birth_date': '1971-06-28',
 'birth_place': 'Pretoria, South Africa'}