In [None]:
import re
import time
import requests
from io import StringIO
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Listing page to scrape; the query string asks for type=practice,
# order=name_asc and page=2 of the firm's people list.
url = (
    'https://www.weerawongcp.com/people-list.php'
    '?type=practice&key=&order=name_asc&page=2'
)

In [None]:
# Download the listing page. The timeout keeps a stalled connection from
# hanging the kernel forever, and raise_for_status() stops the notebook on an
# HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Decode using the encoding detected from the body rather than the one
# declared in the response headers.
response.encoding = response.apparent_encoding
soup = BeautifulSoup(response.text, 'lxml')

In [None]:
def get_individual_data(object):
    """Scrape one lawyer's profile page and return it as a flat record.

    Parameters
    ----------
    object : bs4 Tag
        Element from the listing page that contains an ``<a>`` whose ``href``
        is the profile path relative to the site root. (The parameter name
        shadows the ``object`` builtin; it is kept for backward compatibility.)

    Returns
    -------
    dict
        Record with the keys ``country``, ``title``, ``generic_title``,
        ``given_name``, ``family_name``, ``full_name``, ``gender``, ``email``,
        ``languages``, ``organization``, ``phone``, ``mobile``, ``full_href``
        and ``bio``. Fields that are not scraped are the placeholder ``'NA'``;
        scraped fields that cannot be located on the page are ``'Not Found'``.

    Raises
    ------
    requests.HTTPError
        If the profile page responds with an HTTP error status.
    AttributeError
        If the listing element has no link or the profile page has no
        ``<h1>`` (the name is the merge key, so this is not masked).

    Notes
    -----
    Prints a progress line per profile and sleeps one second after each
    request to throttle traffic to the site.
    """
    href_attr = object.find('a').get('href')
    full_href = f'https://www.weerawongcp.com/{href_attr}'

    # Bound the wait and fail loudly on HTTP errors instead of parsing an
    # error page as if it were a profile.
    r = requests.get(full_href, timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    s = BeautifulSoup(r.text, 'lxml')

    full_name = s.find('h1').text.strip()
    print(f'Retrieving information for {full_name}')

    # Fields that are constant or not scraped from the profile page.
    country = 'Thailand'
    title = 'NA'
    generic_title = 'NA'
    given_name = 'NA'
    family_name = 'NA'
    gender = 'NA'
    organization = 'Weerawong, Chinnavat & Partners Ltd.'
    mobile = 'NA'

    # Elements that follow the "Contact" heading. A profile without that
    # heading yields an empty list, so the lookups below fall back to
    # 'Not Found' instead of raising before any handler is reached.
    contact_heading = s.find('h3', string=re.compile('Contact'))
    if contact_heading is not None:
        contact_object = contact_heading.find_next_siblings()
    else:
        contact_object = []

    # IndexError is caught alongside AttributeError: indexing a sibling list
    # that is too short raises IndexError, while a failed find()/re.search()
    # returns None and raises AttributeError on the next attribute access.
    try:
        phone = re.search(
            r'T: (.*?)(?:$)', contact_object[0].text
        ).group(1).strip()
    except (AttributeError, IndexError):
        phone = 'Not Found'

    try:
        email = contact_object[2].find('a').get('href').strip()
        email = re.search(
            r'mailto:(.*)$', email
        ).group(1).strip()
    except (AttributeError, IndexError):
        email = 'Not Found'

    try:
        languages = s.find('h3', string=re.compile('Languages')).find_next_siblings('p')[0].text.strip()
    except (AttributeError, IndexError):
        languages = 'Not Found'

    # The bio is read from the second cell of the first row of the third
    # table on the page; guard it like the other optional fields.
    try:
        bio = s.find_all('table')[2].find('tr').find_all('td')[1].text.strip()
    except (AttributeError, IndexError):
        bio = 'Not Found'

    lawyer_entry = {
        "country"         : country,
        "title"           : title,
        "generic_title"   : generic_title,
        "given_name"      : given_name,
        "family_name"     : family_name,
        "full_name"       : full_name,
        "gender"          : gender,
        "email"           : email,
        "languages"       : languages,
        # "position" is not collected from the profile page.
        "organization"    : organization,
        "phone"           : phone,
        "mobile"          : mobile,
        # "practice" is not collected from the profile page.
        "full_href"       : full_href,
        "bio"             : bio
    }

    # Pause between profile requests to avoid hammering the site.
    time.sleep(1)

    return lawyer_entry

In [None]:
def get_page_data(object):
    """Scrape one people-listing page plus every profile it links to.

    Parameters
    ----------
    object : bs4.BeautifulSoup
        Parsed listing page. (The parameter name shadows the ``object``
        builtin; it is kept for backward compatibility.)

    Returns
    -------
    list
        ``[page_data, lawyer_data]``: ``page_data`` is the listing table as a
        DataFrame with columns ``full_name``, ``practice`` and ``position``;
        ``lawyer_data`` is a DataFrame with one row per record returned by
        ``get_individual_data``.

    Raises
    ------
    ValueError
        If the page has no ``<table class="sptb1">``, or if that table does
        not have exactly three columns.
    """
    table_html = object.find('table', class_='sptb1')
    if table_html is None:
        raise ValueError("Listing table <table class='sptb1'> not found on the page")

    page_data = pd.read_html(StringIO(str(table_html)))[0]
    page_data.columns = ['full_name', 'practice', 'position']

    # Walk the name cells of the page that was passed in, not the notebook's
    # global `soup`, so the function works for any listing page.
    lawyer_data_list = [
        get_individual_data(person)
        for person in object.find_all('td', class_='xname')
    ]
    lawyer_data = pd.DataFrame(lawyer_data_list)

    return [page_data, lawyer_data]

In [69]:
# Scrape the listing page and every linked profile. X[0] is the listing table
# and X[1] the per-lawyer profile records. Each profile costs one HTTP request
# plus a one-second pause, so this cell is slow.
X = get_page_data(soup)

Retrieving information for Korakod Jittimaporn
Retrieving information for Masitorn Boonserm
Retrieving information for Na napat Bootphet
Retrieving information for Nakarintr Naka
Retrieving information for Nantanat Hattathammanoon
Retrieving information for Narisa Aeimamnuay
Retrieving information for Nattadit Chatmatasit
Retrieving information for Nattanicha Siamnikorn
Retrieving information for Natthanun Suksomboon
Retrieving information for Natthida Pranutnorapal
Retrieving information for Nicolas Ranza
Retrieving information for Nitcha Kasetpheutphon
Retrieving information for Nontachai Hemaratpitak
Retrieving information for Padej Khamcharoen
Retrieving information for Palita Lawanrattanakul


In [74]:
# Inner-join the listing table (X[0]) with the scraped profile records (X[1])
# on the lawyer's full name; names present in only one of the frames are dropped.
master_data = X[0].merge(X[1], how='inner', on='full_name')
master_data

Unnamed: 0,full_name,position,practice,country,title,generic_title,given_name,family_name,gender,email,languages,organization,phone,mobile,full_href,bio
0,Korakod Jittimaporn,Dispute resolution,Associate,Thailand,,,,,,korakod.j@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Korakod Jittimaporn is an associate at Weerawo...
1,Masitorn Boonserm,Debt and Equity Capital Markets,Associate,Thailand,,,,,,masitorn.b@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Masitorn Boonserm is an associate in the capit...
2,Na napat Bootphet,,Associate,Thailand,,,,,,nanapat.b@weerawongcp.com,"Thai, English, Mandarin","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Ms. Na Napat Bootphet is an associate in the C...
3,Nakarintr Naka,Real estate and construction,Associate,Thailand,,,,,,nakarintr.n@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",,,https://www.weerawongcp.com/people-show.php?id...,Nakarintr Naka is an associate in the regulato...
4,Nantanat Hattathammanoon,Dispute resolution,Associate,Thailand,,,,,,nantanat.h@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Nantanat Hattathammanoon is an associate in th...
5,Narisa Aeimamnuay,Banking and Finance,Associate,Thailand,,,,,,narisa.a@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Narisa Aeimamnuay is an associate in the proje...
6,Nattadit Chatmatasit,,Associate,Thailand,,,,,,nattadit.c@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id...,Nattadit Chatmatasit is an associate in the in...
7,Nattanicha Siamnikorn,Corporate and commercial,Associate,Thailand,,,,,,nattanicha.s@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",,,https://www.weerawongcp.com/people-show.php?id...,Nattanicha Siamnikorn is an associate in the f...
8,Natthanun Suksomboon,Debt and Equity Capital Markets,Associate,Thailand,,,,,,natthanun.s@weerawongcp.com,"English, Thai","Weerawong, Chinnavat & Partners Ltd.",,,https://www.weerawongcp.com/people-show.php?id...,Natthanun Suksomboon is an associate in the \r...
9,Natthida Pranutnorapal,Financial Restructuring,Partner,Thailand,,,,,,natthida.p@weerawongcp.com,"Thai, English","Weerawong, Chinnavat & Partners Ltd.",+ 66 2 264 8000,,https://www.weerawongcp.com/people-show.php?id=46,Natthida Pranutnorapal possesses extensive exp...
