### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sqlalchemy

### HTTP Request

#### store website in variable

In [2]:
website="https://www.justia.com/lawyers/california/san-francisco"

#### Get Request

In [3]:
response=requests.get(website)

#### Status Code

In [4]:
response

<Response [200]>

### Soup Object

In [6]:
soup=BeautifulSoup(response.content, "html.parser")
#soup

### Results

In [8]:
results=soup.findAll("div",{"data-vars-action":"OrganicListing"})

In [9]:
len(results)

40

### Target necessary data

In [None]:
# Name
# Short Bio
# Specialization
# University
# Address
# Phone
# Email Link

#### Name

In [14]:
results[0].find("strong",{"class":"lawyer-name"}).get_text().strip()

'Doug Bend'

#### Short Bio

In [15]:
results[0].find("div",{"class":"lawyer-expl"}).get_text().strip()

'San Francisco, CA Attorney'

#### Specialization

In [17]:
results[0].find("span",{"class":"-practices"}).get_text()

'Business, Entertainment & Sports, Real Estate and Trademarks'

#### University

In [18]:
results[0].find("span",{"class":"-law-schools"}).get_text()

'Georgetown University Law Center'

#### Address

In [25]:
results[0].find("span",{"class":"-address"}).get_text().strip().replace("\t","").replace("\n","")

'2181 Greenwich StreetSan Francisco,CA 94123'

#### Phone

In [28]:
results[0].find("strong",{"class":"-phone"}).get_text().strip()

'(415) 633-6841'

#### Email Link

In [30]:
results[0].find("a",{"class":"-email"}).get("href")

'https://lawyers.justia.com/lawyer/doug-bend-1662623/contact'

### Put everything together inside a For-Loop

In [32]:
name=[]
short_bio=[]
specialization=[]
university=[]
address=[]
phone=[]
email_link=[]
for result in results:
    #name
    try:
        name.append(result.find("strong",{"class":"lawyer-name"}).get_text().strip())
    except:
        name.append("")
    #short_bio
    try:
        short_bio.append(result.find("div",{"class":"lawyer-expl"}).get_text().strip())
    except:
        short_bio.append("")
    #specialization
    try:
        specialization.append(result.find("span",{"class":"-practices"}).get_text())
    except:
        specialization.append("")
    #university
    try:
        university.append(result.find("span",{"class":"-law-schools"}).get_text())
    except:
        university.append("")
    #address
    try:
        address.append(result.find("strong",{"class":"lawyer-name"}).get_text().strip())
    except:
        address.append("")
    #phone
    try:
        phone.append(result.find("strong",{"class":"-phone"}).get_text().strip())
    except:
        phone.append("")
    #email link
    try:
        email_link.append(result.find("a",{"class":"-email"}).get("href"))
    except:
        email_link.append("")
    

### Create Pandas Dataframe

In [41]:
lawyers_df=pd.DataFrame({"lawyer_name":name,
                        "short_bio":short_bio,
                        "specialization":specialization,
                        "university":university,
                        "address":address,
                        "phone":phone,
                        "email_link":email_link})

In [42]:
lawyers_df

Unnamed: 0,lawyer_name,short_bio,specialization,university,address,phone,email_link
0,Doug Bend,"San Francisco, CA Attorney","Business, Entertainment & Sports, Real Estate ...",Georgetown University Law Center,Doug Bend,(415) 633-6841,https://lawyers.justia.com/lawyer/doug-bend-16...
1,Sam Amin,"San Francisco, CA Attorney",Criminal and DUI,John F. Kennedy University,Sam Amin,(415) 300-2037,https://lawyers.justia.com/lawyer/sam-amin-150...
2,Pezhman Pakneshan,"San Francisco, CA Attorney with 17 years of ex...","Civil Rights, Criminal, DUI and Domestic Violence",Univ of Pennsylvania LS,Pezhman Pakneshan,(415) 293-8454,https://lawyers.justia.com/lawyer/pezhman-pakn...
3,Christopher F. Morales,"San Francisco, CA Lawyer with 30 years of expe...","Criminal, DUI and Juvenile",Santa Clara Univ School of Law,Christopher F. Morales,(415) 552-1215,https://lawyers.justia.com/lawyer/christopher-...
4,Sweta Khandelwal,"San Francisco, CA Lawyer with 18 years of expe...",Immigration,"UCLA School of Law and Faculty of Law, Delhi U...",Sweta Khandelwal,(408) 317-4662,https://lawyers.justia.com/lawyer/sweta-khande...
5,Richard Alexander,"San Francisco, CA Attorney with 50 years of ex...","Asbestos, Consumer, Personal Injury and Produc...",The University of Chicago Law School,Richard Alexander,(415) 921-1776,https://lawyers.justia.com/lawyer/richard-alex...
6,Marin Cionca,"San Francisco, CA Attorney with 14 years of ex...","IP, Patents and Trademarks",Concord Law School,Marin Cionca,(800) 454-1360,https://lawyers.justia.com/lawyer/marin-cionca...
7,Jason M. Horst,"San Francisco, CA Lawyer with 15 years of expe...","Appeals, Business, Cannabis Law and Insurance ...",University of San Francisco School of Law,Jason M. Horst,(415) 871-6567,https://lawyers.justia.com/lawyer/jason-m-hors...
8,Debra Schoenberg,"San Francisco, CA Attorney with 34 years of ex...",Divorce and Family,University of New Hampshire School of Law,Debra Schoenberg,(415) 834-1120,https://lawyers.justia.com/lawyer/debra-schoen...
9,"Ali Shahrestani, Esq.","San Francisco, CA Lawyer with 14 years of expe...","Business, Criminal, Divorce and Education",University of California Hastings College of t...,"Ali Shahrestani, Esq.",(800) 510-3916,https://lawyers.justia.com/lawyer/ali-shahrest...


### Output in Excel

In [40]:
lawyers_df.to_excel("lawyers_single.xlsx", index=False)

### Part 2 - Pagination - Scrape 20 Pages

In [43]:
name=[]
short_bio=[]
specialization=[]
university=[]
address=[]
phone=[]
email_link=[]
for i in range(1,21):
    website="https://www.justia.com/lawyers/california/san-francisco?page="+str(i)
    response=requests.get(website)
    soup=BeautifulSoup(response.content, "html.parser")
    results=soup.findAll("div",{"data-vars-action":"OrganicListing"})
    for result in results:
        #name
        try:
            name.append(result.find("strong",{"class":"lawyer-name"}).get_text().strip())
        except:
            name.append("")
        #short_bio
        try:
            short_bio.append(result.find("div",{"class":"lawyer-expl"}).get_text().strip())
        except:
            short_bio.append("")
        #specialization
        try:
            specialization.append(result.find("span",{"class":"-practices"}).get_text())
        except:
            specialization.append("")
        #university
        try:
            university.append(result.find("span",{"class":"-law-schools"}).get_text())
        except:
            university.append("")
        #address
        try:
            address.append(result.find("strong",{"class":"lawyer-name"}).get_text().strip())
        except:
            address.append("")
        #phone
        try:
            phone.append(result.find("strong",{"class":"-phone"}).get_text().strip())
        except:
            phone.append("")
        #email link
        try:
            email_link.append(result.find("a",{"class":"-email"}).get("href"))
        except:
            email_link.append("")
            
lawyers_multiple_df=pd.DataFrame({"lawyer_name":name,
                        "short_bio":short_bio,
                        "specialization":specialization,
                        "university":university,
                        "address":address,
                        "phone":phone,
                        "email_link":email_link})
            

In [44]:
lawyers_multiple_df

Unnamed: 0,lawyer_name,short_bio,specialization,university,address,phone,email_link
0,Marin Cionca,"San Francisco, CA Attorney with 14 years of ex...","IP, Patents and Trademarks",Concord Law School,Marin Cionca,(800) 454-1360,https://lawyers.justia.com/lawyer/marin-cionca...
1,Christopher F. Morales,"San Francisco, CA Lawyer with 30 years of expe...","Criminal, DUI and Juvenile",Santa Clara Univ School of Law,Christopher F. Morales,(415) 552-1215,https://lawyers.justia.com/lawyer/christopher-...
2,Debra Schoenberg,"San Francisco, CA Attorney with 34 years of ex...",Divorce and Family,University of New Hampshire School of Law,Debra Schoenberg,(415) 834-1120,https://lawyers.justia.com/lawyer/debra-schoen...
3,Doug Bend,"San Francisco, CA Attorney","Business, Entertainment & Sports, Real Estate ...",Georgetown University Law Center,Doug Bend,(415) 633-6841,https://lawyers.justia.com/lawyer/doug-bend-16...
4,Randall H Scarlett,"San Francisco, CA Attorney","Asbestos, Civil Rights, Consumer and Personal ...",Golden Gate University School of Law,Randall H Scarlett,(415) 688-2176,https://lawyers.justia.com/lawyer/randall-h-sc...
...,...,...,...,...,...,...,...
795,Alexander Fredrick Harper,"Oakland, CA Lawyer with 16 years of experience","Criminal, DUI and Juvenile",University of Florida,Alexander Fredrick Harper,(510) 250-9477,https://lawyers.justia.com/lawyer/alexander-fr...
796,Bruce J. Napell,"San Rafael, CA Attorney with 37 years of exper...",Business,University of California Hastings College of t...,Bruce J. Napell,(415) 460-0100,https://lawyers.justia.com/lawyer/bruce-j-nape...
797,Arete Rita Kostopoulos,"Oakland, CA Lawyer",Bankruptcy,Wayne State University Law School,Arete Rita Kostopoulos,(510) 270-2782,https://lawyers.justia.com/lawyer/arete-rita-k...
798,Kimberly A. Madigan,"Redwood City, CA Lawyer with 19 years of exper...",Divorce and Family,Univ of Alberta,Kimberly A. Madigan,(650) 482-8480,https://lawyers.justia.com/lawyer/kimberly-a-m...


#### Excel

In [45]:
lawyers_multiple_df.to_excel("lawyers_multiple.xlsx", index=False)

#### Postgres

In [46]:
#create sqlalchemy engine
engine=sqlalchemy.create_engine('postgresql://postgres:12345@localhost:5432')
lawyers_multiple_df.to_sql("lawyers",engine,index=False)