# The Problem

Our client wants to extract finisher name, address, city, state, postal code, country, contact person and phone number from https://finishingandcoating.com/

### Loading required libraries, defining headers and functions

In [1]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

# Browser-like request headers attached to every HTTP request made by get_page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip',
    'DNT': '1',  # Do Not Track request header
    'Connection': 'close',
}

def get_page(url, timeout=30):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Seconds ``requests`` waits for the server before giving up
        (default 30). Without a timeout a single stalled connection would
        block the whole crawl indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response text parsed with the built-in ``html.parser``.

    Notes
    -----
    The HTTP status code is not checked: the body of an error response is
    parsed and returned like any other page. Connection errors and timeouts
    raised by ``requests`` propagate to the caller.
    """
    page = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(page.text, 'html.parser')
    return soup

### Getting all page links from a to z

In [2]:
# fetch and parse the first listing page (letter "a") with get_page
soup = get_page('https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a?site=1')

# every anchor carrying an href inside the first <ul class="pagination"> element
az_pages = soup.find_all('ul', {'class': 'pagination'})[0].find_all('a', href=True)

# absolute URL for each of those anchors, collected in the list `pages`
pages = ['https://finishingandcoating.com' + link['href'] for link in az_pages]

In [3]:
# preview the first ten a-z index URLs
pages[:10]

['https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/b/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/c/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/d/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/e/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/f/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/g/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/h/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/i/field_name',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/j/field_name']

### Going through each a-z page and getting all pages from 1 to N

In [4]:
all_finishers_pages = []  # every paginated listing URL, across all a-z pages

# going through each a-z page
for page in pages:

    soup = get_page(page)

    # pause after every request; the delay belongs next to the HTTP call,
    # not in the loop below that only builds URL strings
    time.sleep(1)

    # site-relative prefix of the pagination hrefs; removing it from an href
    # leaves only the page number
    p = page.replace('/field_name', '?site=').replace('https://finishingandcoating.com', '')

    pagination = soup.find_all('ul', {'class': 'pagination'})

    # finding the end page number
    end_page = 1
    if len(pagination) >= 3:
        # the last link of the second pagination list is read as the final
        # page; [-1] indexes the anchors themselves rather than relying on
        # the number of children of the <ul>
        page_links = pagination[1].find_all('a', href=True)
        if page_links:
            end_page = int(page_links[-1]['href'].replace(p, ''))

    # one listing URL per page number, 1..end_page inclusive
    all_finishers_pages.extend(
        page.replace('/field_name', '/?site=') + str(n)
        for n in range(1, end_page + 1)
    )

In [5]:
# preview the first ten paginated listing URLs
all_finishers_pages[:10]

['https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=1',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=2',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=3',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=4',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=5',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=6',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=7',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=8',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=9',
 'https://finishingandcoating.com/index.php/find-a-finisher/list/alpha/a/?site=10']

### Getting each finisher link from each final page

In [6]:
final_links = []  # absolute URL of every finisher detail page

for page in all_finishers_pages:

    soup = get_page(page)

    # each <h2 class="namefield"> on a listing page is expected to wrap the
    # link to one finisher's detail page
    for heading in soup.find_all('h2', {'class': 'namefield'}):
        anchor = heading.find('a', href=True)

        # find() returns None when the heading has no link; skip it rather
        # than abort the whole crawl with a TypeError on None['href']
        if anchor is None:
            continue

        final_links.append('https://finishingandcoating.com' + anchor['href'])

In [7]:
# preview the first ten finisher detail URLs
final_links[:10]

['https://finishingandcoating.com/index.php/find-a-finisher/1211-a-plus-powder-coaters',
 'https://finishingandcoating.com/index.php/find-a-finisher/2558-a-brite-plating',
 'https://finishingandcoating.com/index.php/find-a-finisher/2556-a-luster-metal-finishing',
 'https://finishingandcoating.com/index.php/find-a-finisher/2555-a-plus-finishing',
 'https://finishingandcoating.com/index.php/find-a-finisher/3348-a-l-finishing-company-1',
 'https://finishingandcoating.com/index.php/find-a-finisher/931-a-m-metal-finishing',
 'https://finishingandcoating.com/index.php/find-a-finisher/1210-a-a-powder-coating',
 'https://finishingandcoating.com/index.php/find-a-finisher/1209-a-e-powder-coating',
 'https://finishingandcoating.com/index.php/find-a-finisher/2554-a-f-plating',
 'https://finishingandcoating.com/index.php/find-a-finisher/2553-a-r-plating']

### Going through each final link and getting all requested data into a dataframe

In [8]:
def _field_text(soup, tag_name, css_class, label=''):
    """Return the text of the first ``tag_name`` element whose class attribute
    is ``css_class``, with ``label`` removed; 'NA' if it is missing or empty."""
    tag = soup.find(tag_name, {'class': css_class})
    if tag is None or len(tag) == 0:
        return 'NA'
    return tag.text.replace(label, '') if label else tag.text


def _contact_person(soup):
    """Return the contact person's name from a finisher page, or 'NA'."""
    label = 'Contact Person: '
    boxes = soup.find_all('div', {'class': 'spClassViewInbox'})
    if not boxes:
        return 'NA'

    # Prefer the div that actually carries the label: a purely positional
    # lookup picks a different field whenever one of the preceding fields is
    # absent from the page.
    for box in boxes:
        if label in box.text:
            return box.text.replace(label, '')

    # No labelled div: fall back to position, i.e. the fifth matching div
    # when there are at least five, otherwise the last one.
    box = boxes[4] if len(boxes) >= 5 else boxes[-1]
    return box.text.replace(label, '')


# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies it on every iteration.
records = []

for fl in final_links:

    soup = get_page(fl)

    records.append({'finisher': _field_text(soup, 'h1', 'namefield'),
                    'address': _field_text(soup, 'div', 'spClassViewInbox street'),
                    'city': _field_text(soup, 'div', 'spClassViewInbox city'),
                    'state': _field_text(soup, 'div', 'spClassViewInbox state'),
                    'postal_code': _field_text(soup, 'div', 'spClassViewInbox postcode'),
                    'country': _field_text(soup, 'div', 'spClassViewSelect country'),
                    'contact_person': _contact_person(soup),
                    'phone': _field_text(soup, 'div', 'spClassViewInbox phone', 'Phone: ')})

# explicit column list keeps the column order, and the columns exist even
# when no finisher page was scraped
data_base = pd.DataFrame(records, columns=['finisher',
                                           'address',
                                           'city',
                                           'state',
                                           'postal_code',
                                           'country',
                                           'contact_person',
                                           'phone'])

In [9]:
# show the first five scraped rows
data_base.head(n=5)

Unnamed: 0,finisher,address,city,state,postal_code,country,contact_person,phone
0,A Plus Powder Coaters,1384 Kauffman Ave,Columbiana,OH,44408,United States,Robert Bertelsen,330-482-1951
1,A-Brite Plating,3000 W 121St St,Cleveland,OH,44111,United States,Mojie Mirsalimi,216-252-2995
2,A-Luster Metal Finishing,Po Box 410068,Kansas City,MO,64141,United States,David Collins,816-471-2937
3,A-Plus Finishing,Po Box 966,Hudson,NH,3051,United States,Stan Araszkiewicz,603-595-1800
4,A. L. Finishing Company,925 Schwab Road,Hatfield,PA,19440,United States,Jamie Hemmerle,215-855-9422


In [10]:
# (number of rows, number of columns) of the scraped table
data_base.shape

(1071, 8)

In [None]:
# index=False keeps the pandas row index out of the file; otherwise it is
# written as an extra, unnamed first column next to the requested fields
data_base.to_csv('finishers.csv', index=False)