# Web Scraping LPSE - Vendor (Evaluation) Data

---

For an introduction to Selenium, please see [**this guide**](https://www.scrapingbee.com/blog/selenium-python/).

## Import modules

`%pip freeze > requirements.txt`

In [36]:
# Module for web scraping
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
# Module for data manipulation
import pandas as pd
from bs4 import BeautifulSoup

## Load the Chromedriver

Read how to download the ChromeDriver for Chrome [**here**](https://chromedriver.chromium.org/downloads).

In [37]:
# Tender code, kept in one place so another tender can be scraped by
# changing a single value
TENDER_ID = '139119'
# Main link: announcement page of the tender
main_link = 'https://lpse.lkpp.go.id/eproc4/lelang/{}/pengumumanlelang'.format(TENDER_ID)

In [38]:
# Start Chrome through the chromedriver binary stored next to the project,
# then open the tender's main page in it
DRIVER_PATH = '../bin/chromedriver'
driver = webdriver.Chrome(executable_path=DRIVER_PATH)
driver.get(main_link)

## Core Procedure

### 1 Get links

In [39]:
# Get links: every element with class 'nav-link' on the current page,
# reduced to its href attribute.
# find_elements(By..., ...) is used instead of find_elements_by_class_name
# because the latter no longer exists in recent Selenium releases.
links = driver.find_elements(By.CLASS_NAME, 'nav-link')
listLink = [link.get_attribute('href') for link in links]

In [40]:
# Map each navigation tab name to its link.
# The pairing is positional, so it relies on listLink holding the tabs in
# exactly this order.
navName = ['Pengumuman', 'Peserta', 'Hasil Evaluasi', 'Pemenang', 'Pemenang Berkontrak']
dictLink = {name: href for name, href in zip(navName, listLink)}
dictLink

{'Pengumuman': 'https://lpse.lkpp.go.id/eproc4/lelang/139119/pengumumanlelang',
 'Peserta': 'https://lpse.lkpp.go.id/eproc4/lelang/139119/peserta',
 'Hasil Evaluasi': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/hasil',
 'Pemenang': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/pemenang',
 'Pemenang Berkontrak': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/pemenangberkontrak'}

### 2 Get information of evaluation

#### Get link

In [41]:
# Link of the evaluation-result page: the entry stored under the
# 'Hasil Evaluasi' key of dictLink
linkEvaluation = dictLink['Hasil Evaluasi']
linkEvaluation

'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/hasil'

In [42]:
# Point the browser at the evaluation-result page; the lookups below read
# from whatever page the driver currently shows
driver.get(linkEvaluation)

#### Get column names

In [43]:
# Get the page element with class 'content'; the table head and body are
# looked up inside it.
# find_element(By..., ...) replaces find_element_by_class_name, which no
# longer exists in recent Selenium releases.
evaluationSummaryData = driver.find_element(By.CLASS_NAME, 'content')

In [44]:
# Elements for the column names: every <th> inside the table's <thead>
table_head = evaluationSummaryData.find_element(By.TAG_NAME, 'thead')
colNames = table_head.find_elements(By.TAG_NAME, 'th')

In [45]:
# Column names: the text of each header cell, with line breaks inside a
# header flattened into single spaces
listCols = [header.text.replace('\n', ' ') for header in colNames]
# Result
listCols

['No',
 'Nama Peserta',
 'K',
 'A',
 'T',
 'Penawaran',
 'Penawaran Terkoreksi',
 'H',
 'P',
 'PK',
 'Alasan']

#### Get data from table

In [46]:
# Data collections: one <tr> element per row of the table's <tbody>.
# find_element(s)(By..., ...) replaces the find_element(s)_by_tag_name
# helpers, which no longer exist in recent Selenium releases.
table_body = evaluationSummaryData.find_element(By.TAG_NAME, 'tbody')
dataCollection = table_body.find_elements(By.TAG_NAME, 'tr')
print('Length of data in one page: {} rows'.format(len(dataCollection)))

Length of data in one page: 23 rows


In [47]:
# One empty list per column name, to be filled row by row.
# A comprehension is used (not dict.fromkeys(listCols, [])) so that every
# column gets its own list object rather than all sharing one.
dict_init = {column: [] for column in listCols}
dict_init

{'No': [],
 'Nama Peserta': [],
 'K': [],
 'A': [],
 'T': [],
 'Penawaran': [],
 'Penawaran Terkoreksi': [],
 'H': [],
 'P': [],
 'PK': [],
 'Alasan': []}

In [48]:
# Get the data from the table.
#
# Every cell of every row is turned into one value:
#   * its visible text, when it has any;
#   * 'Star' when it is empty but contains the star image;
#   * 'Checklist' / 'Blacklist' / 'Strip' when it is empty but contains the
#     matching icon;
#   * None otherwise (empty cell, unknown image or icon, unreadable element).
# The values are appended to dict_init column by column, in table order.

# Image that marks a starred cell
STAR_IMAGE_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
# Icon class -> label stored in the data
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}
# Tags that only decorate a cell and are never a column of their own
ICON_TAGS = ('i', 'img')


def _cell_value(cell):
    """Return the value of one table cell, or None when it carries none."""
    try:
        text = cell.text
        if text != '':
            return text
        # No text: the cell may express its value as an image ...
        try:
            src = cell.find_element(By.TAG_NAME, 'img').get_attribute('src')
        except NoSuchElementException:
            pass
        else:
            return 'Star' if src == STAR_IMAGE_SRC else None
        # ... or as an icon
        try:
            icon_class = cell.find_element(By.TAG_NAME, 'i').get_attribute('class')
        except NoSuchElementException:
            return None
        # Unknown icon classes map to None, like every other "no value" case
        return ICON_LABELS.get(icon_class)
    except WebDriverException:
        # Best effort: a cell that cannot be read (e.g. it went stale) is
        # recorded as missing instead of aborting the whole scrape. Only
        # Selenium errors are absorbed; anything else still propagates.
        return None


# Column order is fixed for the whole table, so resolve it once
column_names = list(dict_init)

for row in dataCollection:
    # Every descendant of the row except the decorative icon/image tags
    cells = [
        elem for elem in row.find_elements(By.CSS_SELECTOR, '*')
        if elem.tag_name not in ICON_TAGS
    ]
    # Fail before touching dict_init, so a malformed row cannot leave the
    # columns with different lengths
    if len(cells) > len(column_names):
        raise IndexError(
            'Row has {} cells but the table has only {} columns'.format(
                len(cells), len(column_names)))

    values = [_cell_value(cell) for cell in cells]
    # Pad short rows so later rows stay aligned with their columns
    values.extend([None] * (len(column_names) - len(values)))

    # Append values
    for name, value in zip(column_names, values):
        dict_init[name].append(value)

## Convert into JSON

In [49]:
# Add the tender's code as identifier.
# The code is the second-to-last path segment of the tender URL
# (.../lelang/<code>/pengumumanlelang), so it is read from main_link
# instead of being typed a second time.
tender_code = main_link.rstrip('/').split('/')[-2]
dict_full = {
    tender_code: dict_init
}

In [50]:
# Data: display the nested dictionary (tender code -> column name -> values)
dict_full

{'139119': {'No': ['1',
   '2',
   '3',
   '4',
   '5',
   '6',
   '7',
   '8',
   '9',
   '10',
   '11',
   '12',
   '13',
   '14',
   '15',
   '16',
   '17',
   '18',
   '19',
   '20',
   '21',
   '22',
   '23'],
  'Nama Peserta': ['CV.CAHAYA BHAKTI - 01.822.785.0-424.000',
   'PT.ALDONIAL PUTRA PERKASA - 01.854.959.2-009.000',
   'CV. TIGO TUGKU SAJARANGAN - 02.551.310.2-201.000',
   'CV KIREI NA YUKI - 02.789.253.8-429.000',
   'CV ALBAR. - 02.699.386.5-532.000',
   'CV. DEWA LINTANG SAMUDRA - 31.365.510.2-502.000',
   'CV. ANDREW PUTRA RAJA BANGE - 03.135.611.6-001.000',
   'PT. INDO LORAN WIDIA JAYA - 01.366.483.4-023.000',
   'CV. Arby Putra - 21.028.251.3-424.000',
   'CV. Fortuna Abadi Jaya - 31.334.227.1-614.000',
   'CV. WAHYU PRATAMA ADYA - 02.906.537.2-006.000',
   'CV. BYTEL MITRACOMINDO - 21.018.469.3-426.000',
   'CV. NAGA SAKTI JAYA PERKASA - 31.181.290.3-424.000',
   'CV. KHARISMA RISTA - 01.678.111.4-428.000',
   'CV. SUCORPINDO - 31.347.989.1-121.000',
   'CV. TRI M