# Web Scraping LPSE - Vendor (Evaluation) Data

---

For an introduction to Selenium, please see [**this site**](https://www.scrapingbee.com/blog/selenium-python/).

## Import modules

`%pip freeze > requirements.txt`

In [1]:
# Module for web scraping
from selenium import webdriver
# Module for data manipulation
import pandas as pd
from bs4 import BeautifulSoup

## Load the Chromedriver

Read how to download the webdriver for Chrome [**here**](https://chromedriver.chromium.org/downloads).

In [2]:
# Main link
main_link = 'https://lpse.lkpp.go.id/eproc4/lelang/7345119/pengumumanlelang'

In [3]:
# Access to main link
# Start a Chrome session through the chromedriver binary at DRIVER_PATH
# (relative to the notebook's working directory) and load the main page.
# NOTE(review): `executable_path` is the Selenium 3 style keyword; newer
# Selenium releases are expected to want a Service object instead -- confirm
# against the version pinned in requirements.txt.
DRIVER_PATH = '../bin/chromedriver'
driver = webdriver.Chrome(executable_path = DRIVER_PATH)
driver.get(main_link)

## Core Procedure

### 1 Get links

In [4]:
# Collect every element carrying the "nav-link" class on the current page,
# then keep only the URL each one points to.
links = driver.find_elements_by_class_name('nav-link')
listLink = [tab.get_attribute('href') for tab in links]

In [5]:
# List of link
# Pair hand-written tab names with the scraped hrefs purely by position.
# NOTE(review): zip() stops at the shorter input, so this presumably relies on
# the page exposing exactly these five tabs in this order -- verify when
# pointing the notebook at another tender.
navName = ['Pengumuman', 'Peserta', 'Hasil Evaluasi', 'Pemenang', 'Pemenang Berkontrak']
dictLink = dict(zip(navName, listLink))
dictLink

{'Pengumuman': 'https://lpse.lkpp.go.id/eproc4/lelang/7345119/pengumumanlelang',
 'Peserta': 'https://lpse.lkpp.go.id/eproc4/lelang/7345119/peserta',
 'Hasil Evaluasi': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/hasil',
 'Pemenang': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/pemenang',
 'Pemenang Berkontrak': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/pemenangberkontrak'}

### 2 Get information of evaluation

#### Get link

In [7]:
# Link of evaluation
linkEvaluation = dictLink['Hasil Evaluasi']
linkEvaluation

'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/hasil'

In [8]:
# Access to evaluation's link
driver.get(linkEvaluation)

#### Get column names

In [9]:
# Get the column elements
evaluationSummaryData = driver.find_element_by_class_name('content')

In [17]:
# Header cells: every <th> found inside the <thead> of the content container
tableHead = evaluationSummaryData.find_element_by_tag_name('thead')
colNames = tableHead.find_elements_by_tag_name('th')

In [25]:
# Column names: the text of each header cell, with line breaks inside a
# header flattened to single spaces
listCols = [header.text.replace('\n', ' ') for header in colNames]
listCols

['No',
 'Nama Peserta',
 'K',
 'Skor Kualifkasi',
 'Skor Pembuktian',
 'B',
 'A',
 'T',
 'Skor Teknis',
 'Penawaran',
 'Penawaran Terkoreksi',
 'Hasil Negosiasi',
 'H',
 'Skor Harga',
 'Skor Akhir',
 'P',
 'PK',
 'Alasan']

#### Get data from table

In [11]:
# Data rows: every <tr> found inside the <tbody> of the content container
tableBody = evaluationSummaryData.find_element_by_tag_name('tbody')
dataCollection = tableBody.find_elements_by_tag_name('tr')
print('Length of data in one page: {} rows'.format(len(dataCollection)))

Length of data in one page: 42 rows


In [361]:
# Dictionary with blank list
# One separate empty list per column name (built by a comprehension, so the
# lists are not shared between keys).
# The extraction loops further down append to these lists in place, so re-run
# this cell before re-running any of them; otherwise rows accumulate.
dict_init = {key: [] for key in listCols}
dict_init

{'No': [],
 'Nama Peserta': [],
 'K': [],
 'Skor Kualifkasi': [],
 'Skor Pembuktian': [],
 'B': [],
 'A': [],
 'T': [],
 'Skor Teknis': [],
 'Penawaran': [],
 'Penawaran Terkoreksi': [],
 'Hasil Negosiasi': [],
 'H': [],
 'Skor Harga': [],
 'Skor Akhir': [],
 'P': [],
 'PK': [],
 'Alasan': []}

In [312]:
row = dataCollection[1]
col = 2

In [252]:
row.find_elements_by_tag_name('th')[0].text

'96.0'

In [257]:
row.find_elements_by_tag_name('td')[3].find_element_by_tag_name('i').get_attribute('class')

'fa fa-check'

In [142]:
# Resolve what is shown in the `col`-th <td> of `row`:
#   * an <img> pointing at the star picture -> 'Star' (any other image -> None)
#   * an <i> icon                           -> label looked up from its class
#   * otherwise                             -> the cell's text
# Presence is tested with find_elements (empty list when absent) instead of
# catching the lookup error, and an unrecognised icon class now yields None
# rather than leaving `value` unassigned or holding a previous cell's result.
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}

try:
    cell = row.find_elements_by_tag_name('td')[col]
    images = cell.find_elements_by_tag_name('img')
    if images:
        value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
    else:
        icons = cell.find_elements_by_tag_name('i')
        if icons:
            value = ICON_LABELS.get(icons[0].get_attribute('class'))
        else:
            value = cell.text
except Exception:
    # Best effort, e.g. `col` beyond the number of <td> cells. Exception (not
    # a bare except) keeps Ctrl-C / kernel interrupt working.
    value = None

In [143]:
value

'Star'

In [149]:
col

13

In [148]:
list(dict_init.keys())[col]

'Skor Harga'

In [147]:
dict_init[list(dict_init.keys())[col]]

['',
 'Star',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '']

In [238]:
# Get data
# For every row, read one value per column and append it to dict_init.
# Columns are looked up by <td> position; once the <td> cells run out, the
# remaining columns are taken from the row's <th> cells one after another.
# NOTE(review): the tag dump elsewhere in this notebook shows <th> cells
# interleaved with <td> cells, so a <td> position is not the same thing as a
# column position; this cell keeps the original lookup strategy and only
# repairs its bookkeeping.
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}
# Key order of dict_init, computed once instead of on every append
colKeys = list(dict_init)

for row in dataCollection:
    # Query the row once rather than once per branch per column
    tdCells = row.find_elements_by_tag_name('td')
    thCells = row.find_elements_by_tag_name('th')
    # Reset per row (not per column) so each fallback reads the next <th>
    index = 0
    for col in range(len(listCols)):
        try:
            if col < len(tdCells):
                cell = tdCells[col]
                images = cell.find_elements_by_tag_name('img')
                if images:
                    value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
                else:
                    icons = cell.find_elements_by_tag_name('i')
                    if icons:
                        # Unknown icon classes become None instead of
                        # silently reusing the previous cell's value
                        value = ICON_LABELS.get(icons[0].get_attribute('class'))
                    else:
                        value = cell.text
            else:
                value = thCells[index].text
                index += 1
        except Exception:
            # Best effort per cell; Exception keeps the loop interruptible
            value = None
        # Append values
        dict_init[colKeys[col]].append(value)

In [231]:
# Probe a single cell: prefer its text, and only when the text is empty fall
# back to the picture/icon it contains.
row = dataCollection[1]
col = 3

STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}

# Look the cell up once and reuse it
cell = row.find_elements_by_tag_name('td')[col]
value = cell.text
if value == '':
    # find_elements returns an empty list when nothing matches, so no
    # exception handling is needed to test for presence
    images = cell.find_elements_by_tag_name('img')
    if images:
        value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
    else:
        icons = cell.find_elements_by_tag_name('i')
        if icons:
            # An unrecognised icon class leaves the empty string in place
            value = ICON_LABELS.get(icons[0].get_attribute('class'), value)
        else:
            value = None

In [234]:
row.find_elements_by_tag_name('td')[col].find_element_by_tag_name('i').get_attribute('class')

'fa fa-check'

In [331]:
len(row.find_elements_by_css_selector("*"))

25

In [337]:
# Print the tag name of every descendant element of the row
for node in row.find_elements_by_css_selector("*"):
    print(node.tag_name)

td
td
td
i
th
th
td
i
td
i
td
i
th
td
td
td
td
i
td
td
td
img
td
img
td


In [342]:
'A' not in ['A', 'B']

False

In [343]:
# Print the text of every descendant of the row that is not an icon (<i>) or
# picture (<img>) and remember the position of each kept element in `idx`.
# The descendants are fetched once and numbered with enumerate().
idx = []
for position, node in enumerate(row.find_elements_by_css_selector("*")):
    if node.tag_name in ['i', 'img']:
        continue
    print(node.text)
    idx.append(position)

2
PT. Transtellar Inti Mitra - 72.334.802.5-061.000

96.0
96.0



89.6
Rp. 537.900.000,00
Rp. 537.900.000,00
Rp. 537.900.000,00

98.72
91.42





In [344]:
len(idx)

18

In [356]:
# Keep every descendant of the row except icon (<i>) and picture (<img>)
# elements
elemValues = [
    node
    for node in row.find_elements_by_css_selector('*')
    if node.tag_name not in ['i', 'img']
]

In [None]:
elem = elemValues
col

In [360]:
# Probe element `col` of `elem`: prefer its text, and only when the text is
# empty fall back to the picture/icon it contains.
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}

try:
    cell = elem[col]
    value = cell.text
    if value == '':
        # find_elements returns an empty list when nothing matches, so
        # presence is tested without raising
        images = cell.find_elements_by_tag_name('img')
        if images:
            value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
        else:
            icons = cell.find_elements_by_tag_name('i')
            if icons:
                # An unrecognised icon class leaves the empty string in place
                value = ICON_LABELS.get(icons[0].get_attribute('class'), value)
            else:
                value = None
except Exception:
    # Best effort, e.g. `col` out of range. Exception (not a bare except)
    # keeps Ctrl-C / kernel interrupt working.
    value = None

<selenium.webdriver.remote.webelement.WebElement (session="311ad631a4ac1d8aca67e3dbaf3067dd", element="e4637aec-f68b-48af-ba52-0f0c6d4ff4c6")>

# =============

In [362]:
# Fill dict_init from the evaluation table: for each row, take its cells in
# page order and append one value per cell to the column at the same position.
# A cell's value is its text; when the text is empty, the star picture maps
# to 'Star' and a known icon class maps to its label.
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}
# Key order of dict_init, computed once instead of on every append
colKeys = list(dict_init)

for row in dataCollection:
    # Ask for the <td>/<th> cells directly instead of fetching every
    # descendant and filtering out <i>/<img> by tag name afterwards: one
    # query per row, and other markup nested in a cell cannot be mistaken
    # for a column.
    elemValues = row.find_elements_by_css_selector('td, th')

    for col, cell in enumerate(elemValues):
        try:
            value = cell.text
            if value == '':
                # find_elements returns an empty list when nothing matches,
                # so presence is tested without raising
                images = cell.find_elements_by_tag_name('img')
                if images:
                    value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
                else:
                    icons = cell.find_elements_by_tag_name('i')
                    if icons:
                        # An unrecognised icon class leaves the empty string
                        value = ICON_LABELS.get(icons[0].get_attribute('class'), value)
                    else:
                        value = None
        except Exception:
            # Best effort per cell; Exception (not a bare except) keeps the
            # loop interruptible with Ctrl-C / kernel interrupt
            value = None
        # Append values (an IndexError here means the row has more cells
        # than there are columns, i.e. the page layout changed)
        dict_init[colKeys[col]].append(value)

In [363]:
dict_init

{'No': ['1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25',
  '26',
  '27',
  '28',
  '29',
  '30',
  '31',
  '32',
  '33',
  '34',
  '35',
  '36',
  '37',
  '38',
  '39',
  '40',
  '41',
  '42'],
 'Nama Peserta': ['PT. PILAR CIPTA SOLUSI INTEGRATIKA - 75.056.654.9-542.000',
  'PT. Transtellar Inti Mitra - 72.334.802.5-061.000',
  'PT. IP NETWORK SOLUSINDO - 21.009.448.8-028.000',
  'PT. GEOJAYA TEHNIK - 01.313.175.0-014.000',
  'PT. Royston Advisory Indonesia - 02.742.014.0-072.000',
  'PT.PUTRA BENTAR ENDAH - 01.339.061.2-031.000',
  'PT.SEMESTA MUDA BERKARYA - 95.026.345.9-411.000',
  'PT. Rangkai Data Solusi - 96.411.455.7-043.000',
  'PT. TRIMITRA DATA TEKNOLOGI - 03.215.086.4-063.000',
  'PT SINAR SURYA TEKNOLOGI - 02.901.606.0-063.000',
  'PT. Daya Makara UI - 02.109.525.2-412.000',
  'PT.WIDYA SOLUSI UTAMA - 72.053.985.7-517.000',
  'PT. BARN 

# =============

In [321]:
# Walk every descendant of the row, skip icon (<i>) and picture (<img>)
# elements, and resolve the value of each remaining element into `value`
# (after the loop `value` holds the result for the last element visited).
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}

for node in row.find_elements_by_css_selector('*'):
    # The original condition had lost its left-hand operand (`if in [...]`)
    if node.tag_name in ['i', 'img']:
        continue

    try:
        value = node.text
        if value == '':
            # find_elements returns an empty list when nothing matches, so
            # presence is tested without raising
            images = node.find_elements_by_tag_name('img')
            if images:
                value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
            else:
                icons = node.find_elements_by_tag_name('i')
                if icons:
                    # An unrecognised icon class leaves the empty string
                    value = ICON_LABELS.get(icons[0].get_attribute('class'), value)
                else:
                    value = None
    except Exception:
        # Best effort per element; Exception keeps the loop interruptible
        value = None

In [323]:
value

In [302]:
row.find_elements_by_css_selector("*")[2].find_element_by_tag_name('i').get_attribute('class')

'fa fa-check'

In [304]:
# Fill dict_init from the table: one value per cell, appended to the column
# at the same position as the cell within its row.
# Two fixes over the earlier version of this cell:
#   * the column index now comes from the cell's position; before, `col` was
#     never assigned inside the loop, so every value went to whichever column
#     a previous cell had left in `col`
#   * only <td>/<th> cells are visited; before, the <i>/<img> elements nested
#     inside cells were appended as if they were cells of their own
STAR_SRC = 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif'
ICON_LABELS = {
    'fa fa-check': 'Checklist',
    'fa fa-close': 'Blacklist',
    'fa fa-minus': 'Strip',
}
# Key order of dict_init, computed once instead of on every append
colKeys = list(dict_init)

for row in dataCollection:
    colValues = row.find_elements_by_css_selector('td, th')
    for col, colval in enumerate(colValues):
        try:
            value = colval.text
            if value == '':
                # find_elements returns an empty list when nothing matches,
                # so presence is tested without raising
                images = colval.find_elements_by_tag_name('img')
                if images:
                    value = 'Star' if images[0].get_attribute('src') == STAR_SRC else None
                else:
                    icons = colval.find_elements_by_tag_name('i')
                    if icons:
                        # An unrecognised icon class leaves the empty string
                        value = ICON_LABELS.get(icons[0].get_attribute('class'), value)
                    else:
                        value = None
        except Exception:
            # Best effort per cell; Exception keeps the loop interruptible
            value = None
        # Append values
        dict_init[colKeys[col]].append(value)

In [232]:
value

'Checklist'

In [None]:
# Draft of the per-cell value lookup (never executed: the cell is In [None]).
# NOTE(review): `colval` and `index` are not assigned in this cell; they must
# come from an earlier cell for this to run at all.
# NOTE(review): the img/icon branches are only entered when `colval.text`
# raises twice in a row, so they look unreachable in practice -- a cell whose
# text is merely empty never gets its picture/icon resolved here.
# NOTE(review): the bare `except:` clauses also swallow KeyboardInterrupt.
try:
    try:
        # First attempt: plain text of the element
        value = colval.text
    except:
        try:
            # Second attempt repeats the same `.text` read
            value = colval.text
            index += 1

        except:
            try:
                # Picture: only the star image is recognised, anything else is None
                val = colval.find_element_by_tag_name('img').get_attribute('src')
                if val == 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif':
                    value = 'Star'
                else:
                    value = None
            except:
                try:
                    # Icon: map the class attribute to a label; an unmatched
                    # class leaves `value` untouched
                    val = colval.find_element_by_tag_name('i').get_attribute('class')
                    if val == 'fa fa-check':
                        value = 'Checklist'
                    elif val == 'fa fa-close':
                        value = 'Blacklist'
                    elif val == 'fa fa-minus':
                        value = 'Strip'
                except:
                    value = None
except:
    value = None

In [223]:
# Get data
# Draft extraction loop (In [223]); presumably an earlier attempt than the
# loops with higher execution counts in this notebook -- confirm before reuse.
# For each row and column: read the text of the col-th <td>; if it is empty,
# try the star picture, then the icon class.
# NOTE(review): `index` is reset to 0 for every column, so the `index += 1`
# below never carries over to the next column.
# NOTE(review): the <th> fallback runs only if `value` is still '' after the
# picture/icon step, which happens only for an icon whose class matches none
# of the three known ones; every other path has set a label or None by then.
# NOTE(review): when `col` is beyond the number of <td> cells the IndexError
# is caught by the outer `except:` and the value becomes None.
for row in dataCollection:
    for col in range(len(listCols)):
        index = 0
        try:
            value = row.find_elements_by_tag_name('td')[col].text
            if value == '':
                try:
                    # Picture: only the star image is recognised
                    val = row.find_elements_by_tag_name('td')[col].find_element_by_tag_name('img').get_attribute('src')
                    if val == 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif':
                        value = 'Star'
                    else:
                        value = None
                except:
                    try:
                        # Icon: map the class attribute to a label
                        val = row.find_elements_by_tag_name('td')[col].find_element_by_tag_name('i').get_attribute('class')
                        if val == 'fa fa-check':
                            value = 'Checklist'
                        elif val == 'fa fa-close':
                            value = 'Blacklist'
                        elif val == 'fa fa-minus':
                            value = 'Strip'
                    except:
                        value = None
                
                if value == '':
                    value = row.find_elements_by_tag_name('th')[index].text
                    index += 1
        except:
            value = None
        # Append values
        # The key is found by position in dict_init's key order
        dict_init[list(dict_init.keys())[col]].append(value)

In [249]:
len(evaluationSummaryData.find_element_by_tag_name('tbody').find_elements_by_tag_name('tr'))

42

In [None]:
.find_elements_by_tag_name('th')

In [202]:
# Get data
# Draft extraction loop (In [202]); presumably the earliest attempt among the
# extraction loops in this notebook -- confirm before reuse.
# For each row and column: take the text of the col-th <td>; if that lookup
# raises, take the text of a <th>; if that raises too, try picture, then icon.
# NOTE(review): `index` is reset to 0 for every column, so the <th> fallback
# always reads the first <th> and `index += 1` has no lasting effect.
# NOTE(review): the picture/icon branches are entered only after both the
# <td> and the <th> lookup have raised, and they index the same <td> again;
# a <td> whose text is merely empty never reaches them.
for row in dataCollection:
    for col in range(len(listCols)):
        index = 0
        try:
            try:
                value = row.find_elements_by_tag_name('td')[col].text
            except:
                try:
                    value = row.find_elements_by_tag_name('th')[index].text
                    index += 1
                    
                except:
                    try:
                        # Picture: only the star image is recognised
                        val = row.find_elements_by_tag_name('td')[col].find_element_by_tag_name('img').get_attribute('src')
                        if val == 'https://lpse.lkpp.go.id/eproc4/public/images/star.gif':
                            value = 'Star'
                        else:
                            value = None
                    except:
                        try:
                            # Icon: map the class attribute to a label
                            val = row.find_elements_by_tag_name('td')[col].find_element_by_tag_name('i').get_attribute('class')
                            if val == 'fa fa-check':
                                value = 'Checklist'
                            elif val == 'fa fa-close':
                                value = 'Blacklist'
                            elif val == 'fa fa-minus':
                                value = 'Strip'
                        except:
                            value = None
        except:
            value = None
        # Append values
        # The key is found by position in dict_init's key order
        dict_init[list(dict_init.keys())[col]].append(value)

In [239]:
dict_init

{'No': ['1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25',
  '26',
  '27',
  '28',
  '29',
  '30',
  '31',
  '32',
  '33',
  '34',
  '35',
  '36',
  '37',
  '38',
  '39',
  '40',
  '41',
  '42'],
 'Nama Peserta': ['PT. PILAR CIPTA SOLUSI INTEGRATIKA - 75.056.654.9-542.000',
  'PT. Transtellar Inti Mitra - 72.334.802.5-061.000',
  'PT. IP NETWORK SOLUSINDO - 21.009.448.8-028.000',
  'PT. GEOJAYA TEHNIK - 01.313.175.0-014.000',
  'PT. Royston Advisory Indonesia - 02.742.014.0-072.000',
  'PT.PUTRA BENTAR ENDAH - 01.339.061.2-031.000',
  'PT.SEMESTA MUDA BERKARYA - 95.026.345.9-411.000',
  'PT. Rangkai Data Solusi - 96.411.455.7-043.000',
  'PT. TRIMITRA DATA TEKNOLOGI - 03.215.086.4-063.000',
  'PT SINAR SURYA TEKNOLOGI - 02.901.606.0-063.000',
  'PT. Daya Makara UI - 02.109.525.2-412.000',
  'PT.WIDYA SOLUSI UTAMA - 72.053.985.7-517.000',
  'PT. BARN 