# Web Scraping LPSE - Vendor (Participants) Data

---

For an introduction to Selenium, please see [**this guide**](https://www.scrapingbee.com/blog/selenium-python/).

## Import modules

`%pip freeze > requirements.txt`

In [8]:
# Module for web scraping
from selenium import webdriver
from selenium.webdriver.common.by import By
# Module for data manipulation
import pandas as pd
from bs4 import BeautifulSoup

## Load the Chromedriver

Read how to download the WebDriver for Chrome (ChromeDriver) [**here**](https://chromedriver.chromium.org/downloads).

In [9]:
# Tender identifier as it appears in the LPSE URL; change it here to target another tender
TENDER_ID = '7345119'
# Main link: the announcement page of the tender
main_link = 'https://lpse.lkpp.go.id/eproc4/lelang/{}/pengumumanlelang'.format(TENDER_ID)

In [10]:
# Access to main link: start Chrome through the chromedriver binary and open the page
# DRIVER_PATH is relative to the notebook's working directory
DRIVER_PATH = '../bin/chromedriver'
# NOTE(review): `executable_path` is deprecated in Selenium 4 — confirm the installed version still accepts it
driver = webdriver.Chrome(executable_path = DRIVER_PATH)
driver.get(main_link)

## Core Procedure

### 1 Get links

In [11]:
# Get links: collect every element with class 'nav-link' on the current page
# (find_elements with By replaces find_elements_by_class_name, which was removed in Selenium 4.3)
links = driver.find_elements(By.CLASS_NAME, 'nav-link')
# Keep only the target URL of each element
listLink = [link.get_attribute('href') for link in links]

In [12]:
# Map each tab label to its URL, pairing labels and links by position
# (zip stops at the shorter of the two sequences)
navName = ['Pengumuman', 'Peserta', 'Hasil Evaluasi', 'Pemenang', 'Pemenang Berkontrak']
dictLink = {label: url for label, url in zip(navName, listLink)}
dictLink

{'Pengumuman': 'https://lpse.lkpp.go.id/eproc4/lelang/7345119/pengumumanlelang',
 'Peserta': 'https://lpse.lkpp.go.id/eproc4/lelang/7345119/peserta',
 'Hasil Evaluasi': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/hasil',
 'Pemenang': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/pemenang',
 'Pemenang Berkontrak': 'https://lpse.lkpp.go.id/eproc4/evaluasi/7345119/pemenangberkontrak'}

### 2 Get information of participants

#### Get link

In [13]:
# Link of participants: the URL stored under the 'Peserta' label
linkParticipant = dictLink['Peserta']
linkParticipant

'https://lpse.lkpp.go.id/eproc4/lelang/7345119/peserta'

In [14]:
# Access to participant's link: point the already-open browser at the participants page
driver.get(linkParticipant)

#### Get column names

In [15]:
# Element with class 'content'; the table header and body are looked up inside it
# (find_element with By replaces find_element_by_class_name, which was removed in Selenium 4.3)
participantSummaryData = driver.find_element(By.CLASS_NAME, 'content')

In [16]:
# Elements for column names: every <th> inside the table's <thead>
# (find_element/find_elements with By replaces the find_*_by_tag_name helpers removed in Selenium 4.3)
colNames = participantSummaryData.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'th')

In [17]:
# Column names: header text with line breaks flattened into single spaces
listCols = [header.text.replace('\n', ' ') for header in colNames]
# Result
listCols

['No', 'Nama Peserta', 'NPWP', 'Harga Penawaran', 'Harga Terkoreksi']

#### Get data from table

In [20]:
# Data collections: every <tr> row inside the table's <tbody>
# (find_element/find_elements with By replaces the find_*_by_tag_name helpers removed in Selenium 4.3)
dataCollection = participantSummaryData.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')
print('Length of data in one page: {} rows'.format(len(dataCollection)))

Length of data in one page: 42 rows


In [21]:
# One separate empty list per column name, to be filled with the cell values
dict_init = dict((column, []) for column in listCols)
dict_init

{'No': [],
 'Nama Peserta': [],
 'NPWP': [],
 'Harga Penawaran': [],
 'Harga Terkoreksi': []}

In [22]:
# Get data: append the text of every table cell to the list of its column.
# Re-running this cell appends the same rows again, so re-create dict_init first.
# The key list is built once here instead of once per cell; keys keep their insertion order.
columns = list(dict_init)
for row in dataCollection:
    # find_elements with By replaces find_elements_by_tag_name, which was removed in Selenium 4.3
    elemValues = row.find_elements(By.TAG_NAME, 'td')
    for col, cell in enumerate(elemValues):
        value = cell.text
        # Append values; the cell position selects the column
        # (a row with more cells than columns raises IndexError)
        dict_init[columns[col]].append(value)

In [23]:
# Data: display the collected values, one list per column
dict_init

{'No': ['1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25',
  '26',
  '27',
  '28',
  '29',
  '30',
  '31',
  '32',
  '33',
  '34',
  '35',
  '36',
  '37',
  '38',
  '39',
  '40',
  '41',
  '42'],
 'Nama Peserta': ['PT. PILAR CIPTA SOLUSI INTEGRATIKA',
  'PT. Transtellar Inti Mitra',
  'PT. IP NETWORK SOLUSINDO',
  'PT. GEOJAYA TEHNIK',
  'PT. Royston Advisory Indonesia',
  'PT.PUTRA BENTAR ENDAH',
  'PT.SEMESTA MUDA BERKARYA',
  'PT. Rangkai Data Solusi',
  'PT. TRIMITRA DATA TEKNOLOGI',
  'PT SINAR SURYA TEKNOLOGI',
  'PT. Daya Makara UI',
  'PT.WIDYA SOLUSI UTAMA',
  'PT. BARN CITA LAKSANA',
  'PT. NUSA TIARA',
  'PI AREA',
  'PT. CAKRABUANA CONSULTANTS',
  'PT. PricewaterhouseCoopers Consulting Indonesia',
  'CV FINDELAU YAHEAI',
  'PT. Miskat Alam Pro',
  'PT. Binadaya Inti Dinamika',
  'CV ROBOTSOFT',
  'CV. LANTAR CIPTA MEDIA',
  'PT. PRIMA SYS

## Convert into JSON

In [38]:
# Add tender's code as identifier: nest the column data under a single top-level key.
# The value is dict_init itself, not a copy.
# NOTE(review): the key '139119' differs from the id 7345119 used in main_link — confirm which is the tender code
dict_full = {
    '139119': dict_init
}

In [39]:
# Data: display the nested dictionary
dict_full

{'139119': {'No': ['1',
   '2',
   '3',
   '4',
   '5',
   '6',
   '7',
   '8',
   '9',
   '10',
   '11',
   '12',
   '13',
   '14',
   '15',
   '16',
   '17',
   '18',
   '19',
   '20',
   '21',
   '22',
   '23',
   '24',
   '25',
   '26',
   '27',
   '28',
   '29',
   '30',
   '31',
   '32',
   '33',
   '34',
   '35',
   '36',
   '37',
   '38',
   '39',
   '40',
   '41',
   '42'],
  'Nama Peserta': ['PT. PILAR CIPTA SOLUSI INTEGRATIKA',
   'PT. Transtellar Inti Mitra',
   'PT. IP NETWORK SOLUSINDO',
   'PT. GEOJAYA TEHNIK',
   'PT. Royston Advisory Indonesia',
   'PT.PUTRA BENTAR ENDAH',
   'PT.SEMESTA MUDA BERKARYA',
   'PT. Rangkai Data Solusi',
   'PT. TRIMITRA DATA TEKNOLOGI',
   'PT SINAR SURYA TEKNOLOGI',
   'PT. Daya Makara UI',
   'PT.WIDYA SOLUSI UTAMA',
   'PT. BARN CITA LAKSANA',
   'PT. NUSA TIARA',
   'PI AREA',
   'PT. CAKRABUANA CONSULTANTS',
   'PT. PricewaterhouseCoopers Consulting Indonesia',
   'CV FINDELAU YAHEAI',
   'PT. Miskat Alam Pro',
   'PT. Binadaya Inti D

## Convert into data frame

In [40]:
# Dictionary of data: the column data stored under the tender's key
# (this is the same object that dict_full holds, not a copy)
tender_code = '139119'
dict_data = dict_full[tender_code]
# Add tender code, repeated once per entry of the 'No' column;
# because of the shared object, the new key also appears inside dict_full
# NOTE(review): the saved df.head() output shows 7345119 in this column — confirm the intended code
dict_data['Kode Tender'] = [tender_code] * len(dict_data['No'])

In [46]:
# Build the data frame: each dictionary key becomes a column
df = pd.DataFrame(dict_data)

In [47]:
# Report the frame size (df.shape is (rows, columns)), then preview the first rows
print('Dimension: {} rows and {} columns'.format(*df.shape))
df.head()

Dimension: 42 rows and 6 columns


Unnamed: 0,No,Nama Peserta,NPWP,Harga Penawaran,Harga Terkoreksi,Kode Tender
0,1,PT. PILAR CIPTA SOLUSI INTEGRATIKA,75.056.654.9-542.000,"Rp. 530.992.000,00","Rp. 530.992.000,00",7345119
1,2,PT. Transtellar Inti Mitra,72.334.802.5-061.000,"Rp. 537.900.000,00","Rp. 537.900.000,00",7345119
2,3,PT. IP NETWORK SOLUSINDO,21.009.448.8-028.000,,,7345119
3,4,PT. GEOJAYA TEHNIK,01.313.175.0-014.000,,,7345119
4,5,PT. Royston Advisory Indonesia,02.742.014.0-072.000,,,7345119
