# Web Scraping LPSE - Vendor (Winner) Data

---

For introduction of Selenium, please be kind to open [**this site**](https://www.scrapingbee.com/blog/selenium-python/)

## Import modules

`%pip freeze > requirements.txt`

In [22]:
# Module for web scraping
from selenium import webdriver
# Module for data manipulation
import pandas as pd
from bs4 import BeautifulSoup

## Load the Chromedriver

Read how to download webdriver for Chrome [**here**]('https://chromedriver.chromium.org/downloads')

In [23]:
# Main link
main_link = 'https://lpse.lkpp.go.id/eproc4/lelang/139119/pengumumanlelang'

In [24]:
# Access to main link
DRIVER_PATH = '../bin/chromedriver'
driver = webdriver.Chrome(executable_path = DRIVER_PATH)
driver.get(main_link)

## Core Procedure

### 1 Get links

In [25]:
# Get links
links = driver.find_elements_by_class_name('nav-link')
listLink = [link.get_attribute('href') for link in links]

In [26]:
# List of link
navName = ['Pengumuman', 'Peserta', 'Hasil Evaluasi', 'Pemenang', 'Pemenang Berkontrak']
dictLink = dict(zip(navName, listLink))
dictLink

{'Pengumuman': 'https://lpse.lkpp.go.id/eproc4/lelang/139119/pengumumanlelang',
 'Peserta': 'https://lpse.lkpp.go.id/eproc4/lelang/139119/peserta',
 'Hasil Evaluasi': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/hasil',
 'Pemenang': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/pemenang',
 'Pemenang Berkontrak': 'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/pemenangberkontrak'}

### 2 Get information of winner

#### Get link

In [27]:
# Link of winners
linkWinner = dictLink['Pemenang']
linkWinner

'https://lpse.lkpp.go.id/eproc4/evaluasi/139119/pemenang'

In [28]:
# Access to winner's link
driver.get(linkWinner)

#### Get column names and values - 1

In [29]:
# Get the column elements
winnerSummaryData = driver.find_element_by_class_name('content')

In [30]:
# Data collections
dataCollectionFirst = winnerSummaryData.find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
print('Length of data in one page: {} rows'.format(len(dataCollectionFirst)))

Length of data in one page: 9 rows


In [31]:
# Get column names
listColNamesFirst = []
for idx in range(len(dataCollectionFirst) - 3):
    colNames = dataCollectionFirst[idx].find_elements_by_tag_name('th')[0].text
    listColNamesFirst.append(colNames)
# Result
listColNamesFirst

['Nama Tender', 'Jenis Pengadaan', 'Agency', 'Satuan Kerja', 'Pagu', 'HPS']

In [32]:
# Get values
listValuesFirst = []
for idx in range(len(dataCollectionFirst) - 3):
    value = dataCollectionFirst[idx].find_elements_by_tag_name('td')[0].text
    listValuesFirst.append(value)
# Result
listValuesFirst

['1 (satu) Paket Peralatan Observasi Geimagnet Landas Bumi',
 'Pengadaan Barang',
 'Lembaga Penerbangan Dan Antariksa Nasional (LAPAN)',
 'Pusat Sains Antariksa',
 'Rp. 1.556.954.000,00',
 'Rp. 1.556.849.700,00']

#### Get column names and values - 2

In [33]:
# Data collections
dataCollectionSecond = winnerSummaryData.find_element_by_tag_name('tbody').find_element_by_tag_name('tbody')

In [34]:
# Get column names
listColNamesSecond = []

dataCollectionSecondCol = dataCollectionSecond.find_elements_by_tag_name('th')
for elem in dataCollectionSecondCol:
    colNames = elem.text
    listColNamesSecond.append(colNames)
# Result
listColNamesSecond

['Nama Pemenang', 'Alamat', 'NPWP', 'Harga Penawaran', 'Harga Terkoreksi']

In [35]:
# Get values
listValuesSecond = []

dataCollectionSecondVal = dataCollectionSecond.find_elements_by_tag_name('td')
for val in dataCollectionSecondVal:
    value = val.text
    listValuesSecond.append(value)
# Result
listValuesSecond

['CV.CAHAYA BHAKTI',
 'Jl. Anyer 37 - Bandung (Kota) - Jawa Barat',
 '01.822.785.0-424.000',
 'Rp. 1.428.460.000,00',
 '']

### 3 Data preprocessing

In [36]:
# Column names
listColNamesFull = listColNamesFirst + listColNamesSecond
listColNamesFull

['Nama Tender',
 'Jenis Pengadaan',
 'Agency',
 'Satuan Kerja',
 'Pagu',
 'HPS',
 'Nama Pemenang',
 'Alamat',
 'NPWP',
 'Harga Penawaran',
 'Harga Terkoreksi']

In [37]:
# Values
listValuesFull = listValuesFirst + listValuesSecond
listValuesFull

['1 (satu) Paket Peralatan Observasi Geimagnet Landas Bumi',
 'Pengadaan Barang',
 'Lembaga Penerbangan Dan Antariksa Nasional (LAPAN)',
 'Pusat Sains Antariksa',
 'Rp. 1.556.954.000,00',
 'Rp. 1.556.849.700,00',
 'CV.CAHAYA BHAKTI',
 'Jl. Anyer 37 - Bandung (Kota) - Jawa Barat',
 '01.822.785.0-424.000',
 'Rp. 1.428.460.000,00',
 '']

## Convert JSON

In [38]:
# Add tender's code as identifier
dict_full = {
    '139119': dict(zip(
        listColNamesFull,
        listValuesFull
        )
    )
}

In [39]:
dict_full

{'139119': {'Nama Tender': '1 (satu) Paket Peralatan Observasi Geimagnet Landas Bumi',
  'Jenis Pengadaan': 'Pengadaan Barang',
  'Agency': 'Lembaga Penerbangan Dan Antariksa Nasional (LAPAN)',
  'Satuan Kerja': 'Pusat Sains Antariksa',
  'Pagu': 'Rp. 1.556.954.000,00',
  'HPS': 'Rp. 1.556.849.700,00',
  'Nama Pemenang': 'CV.CAHAYA BHAKTI',
  'Alamat': 'Jl. Anyer 37 - Bandung (Kota) - Jawa Barat',
  'NPWP': '01.822.785.0-424.000',
  'Harga Penawaran': 'Rp. 1.428.460.000,00',
  'Harga Terkoreksi': ''}}

## Convert into data frame

In [40]:
# Dictionary of data
dict_data = dict(zip(
            listColNamesFull,
            listValuesFull
            )
        )
# Add tender code
dict_data['Kode Tender'] = '7345119'

In [41]:
# Value
dict_data

{'Nama Tender': '1 (satu) Paket Peralatan Observasi Geimagnet Landas Bumi',
 'Jenis Pengadaan': 'Pengadaan Barang',
 'Agency': 'Lembaga Penerbangan Dan Antariksa Nasional (LAPAN)',
 'Satuan Kerja': 'Pusat Sains Antariksa',
 'Pagu': 'Rp. 1.556.954.000,00',
 'HPS': 'Rp. 1.556.849.700,00',
 'Nama Pemenang': 'CV.CAHAYA BHAKTI',
 'Alamat': 'Jl. Anyer 37 - Bandung (Kota) - Jawa Barat',
 'NPWP': '01.822.785.0-424.000',
 'Harga Penawaran': 'Rp. 1.428.460.000,00',
 'Harga Terkoreksi': '',
 'Kode Tender': '7345119'}

In [42]:
# Create a data frame
df = pd.DataFrame(
        data = dict_data,
        index = [0]
)
# Reorder columns
cols = df.columns
column_fix = [cols[-1]] + list(cols[:-1])
df = df[column_fix]
df = df.rename(columns = str.upper)

In [43]:
print('Dimension: {} rows and {} columns'.format(len(df), len(df.columns)))
df.head()

Dimension: 1 rows and 12 columns


Unnamed: 0,KODE TENDER,NAMA TENDER,JENIS PENGADAAN,AGENCY,SATUAN KERJA,PAGU,HPS,NAMA PEMENANG,ALAMAT,NPWP,HARGA PENAWARAN,HARGA TERKOREKSI
0,7345119,1 (satu) Paket Peralatan Observasi Geimagnet L...,Pengadaan Barang,Lembaga Penerbangan Dan Antariksa Nasional (LA...,Pusat Sains Antariksa,"Rp. 1.556.954.000,00","Rp. 1.556.849.700,00",CV.CAHAYA BHAKTI,Jl. Anyer 37 - Bandung (Kota) - Jawa Barat,01.822.785.0-424.000,"Rp. 1.428.460.000,00",
