# Scraping Data From Yahoo Finance with BeautifulSoup4


Data scraping, also known as web scraping, is the process of importing information from a website into a spreadsheet. In this project, we will scrape data 

In [9]:
import requests 
from bs4 import BeautifulSoup
import bs4
import numpy as np
import time
import pandas as pd
import datetime

In [14]:
#Define stock code list
LQ45 = ["ACES", "ADRO", "AKRA", "ANTM", "ASII", "BBCA", "BBNI", "BBRI", "BBTN",  "BMRI", "BSDE", 
              "BTPS", "CPIN","CTRA", "ERAA", "EXCL", "GGRM", "HMSP", "ICBP", "INCO", "INDF","INKP", "INTP", 
              "ITMG", "JPFA", "JSMR","KLBF", "MDKA", "MIKA",  "MNCN", "PGAS", "PTBA", "PTPP", "PWON", "SCMA", 
              "SMGR", "SMRA", "SRIL", "TBIG", "TKIM", "TLKM", "TOWR", "UNTR", "UNVR", "WIKA"]
data = {'market_cap':[],'beta':[],'PE':[],'EPS':[],'cash':[], 'BVPS':[], 'price':[]}
USDtoIDR = 14100


In [16]:
for x in LQ45:
    # Yahoo Finance URL 
    url = "https://finance.yahoo.com/quote/{}.JK?p={}.JK&.tsrc=fin-srch".format(
            x, x)
    
    # Get Request from YF
    r = requests.get(url)
    soup = bs4.BeautifulSoup(r.text, 'html.parser')
    
    # Get Market Cap, Beta, PE, EPS data
    market_cap = soup.find('div', {'class': 'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)'}
                           ).find_all('span', {'class': 'Trsdu(0.3s)'})[0].text
    beta = soup.find('div', {'class': 'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)'}
                     ).find_all('span', {'class': 'Trsdu(0.3s)'})[1].text
    PE = soup.find('div', {'class': 'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)'}
                   ).find_all('span', {'class': 'Trsdu(0.3s)'})[2].text.replace(',', '.')
    EPS = soup.find('div', {'class': 'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)'}
                    ).find_all('span', {'class': 'Trsdu(0.3s)'})[3].text.replace(',', '.')
    price = soup.find('div', {'class': 'D(ib) Va(m) Maw(65%) Ov(h)'}).find_all('span', {
            'class': 'Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)'})[0].text.replace('.00', '').replace(',', '')
    
    
    
    # Get Cash , BVPS data from url2

    url2 = 'https://finance.yahoo.com/quote/{}.JK/key-statistics?p={}.JK'.format(
        x, x)
    r2 = requests.get(url2)
    soup2 = bs4.BeautifulSoup(r2.text, 'html.parser')
    cash = soup2.find('div', {'class': 'Mb(10px) Pend(20px) smartphone_Pend(0px)'}).find_all('div', {
        'class': 'Pos(r) Mt(10px)'})[4].find_all('td', {'class': 'Fw(500) Ta(end) Pstart(10px) Miw(60px)'})[0].text
    BVPS = soup2.find('div', {'class': 'Mb(10px) Pend(20px) smartphone_Pend(0px)'}).find_all('div', {
        'class': 'Pos(r) Mt(10px)'})[4].find_all('td', {'class': 'Fw(500) Ta(end) Pstart(10px) Miw(60px)'})[5].text
    
    # Clearing data, because data from YF have '.xx' format and thats make dataframe cant convert to int
    if len(PE) > 6:
        PE = PE.replace(PE[-3:], '')
    if len(EPS) > 6:
        EPS = EPS.replace(EPS[-3:], '')
        
    
    # Convert EPS to Indonesian Rupiah. Some Indonesian stock have USD format. This will be problem, because format not same with other stock
    if float(EPS) > 0 and float(EPS) < 1:
        EPS = float(EPS) * USDtoIDR
        PER = int(price)/float(EPS)
        
    # Change cash to Million
    if cash[-1:] == 'T':
        cash = cash.replace(cash[-1:], '')
        cash = float(cash) * 1000000000
    elif cash[-1:] == 'M':
        cash = cash.replace(cash[-1:], '')
        cash = float(cash) * 1000000
    elif cash[-1:] == 'B':
        cash = cash.replace(cash[-1:], '')
        cash = float(cash)
        
    
    
    # Change market cap format
    market_cap = market_cap.replace(market_cap[-1:], '')
    market_cap = float(market_cap) * 1000000000
    
    
     # Add data to Dict
    data['market_cap'].append(market_cap)
    data['beta'].append(beta)
    data['PE'].append(PE)
    data['price'].append(price)
    data['EPS'].append(EPS)
    data['cash'].append(cash)
    data['BVPS'].append(BVPS)
    
    print('Proces ' + x)
    
    time.sleep(1)





    

Proces ACES
Proces ADRO
Proces AKRA
Proces ANTM
Proces ASII
Proces BBCA
Proces BBNI
Proces BBRI
Proces BBTN
Proces BMRI
Proces BSDE
Proces BTPS
Proces CPIN
Proces CTRA
Proces ERAA
Proces EXCL
Proces GGRM
Proces HMSP
Proces ICBP
Proces INCO
Proces INDF
Proces INKP
Proces INTP
Proces ITMG
Proces JPFA
Proces JSMR
Proces KLBF
Proces MDKA
Proces MIKA
Proces MNCN
Proces PGAS
Proces PTBA
Proces PTPP
Proces PWON
Proces SCMA
Proces SMGR
Proces SMRA
Proces SRIL
Proces TBIG
Proces TKIM
Proces TLKM
Proces TOWR
Proces UNTR
Proces UNVR
Proces WIKA


In [17]:
stock_data = pd.DataFrame(data)

In [18]:
stock_data

Unnamed: 0,market_cap,beta,PE,EPS,cash,BVPS,price
0,3.041600e+10,0.57,36.31,49.03,1.780000e+09,283.67,1780
1,4.765900e+10,1.09,4966,0.00,1.340000e+00,0.11,1490
2,1.279000e+10,1.02,15.73,205.97,1.370000e+09,2156.69,3240
3,8.605000e+09,1.48,110.86,16.15,3.670000e+09,787.71,1790
4,2.429020e+11,0.69,12.22,491.00,4.767000e+10,3821.97,6000
...,...,...,...,...,...,...,...
59,3.288870e+11,0.28,17.42,190.62,1.777000e+10,1026.80,3320
60,4.844800e+10,0.68,18.14,53.21,1.570000e+09,193.28,965
61,9.726300e+10,0.65,12.14,2.147,1.946000e+10,16283.37,26075
62,2.832640e+11,0.22,38.69,191.91,6.647400e+02,170.01,7425
