# Part 1: Get list of all company in Bursa Main Market

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np
#added numpy

#Manually get ticker symbol of all companies
#We are focusing on main market
#Since there are 50 pages, we will loops across 50 pages. Use loop to generate URL for 50 pages

link_ticker =[]

for i in range(1,51):
    website_url = ('https://www.bursamalaysia.com/market_information/equities_prices?keyword=&top_stock=&board=MAIN-MKT&alphabetical=&sector=&sub_sector=&page='+str(i))
    link_ticker.append(website_url)


#Parse through all the pages and get the data
frames = []
for link in link_ticker:
    reso = requests.get(link)
    if reso.status_code == 404:
        print ("No such code" + link)
    else:
        soup = BeautifulSoup(reso.text,'lxml')
        table = soup.find('table', {'class':'table datatable-striped text-center equity_prices_table datatable-with-sneak-peek js-anchor-price-table'})
        df = pd.read_html(str(table), header=0)
        df[0].rename(index=str, inplace = True)
        frames.append(df[0].dropna(thresh=3))

stock_list = pd.concat(frames)
stock_list

Unnamed: 0,No,Name,Code,REM,Last Done,LACP,CHG,%CHG,Vol ('00),BUY Vol ('00),BUY,SELL,SELL Vol ('00),HIGH,LOW,stock_id
0,1,MINETEC [S],7219,,0.31,0.305,+0.005,+1.64,1214286,59584,0.305,0.31,20550,0.32,0.295,7219
1,2,KNM,7164,,0.25,0.245,+0.005,+2.04,998109,21160,0.245,0.25,132033,0.25,0.23,7164
2,3,DSONIC [S],5216,,1.52,1.400,+0.120,+8.57,923538,286,1.51,1.52,10820,1.53,1.35,5216
3,4,MYEG [S],0138,,1.53,1.500,+0.030,+2.00,843013,7701,1.53,1.54,2290,1.57,1.46,0138
4,5,PDZ [S],6254,,0.04,0.030,+0.010,+33.33,809466,204958,0.035,0.04,204347,0.045,0.03,6254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,975,WIDETEC,7692,,-,0.530,-,-,-,30,0.530,0.560,50,-,-,7692
15,976,WOODLAN [S],7025,,-,0.550,-,-,-,12,0.435,0.545,23,-,-,7025
16,977,Y&G [S],7003,,-,0.720,-,-,-,50,0.640,0.700,3,-,-,7003
17,978,YONGTAI-PA [S],7066PA,,-,0.060,-,-,-,500,0.060,0.085,867,-,-,7066PA


In [11]:
#We have the stock list, now we will extract the stock_id
#We only pick the number because we want to remove warrant

stock_list['ticker_no'] = stock_list['stock_id'].str[:4]
stock_list
ticker_list = stock_list['ticker_no']
ticker_list

#remove duplicate from ticker_list and append to list
ticker_list = ticker_list.drop_duplicates().tolist()

# Part 2: Scrape the financial info all the company

In [3]:
#get the list of URL first

url_all =[]

for i in ticker_list:
    website_url = ('https://www.klsescreener.com/v2/stocks/view/'+str(i))
    url_all.append(website_url)


In [4]:
#get the data and append in data format
frames = []
for link in url_all:
    reso = requests.get(link)
    if reso.status_code == 404:
        print ("Page not found: " + link)
    else:
        soup = BeautifulSoup(reso.text,'lxml')
        table = soup.find('table', {'class':'financial_reports table table-hover table-sm table-theme'})
        df = pd.read_html(str(table), header=0)
        df[0].rename(index= str, inplace = True)
        frames.append(df[0].assign(ticker=link[-4:]))
        
df2 = pd.concat(frames)
df2

df2.to_csv('df2.csv')

Page not found: https://www.klsescreener.com/v2/stocks/view/5235
Page not found: https://www.klsescreener.com/v2/stocks/view/nan


In [8]:
df2

Unnamed: 0,EPS,DPS,NTA,Revenue,P/L,Quarter,Q Date,Financial Date,Announced,Net%,Report,ticker
0,0.13,0.0,0.08,"19,198k","1,180k",3,2019-12-31,2020-03-31,2020-02-26,153.8%,View,7219
1,0.05,0.0,0.08,"18,545k",492k,2,2019-09-30,2020-03-31,2019-11-27,132.6%,View,7219
2,-0.13,0.0,0.08,"28,165k","-1,221k",1,2019-06-30,2020-03-31,2019-08-28,24.1%,View,7219
3,-1.46,0.0,0.08,"30,775k","-10,643k",4,2019-03-31,2019-03-31,2019-05-31,736.9%,View,7219
4,-0.30,0.0,0.09,"38,595k","-2,192k",3,2018-12-31,2019-03-31,2019-02-27,96.1%,View,7219
...,...,...,...,...,...,...,...,...,...,...,...,...
65,-2.00,0.0,0.70,"4,407k","-1,019k",3,2003-09-30,2003-12-21,2003-11-21,6.3%,View,7003
66,-2.12,0.0,0.72,"3,014k","-1,081k",2,2003-06-30,2003-12-31,2003-08-27,22.1%,View,7003
67,-5.09,0.0,0.74,"7,999k","-2,598k",1,2003-03-31,2003-12-31,2003-05-26,40.1%,View,7003
68,-6.53,0.0,0.81,"10,247k","-3,332k",4,2002-12-31,2002-12-31,2003-02-28,66.9%,View,7003


In [7]:
 df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 46432 entries, 0 to 69
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   EPS             46432 non-null  float64
 1   DPS             46432 non-null  float64
 2   NTA             46432 non-null  float64
 3   Revenue         46432 non-null  object 
 4   P/L             46432 non-null  object 
 5   Quarter         46432 non-null  object 
 6   Q Date          46432 non-null  object 
 7   Financial Date  46432 non-null  object 
 8   Announced       46432 non-null  object 
 9   Net%            46432 non-null  object 
 10  Report          46432 non-null  object 
 11  ticker          46432 non-null  object 
dtypes: float64(3), object(9)
memory usage: 4.6+ MB
