# 1.2.1 List of stocks



## Data source:

ข้อมูลบริษัท/หลักทรัพย์ -> ตราสารทุน -> รายชื่อบริษัท / หลักทรัพย์

https://www.set.or.th/th/company/companylist.html

รายละเอียด รายชื่อบริษัทจดทะเบียนในตลาดหลักทรัพย์ (ปรับปรุงข้อมูลทุกวันทำการแรกของแต่ละสัปดาห์) https://www.set.or.th/dat/eod/listedcompany/static/listedCompanies_th_TH.xls


English language
https://www.set.or.th/dat/eod/listedcompany/static/listedCompanies_en_US.xls


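The source file (referred to here as **securities.xls**; the code below downloads it as `listedCompanies_en_US.xls`) contains the list of all ticker symbols. It is downloaded from the Stock Exchange of Thailand website. Despite the `.xls` extension, its content is an HTML table encoded in TIS-620. The output of this process is the CSV file **stock_ticker.csv**.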

## Process

1. Download data from URL
2. Decode data from TIS-620
3. Parse HTML content to XML tree
4. Use xpath to extract each row and transform
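5. Filter only SET (this filter is currently commented out in the code, so companies from all exchanges are kept)
6. Display the data frame for inspection
7. Save the output to 'stock_ticker.csv'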


### Required Python Modules

~~~
requests
pandas
lxml
~~~

In [1]:
# Import required library

import pandas as pd
import requests
from lxml import etree

In [2]:
# Download data

# Thai-language version of the same list:
# src = "https://www.set.or.th/dat/eod/listedcompany/static/listedCompanies_th_TH.xls"

src = "https://www.set.or.th/dat/eod/listedcompany/static/listedCompanies_en_US.xls"

# Always pass a timeout: without one, requests may wait indefinitely on a
# stalled connection and hang the notebook.
r = requests.get(src, timeout=30)
# The response body is TIS-620 encoded text (an HTML <table>, see the sample below).
content = r.content.decode('tis-620')

r.status_code, content[:300] # sample first 300 characters

(200,
 '<table cellSpacing="0" cellPadding="0" width="100%" border="0">\n  <tr>\n    <td colspan="6" align="left"><strong>List of  Listed Companies & Contact Information</strong></td>\n    <td colspan="4" align="right"><strong>As of 30 Nov 2020<strong></td>\n  </tr>\n  <tr align="center" bgcolor="#C0C0C0"> \n    ')

In [3]:
# Decode the raw response bytes from TIS-620 and build an lxml element tree

html = etree.HTML(r.content.decode(encoding='tis-620'))

In [21]:
# Extract rows

# Parsed companies keyed by ticker symbol; filled by the row loop further down.
stocks = {}

class Company:
    """One listed company, read positionally from the cells of a table row.

    Every attribute is the stripped text of one cell; a missing or empty
    cell yields ``""``. ``symbol`` and ``exchange`` are also upper-cased.
    """

    def __init__(self, row):
        """Populate the attributes from *row*.

        Args:
            row: An element whose children are the row's cells. It must
                support integer indexing and its children must expose
                ``.text`` (e.g. an lxml or ElementTree element).
        """
        self._row = row
        self.symbol = self._cell_data(0).strip().upper()
        self.name = self._cell_data(1).strip()
        self.exchange = self._cell_data(2).strip().upper()
        self.industry = self._cell_data(3).strip()
        self.sector = self._cell_data(4).strip()
        self.address = self._cell_data(5).strip()
        self.zipcode = self._cell_data(6).strip()
        self.telephone = self._cell_data(7).strip()
        self.fax = self._cell_data(8).strip()
        # Column 9 gets its own attribute so that it no longer overwrites the
        # zip code read from column 6.
        # NOTE(review): presumably the company website -- confirm against the
        # table header.
        self.website = self._cell_data(9).strip()

    def _cell_data(self, column_number):
        """Return the text of cell *column_number*, or ``""`` if absent or empty."""
        try:
            # Index the element directly; ``getchildren()`` is deprecated.
            text = self._row[column_number].text
        except IndexError:
            # The row has fewer cells than expected.
            return ""
        return "" if text is None else text

    def __str__(self):
        return """<Company %s:%s >""" % (self.exchange, self.symbol,)

# Walk every <tr> in the document and register each data row in `stocks`.
for row in html.xpath('//tr'):
    # Materialise the cells once; list(element) replaces the deprecated
    # getchildren() and avoids repeating the call.
    cells = list(row)
    # Guard against a <tr> with no cells, which would otherwise raise
    # IndexError here and abort the whole loop.
    first_text = cells[0].text if cells else None
    print(first_text, len(cells))
    # Rows with fewer than five cells or an empty first cell are not company rows.
    if len(cells) < 5 or first_text is None:
        continue
    try:
        company = Company(row)
    except Exception as e:
        # Best effort: report the bad row and carry on with the next one.
        print("Error for %s %s" % (str(e), row))
        continue
    print(company)
    stocks[company.symbol] = company

None 2
None 10
2S                   10
<Company MAI:2S >
3K-BAT               10
<Company SET:3K-BAT >
7UP                  10
<Company SET:7UP >
A                    10
<Company SET:A >
A5                   10
<Company MAI:A5 >
AAV                  10
<Company SET:AAV >
ABICO                10
<Company MAI:ABICO >
ABM                  10
<Company MAI:ABM >
ABPIF                10
<Company SET:ABPIF >
ACAP                 10
<Company MAI:ACAP >
ACC                  10
<Company SET:ACC >
ACE                  10
<Company SET:ACE >
ACG                  10
<Company SET:ACG >
ADB                  10
<Company MAI:ADB >
ADVANC               10
<Company SET:ADVANC >
AEC                  10
<Company SET:AEC >
AEONTS               10
<Company SET:AEONTS >
AF                   10
<Company MAI:AF >
AFC                  10
<Company SET:AFC >
AGE                  10
<Company SET:AGE >
AH                   10
<Company SET:AH >
AHC                  10
<Company SET:AHC >
AI                   10
<Compan

<Company SET:SSP >
SSPF                 10
<Company SET:SSPF >
SSSC                 10
<Company SET:SSSC >
SST                  10
<Company SET:SST >
SSTRT                10
<Company SET:SSTRT >
STA                  10
<Company SET:STA >
STANLY               10
<Company SET:STANLY >
STAR                 10
<Company MAI:STAR >
STARK                10
<Company SET:STARK >
STC                  10
<Company MAI:STC >
STEC                 10
<Company SET:STEC >
STGT                 10
<Company SET:STGT >
STHAI                10
<Company SET:STHAI >
STI                  10
<Company MAI:STI >
STPI                 10
<Company SET:STPI >
SUC                  10
<Company SET:SUC >
SUN                  10
<Company MAI:SUN >
SUPER                10
<Company SET:SUPER >
SUPEREIF             10
<Company SET:SUPEREIF >
SUSCO                10
<Company SET:SUSCO >
SUTHA                10
<Company SET:SUTHA >
SVH                  10
<Company SET:SVH >
SVI                  10
<Company SET:SVI >
SVOA     

In [22]:
"Total tickers", len(stocks)

('Total tickers', 807)

In [23]:
# Flatten every Company object into a plain dict of the fields to export,
# keyed by ticker symbol.
all_stocks = {}
for symbol, company in stocks.items():
    all_stocks[symbol] = dict(
        symbol=company.symbol,
        exchange=company.exchange,
        industry=company.industry,
        sector=company.sector,
        name=company.name,
    )


In [24]:
# Each column of all_symbols is one ticker; transposing gives one row per ticker.
all_symbols = pd.DataFrame(all_stocks)
# NOTE: this rebinds `stocks` from the dict of Company objects to a DataFrame.
stocks = all_symbols.transpose()

# The SET-only filter is disabled, so companies from every exchange are kept:
# SET_stocks = stocks[stocks['exchange']=='SET']
SET_stocks = stocks
SET_stocks

Unnamed: 0,symbol,exchange,industry,sector,name
2S,2S,MAI,Industrial,-,2S METAL PUBLIC COMPANY LIMITED
3K-BAT,3K-BAT,SET,Industrials,Automotive,THAI ENERGY STORAGE TECHNOLOGY PUBLIC COMPANY ...
7UP,7UP,SET,Resources,Energy & Utilities,SEVEN UTILITIES AND POWER PUBLIC COMPANY LIMITED
A,A,SET,Property & Construction,Property Development,AREEYA PROPERTY PUBLIC COMPANY LIMITED
A5,A5,MAI,Services,-,ASSET FIVE GROUP PUBLIC COMPANY LIMITED
...,...,...,...,...,...
YGG,YGG,MAI,Services,-,YGGDRAZIL GROUP PUBLIC COMPANY LIMITED
YUASA,YUASA,MAI,Industrial,-,YUASA BATTERY (THAILAND) PUBLIC COMPANY LIMITED
ZEN,ZEN,SET,Agro & Food Industry,Food & Beverage,ZEN CORPORATION GROUP PUBLIC COMPANY LIMITED
ZIGA,ZIGA,MAI,Industrial,-,ZIGA INNOVATION PUBLIC COMPANY LIMITED


In [27]:
# Look up the row(s) whose symbol equals a given ticker.
# The recorded output for 'BAT3K' is empty, i.e. no row matched.

stocks.loc[stocks['symbol'].eq('BAT3K')]

Unnamed: 0,symbol,exchange,industry,sector,name


In [26]:
# Write the table to CSV, then display it once more for a final check.
output_file = "stock_ticker.csv"
SET_stocks.to_csv(path_or_buf=output_file)

SET_stocks

Unnamed: 0,symbol,exchange,industry,sector,name
2S,2S,MAI,Industrial,-,2S METAL PUBLIC COMPANY LIMITED
3K-BAT,3K-BAT,SET,Industrials,Automotive,THAI ENERGY STORAGE TECHNOLOGY PUBLIC COMPANY ...
7UP,7UP,SET,Resources,Energy & Utilities,SEVEN UTILITIES AND POWER PUBLIC COMPANY LIMITED
A,A,SET,Property & Construction,Property Development,AREEYA PROPERTY PUBLIC COMPANY LIMITED
A5,A5,MAI,Services,-,ASSET FIVE GROUP PUBLIC COMPANY LIMITED
...,...,...,...,...,...
YGG,YGG,MAI,Services,-,YGGDRAZIL GROUP PUBLIC COMPANY LIMITED
YUASA,YUASA,MAI,Industrial,-,YUASA BATTERY (THAILAND) PUBLIC COMPANY LIMITED
ZEN,ZEN,SET,Agro & Food Industry,Food & Beverage,ZEN CORPORATION GROUP PUBLIC COMPANY LIMITED
ZIGA,ZIGA,MAI,Industrial,-,ZIGA INNOVATION PUBLIC COMPANY LIMITED


# SET50 and SET100 sample for pilot


SET 50  https://www.settrade.com/C13_MarketSummary.jsp?detail=SET50

SET 100 https://www.settrade.com/C13_MarketSummary.jsp?detail=SET100


In [None]:
# `import lxml.html` is used instead of `from lxml import html` so that the
# notebook-level name `html` (the parsed listing tree built earlier) is not
# rebound to the lxml.html module.
import lxml.html
import requests

src = "https://www.settrade.com/C13_MarketSummary.jsp?detail=SET50"
# A timeout keeps a stalled connection from hanging the notebook.
r = requests.get(src, timeout=30)
content = r.content.decode('tis-620')

_html = lxml.html.fromstring(content)
# Full path to one symbol link, kept for reference:
# _html.xpath("""//*[@id="maincontent"]/div/div[1]/div[1]/div/div[2]/div[3]/div/div/div/div/table/tbody/tr[15]/td[1]/a""")
links = _html.xpath("""//a[contains(@class,"link-stt") and contains(@href,'C13')]""") # link for each symbols

In [None]:
# The ticker symbol is the stripped text content of each link's parent element.
# (That parent is a direct child of the row: links[0].getparent().getparent().tag is 'tr'.)

symbols = [link.getparent().text_content().strip() for link in links]

In [None]:
# Display the ticker symbols collected from the links
symbols

In [None]:
# Sample list of tickers; this rebinds `stocks` to a plain list of symbols.
stocks = [
    'ADVANC', 'AOT', 'AWC',
    'BANPU', 'BBL', 'BDMS',
    'BEM', 'BGRIM', 'BH',
]

In [None]:
# Display the ticker list
stocks

In [None]:
import pandas_datareader as pdr
import datetime as dt


stocks = ['ADVANC', 'AOT', 'AWC', 'BANPU', 'BBL', 'BDMS', 'BEM', 'BGRIM', 'BH']
start = dt.datetime(year=2014, month=1, day=1)

# Request data for each ticker (suffixed with ".BK") from `start` onwards.
# A ticker whose request fails is reported and left out of `dataset`.
dataset = {}
for ticker in stocks:
    try:
        history = pdr.get_data_yahoo('%s.BK' % ticker, start=start)
    except Exception as e:
        print("Error: %s %s" % (ticker, str(e)))
    else:
        dataset[ticker] = history

In [None]:
# Display the downloaded data, keyed by ticker
dataset