Use this notebook to produce the tables and figures for the Research Data section of the write-up.

The data file downloaded from Datastream is SET100_Data.xlsm (the workbook read by the code cells below).  



Table 1. List of 160 stocks  
Table 2. Description of historical data attributes  
Table 3. Summary statistics of technical indicator values  
Table 4. List of dates on which the SET100 index was updated and the constituent list published  
Table 5. List of stocks added and removed at each index update.  
Table 6. Description of stock tickers that no longer exist.  
Table 7. Observation count of each stock time series.  

In [1]:
import sys, os, os.path
# Resolve the workspace locations once and publish them through os.environ so that
# later cells can find the experiment and dataset folders.
if 'EXPERIMENT_HOME' not in os.environ:  # Configure local paths if not already defined
    ML_HOME = os.path.abspath(os.path.join("/home/jovyan", "ml_home"))  # ML workspace
    EXPERIMENT_HOME = os.path.abspath(os.path.join(ML_HOME, ".."))  # Experiment workspace
    DATA_HOME = os.path.abspath(os.path.join(EXPERIMENT_HOME, "dataset"))  # Dataset location
    os.environ["EXPERIMENT_HOME"] = EXPERIMENT_HOME
    os.environ["DATA_HOME"] = DATA_HOME
    sys.path.insert(0, EXPERIMENT_HOME)
    sys.path.insert(0, ML_HOME)  # Add to path so can load our library
    os.chdir(EXPERIMENT_HOME)  # Change working directory to experiment workspace
else:
    # Paths were configured earlier (cell re-run, or variables exported before launch).
    # Bind the Python names from the environment so the print below cannot hit a NameError.
    EXPERIMENT_HOME = os.environ["EXPERIMENT_HOME"]
    DATA_HOME = os.environ.setdefault(
        "DATA_HOME", os.path.abspath(os.path.join(EXPERIMENT_HOME, "dataset"))
    )
print("Experiment Home: ", os.path.abspath(os.curdir), "; Data Home:", DATA_HOME)

Experiment Home:  /home/jovyan ; Data Home: /home/jovyan/dataset


In [2]:
from datetime import datetime
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import datetime as dt
import xlrd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelFile.html
# Get worksheet names.
# read_excel() without sheet_name parses only the first sheet into a DataFrame, so calling
# .keys() on that result lists column labels rather than worksheets. ExcelFile exposes the
# sheet names directly, without parsing any sheet.
# An absolute path is used because the os.chdir into the Datastream folder happens in a later cell.
workbook_path = os.path.join(
    os.environ.get("DATA_HOME", "/home/jovyan/dataset"), "Datastream", "SET100_Data.xlsm"
)
with pd.ExcelFile(workbook_path) as workbook:
    worksheet_names = workbook.sheet_names
worksheet_names

In [3]:


# Workbook layout (author's notes): the first sheet lists all companies and the
# 'VO' sheet contains VO data. Within a data sheet: row 3 = LOC, row 4 = Datatype, row 5 = Name.

# Work from the Datastream folder so the workbook can be opened by its bare file name.
datastream_dir = "/home/jovyan/dataset/Datastream"
os.chdir(datastream_dir)

# Load the first sheet (position 0) plus four named sheets in one call;
# the result is a mapping keyed by the entries of this list.
wanted_sheets = [0, 'VO', 'MV', 'P', 'MACD']
sheets = pd.read_excel('SET100_Data.xlsm', sheet_name=wanted_sheets)

In [4]:
# Confirm which sheets were loaded: the keys mirror the sheet_name list passed to read_excel.
sheets.keys()

odict_keys([0, 'VO', 'MV', 'P', 'MACD'])

In [5]:
# With no sheet_name argument, read_excel parses only the first worksheet into a DataFrame.
workbook = pd.read_excel('SET100_Data.xlsm')
# Column labels of that first sheet (DataFrame.keys() returns the same object as .columns).
workbook.columns

Index(['Unnamed: 0', 'Symbol In SET100 Constituent', 'Company Name',
       'Datastream Mnemonic', 'Remark'],
      dtype='object')

In [11]:
# The first sheet is the company directory; pull the ticker symbols out as a plain list.
company_sheet = sheets[0]
symbol_column = 'Symbol In SET100 Constituent'
symbols = company_sheet[symbol_column].tolist()

# Ticker table restricted to the four descriptive columns (drops the leading 'Unnamed: 0' column).
ticker_columns = [symbol_column, 'Company Name', 'Datastream Mnemonic', 'Remark']
df_tickers = company_sheet.loc[:, ticker_columns]
df_tickers  # every stock symbol listed on the first sheet

Unnamed: 0,Symbol In SET100 Constituent,Company Name,Datastream Mnemonic,Remark
0,AAV,ASIA AVIATION,Q:AAVP,
1,ADVANC,ADVANCED INFO SER.,Q:AIS,
2,AEONTS,AE.THANA SINSAP (THAI.),Q:ATS,
3,AMATA,AMATA,Q:PAKO,
4,ANAN,ANANDA DEVELOPMENT,Q:ADTP,
...,...,...,...,...
158,VIBHA,VIBHAVADI MEDICAL,Q:VIBH,
159,VNG,VANACHAI GROUP,Q:VANA,
160,WHA,WHA,Q:WHAP,
161,WHAUP,WHA UTILITIES AND POWER,Q:WHAU,


In [21]:
# Look up the directory row for the ticker "SIM".
is_sim = df_tickers["Symbol In SET100 Constituent"].eq("SIM")
df_tickers.loc[is_sim]

Unnamed: 0,Symbol In SET100 Constituent,Company Name,Datastream Mnemonic,Remark
122,SIM,SAMART DIGITAL ORS,Q:SIMB,NEW SYMBOL (SDC)


In [15]:
# Keep only the rows that carry a Datastream mnemonic
# (the 160 companies, according to the author's note).
df_companies = df_tickers.dropna(subset=['Datastream Mnemonic'])
df_companies

Unnamed: 0,Symbol In SET100 Constituent,Company Name,Datastream Mnemonic,Remark
0,AAV,ASIA AVIATION,Q:AAVP,
1,ADVANC,ADVANCED INFO SER.,Q:AIS,
2,AEONTS,AE.THANA SINSAP (THAI.),Q:ATS,
3,AMATA,AMATA,Q:PAKO,
4,ANAN,ANANDA DEVELOPMENT,Q:ADTP,
...,...,...,...,...
158,VIBHA,VIBHAVADI MEDICAL,Q:VIBH,
159,VNG,VANACHAI GROUP,Q:VANA,
160,WHA,WHA,Q:WHAP,
161,WHAUP,WHA UTILITIES AND POWER,Q:WHAU,


In [18]:
# The SET100 companies with the sector.
# Read the sector/industry lookup table stored under the experiment workspace.
sector_csv = os.path.join(os.environ['EXPERIMENT_HOME'], "1.0 Data Acquisition/stock_ticker.csv")
sector_lookup = pd.read_csv(sector_csv)

# Only the symbol and the company name are needed from the Datastream directory.
set100_companies = df_companies.loc[:, ['Symbol In SET100 Constituent', 'Company Name']]

# Join on the ticker symbol. merge defaults to an inner join, so companies without a
# matching 'symbol' row in the lookup are absent from the displayed result.
pd.merge(
    set100_companies,
    sector_lookup,
    left_on='Symbol In SET100 Constituent',
    right_on='symbol',
)

Unnamed: 0.1,Symbol In SET100 Constituent,Company Name,Unnamed: 0,symbol,exchange,industry,sector,name
0,AAV,ASIA AVIATION,AAV,AAV,SET,Services,Transportation & Logistics,ASIA AVIATION PUBLIC COMPANY LIMITED
1,ADVANC,ADVANCED INFO SER.,ADVANC,ADVANC,SET,Technology,Information & Communication Technology,ADVANCED INFO SERVICE PUBLIC COMPANY LIMITED
2,AEONTS,AE.THANA SINSAP (THAI.),AEONTS,AEONTS,SET,Financials,Finance & Securities,AEON THANA SINSAP (THAILAND) PUBLIC COMPANY LI...
3,AMATA,AMATA,AMATA,AMATA,SET,Property & Construction,Property Development,AMATA CORPORATION PUBLIC COMPANY LIMITED
4,ANAN,ANANDA DEVELOPMENT,ANAN,ANAN,SET,Property & Construction,Property Development,ANANDA DEVELOPMENT PUBLIC COMPANY LIMITED
...,...,...,...,...,...,...,...,...
143,VIBHA,VIBHAVADI MEDICAL,VIBHA,VIBHA,SET,Services,Health Care Services,VIBHAVADI MEDICAL CENTER PUBLIC COMPANY LIMITED
144,VNG,VANACHAI GROUP,VNG,VNG,SET,Property & Construction,Construction Materials,VANACHAI GROUP PUBLIC COMPANY LIMITED
145,WHA,WHA,WHA,WHA,SET,Property & Construction,Property Development,WHA CORPORATION PUBLIC COMPANY LIMITED
146,WHAUP,WHA UTILITIES AND POWER,WHAUP,WHAUP,SET,Resources,Energy & Utilities,WHA UTILITIES AND POWER PUBLIC COMPANY LIMITED


In [None]:
# Split the 'VO' sheet into its header rows and the data rows that follow them.
VO = sheets['VO']

# Header rows, each kept as a one-row frame (row meanings per the author's notes).
localCode = VO.iloc[2:3]      # Stock symbol
companyName = VO.iloc[4:5]    # Name
bDate = VO.iloc[5:6]          # dataAvailableFrom
dbEntityCode = VO.iloc[6:7]   # internal database code

# Everything from position 7 onward is data.
df_VO = VO.iloc[7:]
# The company-name row supplies the column headers.
# NOTE(review): the original comment said "local code" here although the code uses
# companyName; confirm which header row is intended.
df_VO.columns = companyName.values[0]
# The first column becomes the index (the date column, per the author's note).
first_column = df_VO.columns[0]
df_VO = df_VO.set_index(first_column)

In [None]:
# Display the reshaped VO frame built in the previous cell.
df_VO

Get stocks in the dataset folder. The stocks are indexed in _directory.csv.

In [None]:
# Switch to the folder that holds the per-stock CSV files and their directory listing.
stock_data_dir = "/home/jovyan/dataset/set100.data"
os.chdir(stock_data_dir)

# The first CSV column is used as the index of the directory frame.
stocks = pd.read_csv('_directory.csv', index_col=0)
stock_count = len(stocks)
print("Stocks count:", stock_count)
stocks.tail()

Construct an in-memory DataFrame containing the time series of all stocks.

In [None]:
# Study window; rows outside it are dropped from every stock's series.
period_start='2015-01-01'
period_end='2019-12-31'
series = {}

# One metadata row per symbol, so sector/industry are looked up by label instead of
# re-filtering `stocks` with a boolean mask for every ticker. The first occurrence of
# a symbol wins, matching the previous `.values[0]` behaviour.
stock_meta = stocks.drop_duplicates(subset='symbol').set_index('symbol')

# For each stock: read its CSV (first column becomes the index), cut it to the study
# window, then add Date / Ticker / Sector / Industry / Observations columns.
for s in stocks.symbol.values:
    prices = pd.read_csv("%s.csv" % s, index_col=0)
    # Slice BEFORE counting so that Observations is the number of rows inside the
    # window. Counting first would report the length of the whole file.
    window = prices[period_start:period_end]
    # assign() returns a new frame, so the slice is never mutated in place.
    series[s] = window.assign(
        Date=window.index,
        Ticker=s,
        Sector=stock_meta.at[s, 'sector'],
        Industry=stock_meta.at[s, 'industry'],
        Observations=len(window),
    )

# The dictionary 'series' contains time-series with key ticker.
# Stack dataframes together as one dataframe.
rows = list(series.values())
df = pd.concat(rows, ignore_index=True)

In [None]:
# Pivot table: one row per Date, one column per (Industry, Sector, Ticker) combination;
# each cell holds the Observations value recorded for that stock.
column_levels = ['Industry', 'Sector', 'Ticker']
ptable = pd.pivot_table(df, values='Observations', index='Date', columns=column_levels)
ptable.tail()

In [None]:
# Observation counts taken from the last date present in the pivot table, transposed so
# that each (Industry, Sector, Ticker) combination becomes a row.
# The last row is selected by position rather than by the literal '2019-12-31', so the
# table is not empty when that calendar day has no rows in the data. When the date is
# present it is the final row, so the result is the same as the label slice.
table_7 = ptable.tail(1).T
table_7

In [None]:
# Persist Table 7 as CSV. The relative file name resolves against the current working
# directory, which an earlier cell changes with os.chdir.
# NOTE(review): confirm that folder is the intended output location.
table_7.to_csv('table_7.csv')

In [None]:

# table = df.pivot(columns='Ticker')['Observations'] # Pivot table
# table

# table_7.pivot()['Ticker']

In [None]:
# output = os.path.join(os.environ['EXPERIMENT_HOME'],'tables','table_7')
# table_7.to_csv(output)

# table_7[['Ticker']].index
# Inspect the column labels of the transposed table (the date labels that were rows of ptable).
table_7.columns

In [None]:
# Horizontal bar chart of Table 7: one bar per row of table_7.
# (Unused idea for ticker-only labels: np.array([i[2] for i in table_7.index.values]),
#  passed to the plot call as xticks.)

figure_size = (8, 18)  # tall canvas, one bar per stock
fig, ax = plt.subplots(figsize=figure_size)
table_7.plot.barh(ax=ax, legend=False)

# Label sizes on the y axis: 10 for major ticks, 8 for minor ticks.
for tick_kind, font_size in (('major', 10), ('minor', 8)):
    ax.tick_params(axis='y', which=tick_kind, labelsize=font_size)

ax.set_xlabel('Observations Count (Closing Price)')
ax.set_title("SET100 Stocks: Closing price observations during 2015-2019")