In [1]:
import pandas as pd
import numpy as np

In [4]:
def get_twse_price(date=None, filename=None):
    """Download TWSE (Taiwan Stock Exchange) daily closing prices as CSV text.

    Parameters
    ----------
    date : str
        Trading date in ``yyyymmdd`` format. When None, a warning is printed
        and nothing is downloaded.
    filename : str, optional
        Path of the file the response text is written to. Defaults to
        'data/stock_price.csv'.

    Returns
    -------
    None
        The outcome is reported through printed status messages. Download
        and file errors are caught and reported as a warning instead of
        being raised.
    """
    if date is None:
        print('Warning: Missing date')
        return

    # Imported after the argument check so the guard above does not depend
    # on the HTTP library being importable.
    import os
    import requests

    url = 'http://www.twse.com.tw/exchangeReport/MI_INDEX'
    values = {
        'response': 'csv',
        'date': date,
        'type': 'ALLBUT0999',
    }

    if filename is None:
        filename = 'data/stock_price.csv'

    try:
        print('Downloading stock prices...')
        # A timeout keeps the notebook from hanging on an unresponsive server.
        r = requests.post(url, values, timeout=30)
        # Treat HTTP error statuses as failures rather than saving an error page.
        r.raise_for_status()
        # Create the target directory if needed so a fresh checkout works.
        folder = os.path.dirname(filename)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(r.text)
    except Exception as exc:
        # Still best-effort, but report the cause and let KeyboardInterrupt /
        # SystemExit propagate (a bare ``except:`` would swallow them).
        print('Warning: Fail to get stock prices (%s)' % exc)
    else:
        print('Status: stock prices stored in %s' % filename)

In [5]:
# Fetch the closing prices for 2018-12-03 into the default CSV file.
get_twse_price(date='20181203')

Downloading stock prices...
Status: stock prices stored in data/stock_price.csv


In [6]:
# Column names assigned to the per-security rows of the downloaded report.
columns = [
    "證券代號", "證券名稱", "成交股數", "成交筆數", "成交金額",
    "開盤價", "最高價", "最低價", "收盤價", "漲跌(+/-)", "漲跌價差",
    "最後揭示買價", "最後揭示買量", "最後揭示賣價", "最後揭示賣量", "本益比",
]

# File lines 0..163 are skipped before parsing.
# NOTE(review): the offset 163 presumably matches the layout of the file
# downloaded for this particular date -- confirm before reusing with other dates.
df = pd.read_csv(
    'data/stock_price.csv',
    encoding='utf-8',
    header=None,
    names=columns,
    index_col=False,
    quotechar='"',
    thousands=',',
    skiprows=lambda line_no: line_no <= 163,
)

print(df.shape)
df.head()

(1067, 16)


Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,"=""0050""",元大台灣50,13850986,7537,1088593094,77.85,79.0,77.8,78.8,+,2.05,78.8,570,78.85,9,0.0
1,"=""0051""",元大中型100,49010,33,1480675,30.0,30.33,30.0,30.25,+,0.65,30.2,2,30.25,5,0.0
2,"=""0052""",富邦科技,101805,7,5180761,50.7,50.9,50.7,50.75,+,0.99,50.0,5,50.7,1,0.0
3,"=""0053""",元大電子,6000,4,192840,31.92,32.31,31.92,32.31,+,0.82,32.31,1,32.32,1,0.0
4,"=""0054""",元大台商50,6000,6,124090,20.52,20.78,20.52,20.78,+,0.55,20.75,21,20.78,1,0.0


In [7]:
# Keep only the rows whose security code is exactly four characters long;
# this drops entries such as '="0050"', whose codes are longer.
# The vectorised .str accessor yields NaN (compared as False) for missing or
# non-string codes, where a per-row len() call would raise TypeError.
df = df[df["證券代號"].str.len() == 4]
print(df.shape)
df.head()

(926, 16)


Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
120,1101,台泥,26938982,9128,957538297,34.95,36.0,34.9,35.85,+,1.35,35.8,13,35.85,151,9.85
121,1102,亞泥,15319913,5961,524659738,34.5,34.5,33.75,34.5,+,0.8,34.45,8,34.5,318,10.55
122,1103,嘉泥,1349239,490,19193587,14.2,14.3,14.15,14.2,+,0.1,14.2,29,14.25,7,5.02
123,1104,環泥,95651,63,1841478,19.2,19.35,19.1,19.3,+,0.35,19.25,4,19.3,26,11.03
124,1108,幸福,154000,43,1135160,7.23,7.4,7.23,7.4,,0.0,7.4,44,7.43,20,0.0


In [8]:
# Keep the columns used below and index the frame by security code.
# set_index replaces the existing index, so no reset_index is needed first,
# and the non-inplace call returns a new frame rather than mutating a slice
# of the previous one (avoids SettingWithCopyWarning on later assignments).
df = (
    df[["證券代號", "證券名稱", "成交股數", "開盤價", "最高價", "最低價", "收盤價"]]
    .set_index('證券代號')
)
df.head()

Unnamed: 0_level_0,證券名稱,成交股數,開盤價,最高價,最低價,收盤價
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1101,台泥,26938982,34.95,36.0,34.9,35.85
1102,亞泥,15319913,34.5,34.5,33.75,34.5
1103,嘉泥,1349239,14.2,14.3,14.15,14.2
1104,環泥,95651,19.2,19.35,19.1,19.3
1108,幸福,154000,7.23,7.4,7.23,7.4


In [9]:
# Prices that contain a thousands separator are still stored as text (str),
# not as float numbers; inspect one such row and the type of its close price.
sample_code = '3008'
print(df.loc[sample_code])
print(type(df.loc[sample_code, '收盤價']))

證券名稱         大立光
成交股數     1642048
開盤價     3,475.00
最高價     3,705.00
最低價     3,475.00
收盤價     3,705.00
Name: 3008, dtype: object
<class 'str'>


#### Series.map(arg, na_action=None)
Map each value of a <font color='red'>Series</font> through an input correspondence (a dict, a Series, or a function). It is used below to convert the price strings into floats.

In [10]:
def f(x):
    """Convert a price value to ``float``, returning None when it is not numeric.

    Parameters
    ----------
    x : str or number
        A price such as '3,475.00' (thousands separators are removed) or a
        value that is already numeric.

    Returns
    -------
    float or None
        The parsed number, or None when ``x`` cannot be converted
        (e.g. a non-numeric string or None).
    """
    # Only strings can carry separators; testing ``',' in x`` on a float or
    # None would raise TypeError before the conversion is even attempted.
    if isinstance(x, str):
        x = x.replace(',', '')
    try:
        return float(x)
    except (TypeError, ValueError):
        return None

# Run every price column through f so the values become numbers.
for price_col in ('開盤價', '最高價', '最低價', '收盤價'):
    df[price_col] = df[price_col].map(f)

# Re-check the sample row: the close price should now be numeric.
print(df.loc['3008'])
print(type(df.loc['3008', '收盤價']))

證券名稱        大立光
成交股數    1642048
開盤價        3475
最高價        3705
最低價        3475
收盤價        3705
Name: 3008, dtype: object
<class 'numpy.float64'>


In [11]:
# Intraday change in percent: (close - open) / open * 100, rounded to 2 decimals.
df['當日漲幅(%)'] = (
    df['收盤價']
    .sub(df['開盤價'])
    .div(df['開盤價'])
    .mul(100)
    .round(2)
)
df.head()

Unnamed: 0_level_0,證券名稱,成交股數,開盤價,最高價,最低價,收盤價,當日漲幅(%)
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1101,台泥,26938982,34.95,36.0,34.9,35.85,2.58
1102,亞泥,15319913,34.5,34.5,33.75,34.5,0.0
1103,嘉泥,1349239,14.2,14.3,14.15,14.2,0.0
1104,環泥,95651,19.2,19.35,19.1,19.3,0.52
1108,幸福,154000,7.23,7.4,7.23,7.4,2.35


In [12]:
# Rank the securities by intraday change, largest gain first.
df1 = df.sort_values('當日漲幅(%)', ascending=False)
df1.head()

Unnamed: 0_level_0,證券名稱,成交股數,開盤價,最高價,最低價,收盤價,當日漲幅(%)
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
6674,鋐寶科技,4077780,45.1,52.0,43.45,50.6,12.2
5225,東科-KY,752000,27.5,30.15,27.5,30.1,9.45
3321,同泰,567000,10.85,11.85,10.85,11.85,9.22
2440,太空梭,174019,8.39,9.13,8.39,9.13,8.82
3034,聯詠,8978322,132.5,143.0,132.0,143.0,7.92


In [13]:
# Summary statistics of the traded-share column.
volume = df["成交股數"]
print(f'Max. 成交股數 = {volume.max():.0f}')
print(f'Avg. 成交股數 = {volume.mean():.0f}')
print(f'Min. 成交股數 = {volume.min():.0f}')

Max. 成交股數 = 101473505
Avg. 成交股數 = 3414307
Min. 成交股數 = 0


In [14]:
# Find the security with the highest traded volume and show its row.
# The row is selected through the idxmax label rather than a hard-coded name,
# so the cell stays correct when the notebook is run for another date.
most_active = df["成交股數"].idxmax()
print(f'當日成交最熱絡: {df.loc[most_active, "證券名稱"]}')
df.loc[[most_active]]

當日成交最熱絡: 旺宏


Unnamed: 0_level_0,證券名稱,成交股數,開盤價,最高價,最低價,收盤價,當日漲幅(%)
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2337,旺宏,101473505,21.4,22.55,21.2,22.55,5.37


In [15]:
# Show every column of the row whose security code is 2454.
df.loc['2454', :]

證券名稱           聯發科
成交股數       7644178
開盤價            243
最高價            247
最低價          242.5
收盤價          245.5
當日漲幅(%)       1.03
Name: 2454, dtype: object