In [1]:
import requests
import pandas as pd
import json
import numpy as np
from datetime import datetime, date
import csv
import time
import ssl
import os
# Swap the factory the standard library uses for its default HTTPS context with
# the non-verifying one, so HTTPS downloads skip certificate checks.
# NOTE(review): this disables certificate verification for the whole kernel session — confirm it is still required.
ssl._create_default_https_context = ssl._create_unverified_context
# Silence pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Scan date as [year, month, day]; get_time_duration_stock_info unpacks it as
# (yy, mm, dd) and uses it as the last date to keep.
time_data = [2024, 4, 18]

In [3]:
def get_twse_stock_db_info():
    """Download the TWSE BWIBBU_ALL open-data CSV and return it as a DataFrame."""
    return pd.read_csv(
        'http://www.twse.com.tw/exchangeReport/BWIBBU_ALL?response=open_data',
        encoding='utf_8_sig',
    )
    
def get_twse_stock_info(df, stock):
    """Look up the valuation figures of one stock in the TWSE valuation table.

    Args:
        df: DataFrame containing the columns '股票代號', '股票名稱', '本益比',
            '殖利率(%)' and '股價淨值比' (analyze_stock passes the result of
            get_twse_stock_db_info()).
        stock: stock code; converted with int() before it is matched against
            the '股票代號' column.

    Returns:
        Tuple (name, priceEarningRatio, yieldRatio, priceBookRatio) read from
        the first matching row.

    Raises:
        IndexError: if no row matches the stock code.
        ValueError: if stock cannot be converted to an int.
    """
    target_data = df[df["股票代號"] == int(stock)]
    if target_data.empty:
        # Same exception type as the positional lookup would raise, but with a usable message.
        raise IndexError("stock %s not found in TWSE valuation table" % stock)
    row = target_data.iloc[0]
    return row['股票名稱'], row['本益比'], row['殖利率(%)'], row['股價淨值比']

def get_otc_stock_db_info():
    """Download the TPEx mainboard P/E-ratio analysis JSON and return it as a DataFrame.

    Raises:
        requests.RequestException: on connection problems or when the server
            does not answer within the timeout.
    """
    link = 'http://www.tpex.org.tw/openapi/v1/tpex_mainboard_peratio_analysis'
    # Without a timeout a stalled connection would block the notebook indefinitely.
    json_data = requests.get(link, timeout=30).json()
    return pd.DataFrame.from_records(json_data)
    
def get_otc_stock_info(df, stock):
    """Look up the valuation figures of one stock in the TPEx valuation table.

    Args:
        df: DataFrame containing the columns 'SecuritiesCompanyCode',
            'CompanyName', 'PriceEarningRatio', 'YieldRatio' and
            'PriceBookRatio' (analyze_stock passes the result of
            get_otc_stock_db_info()).
        stock: stock code, compared as-is against 'SecuritiesCompanyCode'.

    Returns:
        Tuple (name, priceEarningRatio, yieldRatio, priceBookRatio) read from
        the first matching row.

    Raises:
        IndexError: if no row matches the stock code.
    """
    target_data = df[df['SecuritiesCompanyCode'] == stock]
    if target_data.empty:
        # Same exception type as the positional lookup would raise, but with a usable message.
        raise IndexError("stock %s not found in TPEx valuation table" % stock)
    row = target_data.iloc[0]
    return row['CompanyName'], row['PriceEarningRatio'], row['YieldRatio'], row['PriceBookRatio']

def get_time_input(yy, mm, dd, days = 100):
    """Build the list of calendar months to download for a look-back window.

    The window is converted to a month count with ``days // 20`` (20 days are
    counted per month) and ends at the month (yy, mm).

    Args:
        yy: year of the last month in the window.
        mm: month (1-12) of the last month in the window.
        dd: day of the reference date; only echoed back in the second result.
        days: look-back length; fewer than 20 yields an empty month list.

    Returns:
        (time_list, [yy, mm, dd]) where time_list is a list of [year, month]
        pairs in chronological order, oldest first.
    """
    month_num = days // 20
    time_list = []
    for offset in range(month_num):
        # Count months on a single axis so stepping back crosses any number of
        # year boundaries correctly (the month never becomes 0 or negative).
        year, month_index = divmod(yy * 12 + (mm - 1) - offset, 12)
        time_list.append([year, month_index + 1])
    time_list.reverse()
    return time_list, [yy, mm, dd]

In [4]:
def string_with_comma_to_int(x):
    """Convert a string such as '1,234,567' to an int by removing every comma."""
    digits = "".join(x.split(","))
    return int(digits)

def string_with_comma_to_float(x):
    """Convert a string such as '1,234.5' to a float, removing commas first.

    Returns 0 when x cannot be converted (not a string, empty, or non-numeric
    text such as '--').
    """
    try:
        return float(x.replace(",", ""))
    except Exception:
        # Only ordinary conversion errors are swallowed; KeyboardInterrupt and
        # SystemExit still propagate so a long scan can be stopped.
        return 0
    
def string_to_float(x):
    """Convert x with float(); return 0 when the conversion fails."""
    try:
        return float(x)
    except Exception:
        # Only ordinary conversion errors are swallowed; KeyboardInterrupt and
        # SystemExit still propagate so a long scan can be stopped.
        return 0
    
def vol_for_twse(x):
    """Convert a comma-formatted share count to a rounded count of 1000-share lots.

    Returns 0 when x cannot be converted (not a string or non-numeric text).
    """
    try:
        return round(float(x.replace(",", ""))/1000)
    except Exception:
        # Only ordinary conversion errors are swallowed; KeyboardInterrupt and
        # SystemExit still propagate so a long scan can be stopped.
        return 0

def moving_average(x, w):
    """Return the simple moving average of x over a window of w samples.

    Uses a 'valid' convolution with a window of ones, divided by w.
    """
    window = np.ones(w)
    return np.convolve(x, window, mode="valid") / w

def get_stock_volumn_price(yy, mm, dd, stock_tag, last_date):
    """Download one month of daily trading data for a TWSE stock.

    Args:
        yy: Gregorian year of the month to fetch.
        mm: month of the month to fetch (zero-padded to two digits in the query).
        dd: unused; the query always asks for day "01".
        stock_tag: stock code placed in the stockNo query parameter.
        last_date: unused; kept so the signature matches get_otc_stock_volumn_price.

    Returns:
        DataFrame with the columns '日期', '成交張數' (shares traded converted to
        1000-share lots by vol_for_twse) and '收盤價' (closing price as a number),
        or None when the CSV cannot be downloaded or parsed.
    """
    date_tag = str(yy) + str(mm).zfill(2) + "01"
    url = 'http://www.twse.com.tw/exchangeReport/STOCK_DAY?response=open_data&date=%s&stockNo=%s'%(date_tag, stock_tag)
    try:
        df = pd.read_csv(url, encoding='utf_8_sig')
    except Exception:
        # Best effort: the caller skips months that could not be fetched.
        # KeyboardInterrupt is deliberately not caught so the scan can be stopped.
        return None

    def _to_price(value):
        # Strip thousands separators first: string_to_float('1,005.00') would
        # otherwise fail to parse and silently yield 0.
        if isinstance(value, str):
            value = value.replace(",", "")
        return string_to_float(value)

    # CSV columns: date, shares traded, turnover, open, high, low, close, price change, number of trades.
    df_target = df[['日期', '成交股數', '收盤價']].copy()
    df_target['成交股數'] = df_target['成交股數'].apply(vol_for_twse)  # lots = shares / 1000
    df_target['收盤價'] = df_target['收盤價'].apply(_to_price)
    return df_target.rename(columns={'成交股數': '成交張數'})

def get_otc_stock_volumn_price(yy, mm, dd, stock_tag, last_date):
    """Download one month of daily trading data for a TPEx (OTC) stock.

    Args:
        yy: Gregorian year; 1911 is subtracted to build the query date.
        mm: month, inserted into the query date as given.
        dd: day, inserted into the query date as given.
        stock_tag: stock code placed in the stkno query parameter.
        last_date: unused; kept so the signature matches get_stock_volumn_price.

    Returns:
        DataFrame with the columns '日期', '成交張數' and '收盤價' (the last two
        converted with string_with_comma_to_float), or None when the request
        fails or times out — the same best-effort contract as
        get_stock_volumn_price, which the caller already handles.

    Raises:
        KeyError: if the response has no 'aaData' entry.
    """
    roc_year = yy - 1911
    url = 'http://www.tpex.org.tw/web/stock/aftertrading/daily_trading_info/st43_result.php?d=%s/%s/%s&stkno=%s'%(roc_year, mm, dd, stock_tag)
    try:
        # Without a timeout a stalled connection would block the whole scan.
        json_data = requests.get(url, timeout=30).json()
    except requests.RequestException:
        return None
    # Row layout: date, lots traded, turnover, open, high, low, close, price change, number of trades.
    columns = ['日期', '成交張數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
    df = pd.DataFrame(json_data['aaData'], columns=columns)
    df_target = df[['日期', '成交張數', '收盤價']].copy()
    df_target['成交張數'] = df_target['成交張數'].apply(string_with_comma_to_float)  # volume
    df_target['收盤價'] = df_target['收盤價'].apply(string_with_comma_to_float)  # price
    return df_target

In [5]:
def _roc_date_to_tuple(text):
    """Parse a 'YYY/MM/DD' ROC-calendar date (optionally marked with '*') into (year, month, day); None if unparsable."""
    try:
        parts = text.replace('*', "").split('/')
        return int(parts[0]) + 1911, int(parts[1]), int(parts[2])
    except (AttributeError, IndexError, ValueError):
        return None


def get_time_duration_stock_info(time_data, stock_tag, min_volumn = 150, ma_num = 5, isOtc = False):
    """Collect the daily volume and closing-price history of one stock.

    The months returned by get_time_input() for time_data are downloaded one by
    one, concatenated in chronological order, and rows dated after time_data
    are removed.

    Args:
        time_data: [year, month, day] of the last date to keep.
        stock_tag: stock code handed to the per-month download function.
        min_volumn: unused; kept for backward compatibility.
        ma_num: unused; kept for backward compatibility.
        isOtc: True to use get_otc_stock_volumn_price, False to use
            get_stock_volumn_price.

    Returns:
        (vol_data, pri_data, df_all): column 1 and column 2 of the combined
        table as numpy arrays, plus the combined DataFrame itself; or
        (None, None, None) when no rows are available.
    """
    time_list, last_date = get_time_input(time_data[0], time_data[1], time_data[2])
    fetch_month = get_otc_stock_volumn_price if isOtc else get_stock_volumn_price

    monthly_frames = []
    for year, month in time_list:
        df = fetch_month(year, month, "01", stock_tag, last_date)
        if df is None:
            continue
        time.sleep(0.10)  # short pause between successful downloads
        monthly_frames.append(df)
    if not monthly_frames:
        return None, None, None
    # Concatenate once instead of growing a DataFrame inside the loop.
    df_all = pd.concat(monthly_frames, axis=0).reset_index(drop=True)

    # Drop rows dated after the requested day. Dates are compared as whole
    # (year, month, day) tuples; rows whose date cannot be parsed are kept.
    cutoff = tuple(last_date)
    keep = np.array(
        [
            parsed is None or parsed <= cutoff
            for parsed in (_roc_date_to_tuple(value) for value in df_all["日期"])
        ],
        dtype=bool,
    )
    df_all = df_all.loc[keep].reset_index(drop=True)

    df_np = df_all.to_numpy().copy()
    if len(df_np) == 0:
        return None, None, None

    vol_data = df_np[:, 1]
    pri_data = df_np[:, 2]
    return vol_data, pri_data, df_all

def calculate_target(vol_data, pri_data):
    """Screen for a recent volume surge with the price sitting on or above its moving averages.

    Args:
        vol_data: numpy array of daily volumes, oldest first.
        pri_data: numpy array of daily closing prices, oldest first.

    Returns:
        True when all of the following hold, otherwise False:
          * the recent volume (last 7 entries, blended 50/50 with the latest
            entry) is more than 4.3 times the reference volume (entries -28 to
            -8) and above 800;
          * the latest price is at least 0.95 / 0.98 / 1.00 / 1.00 times its
            5 / 10 / 20 / 60-entry moving average;
          * the 5/10 moving-average ratio is in (0.94, 1.08] and the 10/20
            ratio is in (0.95, 1.07].
        False is also returned when fewer than 60 entries are supplied, when a
        reference window averages to zero, or when the computation fails.
    """
    # The 60-entry moving average needs at least 60 prices; with fewer,
    # moving_average() would return a value that is not a real 60-entry mean.
    min_points = 60
    if len(vol_data) < min_points or len(pri_data) < min_points:
        return False

    aa = -7   # start of the recent window
    bb = -28  # start of the reference window
    if vol_data[bb:aa].mean() == 0:
        return False
    if pri_data[bb:aa].mean() == 0:
        return False
    try:
        vol_med_factor = 0.5  # weight of the median against the mean
        vol_cur_factor = 0.5  # weight of the latest entry against the recent window
        vol_short = ((1 - vol_cur_factor) * (vol_med_factor * np.median(vol_data[aa:]) + (1 - vol_med_factor) * np.mean(vol_data[aa:])) + vol_cur_factor * vol_data[-1])
        vol_long = (vol_med_factor * np.median(vol_data[bb:aa]) + (1 - vol_med_factor) * np.mean(vol_data[bb:aa]))
        vol_valid = vol_short / vol_long > 4.3
        vol_valid = vol_valid and (vol_short > 800)

        pri_5ma = moving_average(pri_data, 5)
        pri_10ma = moving_average(pri_data, 10)
        pri_20ma = moving_average(pri_data, 20)
        pri_60ma = moving_average(pri_data, 60)
        ma_pos_valid = (pri_data[-1] / pri_5ma[-1] >= 0.95) and (pri_data[-1] / pri_10ma[-1] >= 0.98) and (pri_data[-1] / pri_20ma[-1] >= 1.00) and (pri_data[-1] / pri_60ma[-1] >= 1.00)
        ma_close_valid = (pri_5ma[-1]) / (pri_10ma[-1]) <= 1.08 and (pri_5ma[-1]) / (pri_10ma[-1]) > 0.94
        ma_close_valid = ma_close_valid and (pri_10ma[-1]) / (pri_20ma[-1]) <= 1.07 and (pri_10ma[-1]) / (pri_20ma[-1]) > 0.95
    except Exception:
        # Bad data simply fails the screen; KeyboardInterrupt still propagates.
        return False
    return bool(vol_valid and ma_pos_valid and ma_close_valid)


def calculate_target_trend(vol_data, pri_data):
    """Screen for a stock consolidating close to its short moving averages inside an up-trend.

    Args:
        vol_data: numpy array of daily volumes, oldest first.
        pri_data: numpy array of daily closing prices, oldest first.

    Returns:
        True when the volume, position and trend conditions below all hold;
        False otherwise, including when fewer than 70 entries are supplied,
        when a reference window (entries -28 to -8) averages to zero, or when
        the computation fails.
    """
    data_min_num = 70
    if len(vol_data) < data_min_num or len(pri_data) < data_min_num:
        return False
    aa = -7
    bb = -28
    if vol_data[bb:aa].mean() == 0:
        return False
    if pri_data[bb:aa].mean() == 0:
        return False

    try:
        # Volume: peak of entries -7 .. -2 must exceed 600.
        # NOTE(review): the slice stops before the latest entry — confirm that is intended.
        vol_valid = np.max(vol_data[aa:-1]) > 600

        pri_cur = pri_data[-1]
        pri_5ma = moving_average(pri_data, 5)
        pri_10ma = moving_average(pri_data, 10)
        pri_20ma = moving_average(pri_data, 20)
        pri_60ma = moving_average(pri_data, 60)

        # Position: each ratio must lie inside its [low, high] band.
        band_checks = [
            (pri_cur / pri_5ma[-1], 0.97, 1.03),
            (pri_cur / pri_10ma[-1], 0.98, 1.03),
            (pri_5ma[-1] / pri_10ma[-1], 0.98, 1.032),
            (pri_10ma[-1] / pri_20ma[-1], 0.974, 1.045),
            (pri_cur / pri_20ma[-1], 0.99, 1.07),
        ]
        pos_valid = all(ratio <= high and ratio >= low for ratio, low, high in band_checks)
        pos_valid = pos_valid and pri_10ma[-1] / pri_60ma[-1] >= 1.027

        # Trend: the 20-entry average must not have fallen much over the last
        # 7 steps, and the 60-entry average must have risen over the last 9.
        trend_valid = pri_20ma[-1] / pri_20ma[-8] >= 0.985
        trend_valid = trend_valid and pri_60ma[-1] / pri_60ma[-10] >= 1.027
    except Exception:
        # Bad data simply fails the screen; KeyboardInterrupt still propagates.
        return False
    return bool(vol_valid and pos_valid and trend_valid)

In [6]:
def analyze_stock(time_data, isOtc):
    """Run the trend screen over every stock code listed in the market's CSV file.

    Args:
        time_data: [year, month, day] of the scan date.
        isOtc: True to scan the codes in db/otc.csv against the TPEx data,
            False to scan db/twse.csv against the TWSE data.

    Returns:
        List of [stock, name, priceEarningRatio, yieldRatio, priceBookRatio,
        last price, market flag] for each stock that passes
        calculate_target_trend; the flag is 1 for OTC and 0 for TWSE. The
        valuation fields are None when the stock is missing from the valuation
        table. Each hit is also printed.
    """
    # os.path.join gives the same ".\db\..." path on Windows and a valid one elsewhere.
    if isOtc:
        df = get_otc_stock_db_info()
        data_path = os.path.join(".", "db", "otc.csv")
        lookup = get_otc_stock_info
        market_flag = 1
    else:
        df = get_twse_stock_db_info()
        data_path = os.path.join(".", "db", "twse.csv")
        lookup = get_twse_stock_info
        market_flag = 0

    with open(data_path, newline='', encoding='utf_8_sig') as csvfile:
        # The stock code is the first field; blank lines are skipped.
        stock_list = [line[0] for line in csv.reader(csvfile) if line]

    target_list = []
    for stock in stock_list:
        vol_data, pri_data, df_all = get_time_duration_stock_info(time_data, stock, min_volumn=150, ma_num=1, isOtc=isOtc)
        if vol_data is None or pri_data is None:
            continue
        if not calculate_target_trend(vol_data, pri_data):
            continue
        try:
            name, priceEarningRatio, yieldRatio, priceBookRatio = lookup(df, stock)
        except (IndexError, ValueError):
            # A hit without valuation data is still reported, so one missing
            # row does not abort the whole scan.
            name = priceEarningRatio = yieldRatio = priceBookRatio = None
        record = [stock, name, priceEarningRatio, yieldRatio, priceBookRatio, pri_data[-1], market_flag]
        target_list.append(record)
        print(*record)
    return target_list

In [7]:
# Scan the TWSE list; each hit prints as: code, name, P/E, yield, P/B, last price, isOTC flag (0).
target_list = analyze_stock(time_data, isOtc = False)

1319 東陽 24.9 3.15 2.95 127.0 0
1560 中砂 43.88 1.55 6.06 258.0 0
2330 台積電 24.87 1.62 6.03 804.0 0
2367 燿華 nan 1.07 1.91 28.0 0
2404 漢唐 16.84 5.02 6.46 418.0 0
2467 志聖 41.56 2.21 6.55 135.5 0
2493 揚博 16.3 5.09 3.75 98.3 0
2505 國揚 34.63 1.44 1.04 27.7 0
2535 達欣工 12.69 5.17 1.67 58.0 0
2539 櫻花建 34.08 2.63 5.52 76.0 0
2637 慧洋-KY 14.77 4.25 1.07 64.7 0
2809 京城銀 10.18 5.27 1.26 56.9 0
2903 遠百 16.05 5.11 1.41 31.3 0
3055 蔚華科 nan 1.33 3.41 75.3 0
3356 奇偶 16.94 5.35 2.41 54.2 0
3617 碩天 14.72 3.35 2.98 239.0 0
3622 洋華 17.68 3.02 1.53 66.3 0
4770 上品 20.66 2.75 4.67 436.0 0
4915 致伸 16.2 4.6 2.37 87.0 0
4994 傳奇 51.7 3.29 6.89 121.5 0
6192 巨路 13.59 4.5 1.65 111.0 0
6438 迅得 14.98 4.15 2.4 129.0 0
6442 光聖 54.35 1.53 4.45 137.5 0
6691 洋基工程 19.41 4.59 6.13 392.5 0
6807 峰源-KY 10.11 5.83 1.14 44.6 0
6835 圓裕 15.43 3.63 2.24 55.1 0
6869 雲豹能源-創 16.34 4.29 4.24 142.5 0
8110 華東 nan 1.11 0.88 18.05 0
8467 波力-KY 25.76 2.57 4.9 194.5 0
9911 櫻花 17.63 4.49 3.16 86.4 0


In [8]:
# Scan the OTC list; each hit prints as: code, name, P/E, yield, P/B, last price, isOTC flag (1).
target_list_otc = analyze_stock(time_data, isOtc = True)

1591 駿吉-KY N/A 0.00 5.21 63.9 1
3171 新洲 14.30 6.72 1.84 29.75 1
3232 昱捷 126.20 1.58 2.11 31.55 1
3289 宜特 25.15 2.88 2.93 129.5 1
3526 凡甲 19.73 5.03 5.22 230.5 1
3663 鑫科 85.65 1.30 2.84 46.25 1
3680 家登 45.31 1.62 5.39 464.0 1
4542 科嶠 40.30 2.22 3.23 67.7 1
5536 聖暉* 14.24 4.91 3.01 214.0 1
6015 宏遠證 25.20 1.95 0.95 12.85 1
6021 美好證 52.00 3.30 0.99 18.2 1
6190 萬泰科 25.38 3.24 2.26 37.05 1
6208 日揚 20.53 0.00 2.51 62.0 1
6248 沛波 8.84 5.63 1.69 35.55 1
8027 鈦昇 311.25 0.50 4.14 99.6 1
8064 東捷 30.68 2.38 1.64 29.45 1


In [9]:
# Write both result lists to a CSV named after the scan date (YYYYMMDD_trend.csv).
# An existing file is never overwritten.
# NOTE(review): the path is absolute and machine-specific, and the file is written
# with the platform default text encoding — confirm both are intended.
date_stamp = (time_data[0], str(time_data[1]).zfill(2), str(time_data[2]).zfill(2))
output_file_path = r"D:\Stock\daily_result\%s%s%s_trend.csv" % date_stamp
if not os.path.exists(output_file_path):
    # First row is the scan date, second row the column header.
    header_rows = [time_data, ['ID', 'Name', 'PE', 'Yield', 'PB', 'Price', 'isOTC']]
    with open(output_file_path, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(header_rows + target_list + target_list_otc)
else:
    print("File exist!!!")

In [47]:
# Ad-hoc spot check of a single OTC stock on a historical date.
# NOTE(review): this rebinds the notebook-level target_list and time_data used by
# the cells above, and its execution count (47) is out of sequence — confirm the
# cell still belongs in the notebook.
target_list = []
time_data = [2021, 10, 14]

# The stock code is an int here, whereas analyze_stock passes the strings read from its CSV.
vol_data, pri_data, df = get_time_duration_stock_info(time_data, 3675, min_volumn=150, ma_num=1, isOtc=True)
# valid = calculate_target(vol_data, pri_data)
valid = calculate_target_trend(vol_data, pri_data)
# NOTE(review): the recorded output also shows a "True True True" line that the
# current calculate_target_trend does not print — presumably from an older definition.
print(valid)

True True True
True
