In [2]:
import sys,os
sys.path.append("..")
import django
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime

In [21]:
class CrawlStockPriceTW:
    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股每日交易資訊"
        self.sub_market = ["sii", "otc", "rotc"]

    def crawl_sii(self):

        r = requests.post(
            "http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=" + self.date_str + "&type=ALLBUT0999")

        content = r.text.replace("=", "")
        lines = content.split("\n")
        lines = list(filter(lambda l: len(l.split('",')) > 10, lines))
        content = "\n".join(lines)
        if content == "":
            return None
        df = pd.read_csv(StringIO(content))
        df = df.astype(str)
        df = df.apply(lambda s: s.str.replace(",", ""))
        df.iloc[:, 2:] = df.iloc[:, 2:].apply(lambda s: pd.to_numeric(s, errors="coerce"))
        df["date"] = pd.to_datetime(self.date)
        df = df.loc[:, ["證券代號", "date", "證券名稱", "成交股數", "成交金額", "開盤價", "收盤價", "最高價", "最低價"]]
        df = df.rename(columns={"證券代號": "stock_id", "證券名稱": "stock_name",
                                "成交股數": "turnover_vol", "成交金額": "turnover_price",
                                "開盤價": "open_price", "收盤價": "close_price",
                                "最高價": "high_price", "最低價": "low_price"})
        df['market']='上市'
        df = df.where(pd.notnull(df), None)

        return df

    @staticmethod
    def select_otc_id(code):
        if len(code) > 5:
            if code[-1]=="P":
                return False
            else:
                try:
                    code = int(code)
                    if code > 10000:
                        return False
                    else:
                        return True
                except ValueError:
                    return True
        else:
            return True

    def crawl_otc(self):

        y = str(int(self.date.strftime("%Y")) - 1911)
        date_str = y + "/" + self.date.strftime("%m") + "/" + self.date.strftime("%d")

        link = "http://www.tpex.org.tw/web/stock/aftertrading/daily_close_quotes/stk_quote_download.php?l=zh-tw&d=" \
               + date_str + "&s=0,asc,0"
        r = requests.get(link)

        lines = r.text.replace("\r", "").split("\n")
        try:
            df = pd.read_csv(StringIO("\n".join(lines[3:])), header=None)
        except pd.errors.ParserError:
            return None
        df.columns = list(map(lambda s: s.replace(" ", ""), lines[2].split(",")))
        df = df.apply(lambda s: s.str.replace(",", ""))
        df["stock_id"] = df["代號"]
        df["代號"] = df["代號"].apply(lambda s: self.select_otc_id(s))
        df = df[df["代號"]]
        df["date"] = pd.to_datetime(self.date)
        df = df.loc[:, ["stock_id", "date", "名稱", "成交股數", "成交金額(元)", "開盤", "收盤", "最高", "最低"]]
        df = df.rename(columns={"名稱": "stock_name",
                                "成交股數": "turnover_vol", "成交金額(元)": "turnover_price",
                                "開盤": "open_price", "收盤": "close_price",
                                "最高": "high_price", "最低": "low_price"})
        df.iloc[:, 3:] = df.iloc[:, 3:].apply(lambda s: pd.to_numeric(s, errors="coerce"))
        df = df[df["turnover_vol"] >= 0]
        df['market']='上櫃'
        df = df.where(pd.notnull(df), None)

        return df

    def crawl_rotc(self):

        link = "http://www.tpex.org.tw/web/emergingstock/historical/daily/EMDaily_dl.php?l=zh-tw&f=EMdes010." + \
               self.date_str + "-C.csv"

        r = requests.get(link)
        lines = r.text.replace("\r", "").split("\n")
        try:
            columns_line = lines[3]
        except IndexError:
            return None
        lines = list(filter(lambda l: len(l.split('",')) > 10, lines))
        try:
            df = pd.read_csv(StringIO("\n".join(lines)), header=None)
        except pd.errors.EmptyDataError:
            return None
        df.columns = list(map(lambda l: l.replace(" ", ""), columns_line.split(",")))
        df = df.astype(str)
        df = df.apply(lambda s: s.str.replace(",", ""))
        df.iloc[:, 3:] = df.iloc[:, 3:].apply(lambda s: pd.to_numeric(s, errors="coerce"))
        df["date"] = pd.to_datetime(self.date)
        if "證券名稱" in df.columns:
            df = df.loc[:, ["證券代號", "date", "證券名稱", "成交量", "成交金額", "前日均價", "最後", "最高", "最低"]]

        # old format("名稱")
        else:
            df = df.loc[:, ["證券代號", "date", "名稱", "成交量", "成交金額", "前日均價", "最後", "最高", "最低"]]
            df = df.rename(columns={"名稱": "證券名稱"})

        df = df.rename(columns={"證券代號": "stock_id", "證券名稱": "stock_name",
                                "成交量": "turnover_vol", "成交金額": "turnover_price",
                                "前日均價": "open_price", "最後": "close_price",
                                "最高": "high_price", "最低": "low_price"})

        # solve " "
        df['stock_id']=df['stock_id'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        df['stock_name']=df['stock_name'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        df = df[df["stock_id"] != "合計"]
        df['market']='興櫃'
        df = df.where(pd.notnull(df), None)
        return df

    def crawl_main(self):
        try:
            df = pd.concat([self.crawl_sii(), self.crawl_otc(), self.crawl_rotc()])
        except ValueError:
            return None
        return df
    
z=CrawlStockPriceTW(datetime.datetime(2020,4,15)).crawl_rotc()
z

Unnamed: 0,stock_id,date,stock_name,turnover_vol,turnover_price,open_price,close_price,high_price,low_price,market
0,1260,2020-04-15,富味鄉,9010,162080,17.81,18,18.1,17.9,興櫃
1,1269,2020-04-15,乾杯,23500,1.5392e+06,63.65,65.9,66,64.7,興櫃
2,1342,2020-04-15,八貫,8000,362000,45.36,45.5,45.5,45,興櫃
3,1563,2020-04-15,巧新,904743,4.38093e+07,46.95,49.1,49.35,47.15,興櫃
4,1585,2020-04-15,鎧鉅,,,6.42,,,,興櫃
5,1594,2020-04-15,日高,21000,199770,9.9,9.5,9.9,9.41,興櫃
6,1780,2020-04-15,立弘,79000,1.3862e+06,17.46,17.6,17.6,17.45,興櫃
7,2071,2020-04-15,震南鐵,2500,58800,24,23.9,24,22.85,興櫃
8,2211,2020-04-15,長榮鋼,103130,3.60024e+06,34.7,35.2,35.35,34.75,興櫃
9,2237,2020-04-15,華德動能,50000,749100,14.52,15,15.05,14.9,興櫃


In [7]:
def select_otc_id(code):
    if len(code) > 5:
        if code[-1]=="P":
            return False
        else:
            try:
                code = int(code)
                if code > 10000:
                    return False
                else:
                    return True
            except ValueError:
                return True
    else:
        return True
    
select_otc_id('70624P')

False

In [18]:
ss=z['stock_name'].iloc[2]
ss

'八貫                '

In [19]:
ss[:ss.index(' ')]

'八貫'