In [1]:
from pytdx.hq import TdxHq_API
from pytdx.params import TDXParams
from datetime import datetime, timedelta
import pandas as pd
import polars as pl
import os
from tqdm import tqdm

# Address of the TDX quote server; passed to api.connect() wherever a connection is opened.
HOST = '119.147.212.81'
PORT = 7709

# Number of ticks requested per history call; also the step by which the paging offset advances.
API_TICK_SIZE = 2000

In [2]:
# Single shared client instance; later cells open connections on it via api.connect(HOST, PORT).
api = TdxHq_API()

In [3]:
# Echo the client object to confirm it was created.
api

<pytdx.hq.TdxHq_API at 0x7f49ab5fd850>

In [4]:
# Smoke test: request a single page (offset 0, API_TICK_SIZE rows) of ticks for
# symbol 300507 on 20230111 and convert it to a DataFrame for inspection.
with api.connect(HOST, PORT):
    data = api.get_history_transaction_data(TDXParams.MARKET_SZ, '300507', 0, API_TICK_SIZE, 20230111)
    df_tmp = api.to_df(data)

df_tmp.head()

Unnamed: 0,time,price,vol,buyorsell
0,11:24,6.78,1,1
1,11:24,6.78,2,1
2,11:25,6.79,41,0
3,11:25,6.78,10,0
4,11:25,6.77,1,1


In [5]:
def get_historical_transaction(api, mkt_code, symbol, d_int):
    """Download all ticks of one symbol for one day, page by page.

    Pages of ``API_TICK_SIZE`` rows are requested at increasing offsets until
    the server returns nothing.

    Args:
        api: Connected client exposing ``get_history_transaction_data`` and ``to_df``.
        mkt_code: Market code forwarded to the API.
        symbol: Security code forwarded to the API.
        d_int: Trading date as an integer in ``YYYYMMDD`` form.

    Returns:
        A polars DataFrame with columns ``index``, ``time_str``, ``price``,
        ``vol`` and ``buyorsell``, or ``None`` when no page contained data.
        ``index`` is a unique 0..N-1 row number over the whole day.
    """
    offset = 0
    pages = []
    while True:
        data = api.get_history_transaction_data(mkt_code, symbol, offset, API_TICK_SIZE, d_int)
        if data is None or len(data) == 0:
            break
        pages.append(api.to_df(data))
        offset += API_TICK_SIZE

    if len(pages) == 0:
        return None

    # Later pages go first, matching the original prepend order.
    # NOTE(review): this presumes offset 0 holds the most recent ticks - confirm against the API.
    pages.reverse()

    # ignore_index=True renumbers rows across pages; without it every page
    # restarts at 0 and the exported 'index' column contains duplicates.
    df = pd.concat(pages, ignore_index=True)
    df_pl = pl.DataFrame({
        'index': df.index.to_list(),
        'time_str': df['time'].to_list(),
        'price': df['price'].to_list(),
        'vol': df['vol'].to_list(),
        'buyorsell': df['buyorsell'].to_list()
    })
    return df_pl

In [6]:
def get_stk_tick_data(mkt_code=TDXParams.MARKET_SZ, symbol='300507', base_d=datetime(year=2022, month=10, day=9), n_days=60):
    """Download tick data for one symbol over a window of calendar days and save it as CSV.

    Walks backwards from ``base_d`` one calendar day at a time. Each day that
    returns data is written to ``../cn_data/{symbol}/{YYYYMMDD}.csv``; days
    with no data are skipped.

    Uses the module-level ``api`` client and the ``HOST``/``PORT`` constants.

    Args:
        mkt_code: Market code forwarded to the API.
        symbol: Security code; also the name of the output sub-directory.
        base_d: Most recent day of the window.
        n_days: Number of calendar days to walk back, including ``base_d``.
    """
    out_dir = f'../cn_data/{symbol}'
    with api.connect(HOST, PORT):
        for i in range(n_days):
            d = base_d - timedelta(days=i)
            d_str = d.strftime('%Y%m%d')
            d_int = int(d_str)

            df = get_historical_transaction(api, mkt_code, symbol, d_int)
            if df is None:
                continue

            # makedirs also creates a missing '../cn_data' parent and is a
            # no-op when the directory already exists (os.mkdir would raise).
            os.makedirs(out_dir, exist_ok=True)

            df.to_csv(f'{out_dir}/{d_str}.csv')

In [7]:
# Load the list of stocks to download.
df_cn_stks = pl.read_csv('../stk_list/cn_list.csv')
df_cn_stks.head()

# Re-create 'code' as a 6-character, zero-padded string.
# NOTE(review): column assignment via df[...] = ... and Series.apply rely on the
# polars version this notebook was written against - confirm before upgrading.
df_cn_stks['code'] = df_cn_stks['code'].apply(lambda x: str(x).zfill(6))
df_cn_stks.head()

symbol,name,tdx_api_mkt,code
str,str,i64,str
"""SZ002875""","""安奈儿""",0,"""002875"""
"""SZ002873""","""新天药业""",0,"""002873"""
"""SZ002870""","""香山股份""",0,"""002870"""
"""SZ002869""","""金溢科技""",0,"""002869"""
"""SZ002868""","""绿康生化""",0,"""002868"""


In [8]:
# Resume the bulk download from row START_ROW of the stock list.
# The row number itself must be iterated: looping over
# range(len(df_cn_stks[START_ROW:])) restarts at 0 and re-processes the first
# rows of the list instead of the remaining ones.
START_ROW = 1540
for i in tqdm(range(START_ROW, len(df_cn_stks))):
    mkt = df_cn_stks['tdx_api_mkt'][i]
    code = df_cn_stks['code'][i]
    get_stk_tick_data(mkt, code)

100%|██████████| 484/484 [1:35:16<00:00, 11.81s/it]


In [8]:
# Load one saved day of ticks for symbol 300507.
df = pl.read_csv('../cn_data/300507/20230109.csv')

# Volume threshold used by the aggregation cell: twice the largest single-tick volume of the day.
max_v = max(df['vol'])
max_v = max_v * 2
max_v

10642

In [9]:
# Preview the first ticks of the loaded day.
df.head()

index,time_str,price,vol,buyorsell
i64,str,f64,i64,i64
0,"""09:25""",5.78,241,2
1,"""09:30""",5.78,180,1
2,"""09:30""",5.8,25,0
3,"""09:30""",5.8,91,0
4,"""09:30""",5.8,89,0


In [10]:
def dstr_2_dt(x, base_date=datetime(year=2023, month=1, day=6)):
    """Convert an ``'HH:MM'`` time string into a datetime on ``base_date``.

    Args:
        x: Time of day; the first two characters are read as hours and the
            last two as minutes.
        base_date: Day the time belongs to. Defaults to 2023-01-06, the value
            that was previously hard-coded; pass the actual trading day of the
            data being converted.

    Returns:
        ``base_date`` shifted by the parsed hours and minutes.
    """
    return base_date + timedelta(hours=int(x[:2]), minutes=int(x[-2:]))

In [11]:
# Aggregate ticks into volume bars: consecutive ticks are grouped until their
# cumulative volume reaches max_v; the final bar is closed at the last tick
# even if it is below the threshold.
x = []        # sequential bar number
thigh = []    # highest price in the bar
tlow = []     # lowest price in the bar
topen = []    # price of the first tick in the bar
tclose = []   # price of the last tick in the bar
tvol = []     # total volume of the bar
ttime = []    # synthetic timestamp of the bar (see below)

plist = df['price'].to_list()
v_list = df['vol'].to_list()
time_list = df['time_str'].to_list()

# Take the tick count from the extracted list rather than len(df.index):
# a polars frame has no index, so that expression only worked through
# attribute-style access to the column that happens to be named 'index'.
n_ticks = len(plist)

# Bar timestamps are synthetic: the time of the first tick of the day plus one
# minute per bar. They are NOT the wall-clock time at which each bar formed.
first_dt = dstr_2_dt(time_list[0]) if n_ticks else None

bar_start = 0   # position of the first tick of the bar being built
bar_vol = 0     # volume accumulated in the bar being built
for j, tick_vol in enumerate(v_list):
    bar_vol += tick_vol
    if bar_vol >= max_v or j == n_ticks - 1:
        window = plist[bar_start:j + 1]
        bar_no = len(x)

        thigh.append(max(window))
        tlow.append(min(window))
        topen.append(window[0])
        tclose.append(window[-1])
        tvol.append(bar_vol)
        ttime.append(first_dt + timedelta(minutes=bar_no))
        x.append(bar_no)

        bar_start = j + 1
        bar_vol = 0

In [12]:
# Assemble the aggregated bars into one frame. All columns are supplied to the
# constructor because in-place column assignment (df['col'] = ...) is not
# supported by current polars releases; the resulting frame is the same.
# Note: this rebinds `df`, which held the raw ticks until now.
# NOTE(review): 'freq' is labelled '1m' although the bars are volume-based with
# synthetic one-minute timestamps - confirm the label is intended.
df = pl.DataFrame({
    'datetime': ttime,
    'open': topen,
    'high': thigh,
    'low': tlow,
    'close': tclose,
    'volume': tvol,
    'symbol': ['300507'] * len(ttime),
    'freq': ['1m'] * len(ttime),
})

df.head()

datetime,open,high,low,close,volume,symbol,freq
datetime,f64,f64,f64,f64,i64,str,str
2023-01-06 09:25:00,5.78,5.85,5.78,5.85,10667,"""300507""","""1m"""
2023-01-06 09:26:00,5.85,5.87,5.84,5.86,10763,"""300507""","""1m"""
2023-01-06 09:27:00,5.86,5.9,5.85,5.89,10719,"""300507""","""1m"""
2023-01-06 09:28:00,5.9,5.91,5.88,5.88,10774,"""300507""","""1m"""
2023-01-06 09:29:00,5.89,5.89,5.83,5.85,10680,"""300507""","""1m"""


In [13]:
# Persist the aggregated bars in both Parquet and CSV form, in the current working directory.
# NOTE(review): to_parquet / to_csv are the method names of the polars version this
# notebook was written against - confirm they still exist before upgrading polars.
df.to_parquet('cn_tick_intraday_data.1m.parquet')
df.to_csv('cn_tick_intraday_data.1m.csv')

In [26]:
# Inspect a separate Parquet file; it is not written by any cell visible in this notebook.
pl.read_parquet('../compute.1d.parquet')

time,symbol,freq,open,high,low,close,volume,ema27,ema50,ema200,std50,atr20,score,cond1,cond2,cond3,"_op_AND(n45,n53)",condout_1,condout_2,sell_cond,warmup_timer,holding,sig_buy,sig_sell,pnl_bar,pnl_trade
datetime,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,f64,f64
2017-01-03 15:00:00,"""000002""","""1d""",16.77,17.030001,16.77,16.91,2.6601184e7,16.91,16.91,16.91,0.0,0.26,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000089""","""1d""",7.65,7.92,7.65,7.89,9.987716e6,7.89,7.89,7.89,0.0,0.27,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000090""","""1d""",4.79,4.86,4.76,4.84,1.2665246e7,4.84,4.84,4.84,0.0,0.1,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000156""","""1d""",15.79,15.86,15.72,15.84,4.261864e6,15.84,15.84,15.84,0.0,0.139999,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000301""","""1d""",4.98,5.12,4.93,5.0,3.4400284e7,5.0,5.0,5.0,0.0,0.19,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000519""","""1d""",14.82,15.37,14.63,15.13,3.0613646e7,15.13,15.13,15.13,2.3842e-7,0.74,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000528""","""1d""",4.95,5.0,4.94,4.99,1.6523276e7,4.99,4.99,4.99,0.0,0.06,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000538""","""1d""",49.220001,49.290001,44.990002,45.970001,4.5099444e7,45.970001,45.970001,45.970001,0.0,4.299999,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000540""","""1d""",4.44,4.74,4.39,4.48,1.54077248e8,4.48,4.48,4.48,0.0,0.35,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
2017-01-03 15:00:00,"""000717""","""1d""",4.99,5.14,4.95,5.1,1.8873668e7,5.1,5.1,5.1,0.0,0.19,1.0,false,false,false,false,false,false,false,false,false,false,false,0.0,0.0
