In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
import sys
sys.path.insert(0, os.path.abspath('../'))

from czsc import KlineAnalyze

import tushare as ts
# Read the Tushare API token from the environment instead of embedding it in the
# notebook: a token saved in a shared notebook is effectively public and should
# be revoked and re-issued.
_tushare_token = os.environ.get("TUSHARE_TOKEN")
if not _tushare_token:
    raise RuntimeError(
        "Set the TUSHARE_TOKEN environment variable before running this notebook."
    )
ts.set_token(_tushare_token)

定义数据获取程序如下（实际情况根据需要进行开发）：

In [2]:
from datetime import datetime, timedelta

def get_kline(ts_code, start_date, end_date, freq='30min'):
    """Fetch K-line bars from Tushare and normalize them to a fixed column layout.

    The bars are requested through ``pro.daily``. ``freq`` does not change which
    endpoint is called; it only decides which timestamp column is renamed to
    ``dt`` and whether rows whose timestamp ends in ``09:30:00`` are dropped.
    NOTE(review): the original description promised forward-adjusted bars at
    the requested frequency — confirm whether ``pro.daily`` is the intended
    source for non-daily ``freq`` values.

    :param ts_code: str
        Security code, e.g. 600122.SH
    :param start_date: str
        First date to request, formatted YYYYMMDD, e.g. 20190610
    :param end_date: str
        Last date, formatted YYYYMMDD; one calendar day is added to it before
        the request is sent.
    :param freq: str
        Bar frequency label, one of [1min, 5min, 15min, 30min, 60min, D, M, Y]
    :return: pd.DataFrame
        columns = ["symbol", "dt", "open", "close", "high", "low", "vol"],
        sorted by ``dt`` with price columns rounded to 2 decimals
    """
    # Push the end date forward by one calendar day before querying.
    query_end = (datetime.strptime(end_date, '%Y%m%d') + timedelta(days=1)).strftime('%Y%m%d')

    pro = ts.pro_api()
    df = pro.daily(ts_code=ts_code, start_date=start_date, end_date=query_end)

    # Normalize the bars to 7 columns:
    # ["symbol", "dt", "open", "close", "high", "low", "vol"]
    time_col = "trade_time" if "min" in freq else "trade_date"
    df = (
        df.rename(columns={'ts_code': "symbol", time_col: "dt"})
        .drop_duplicates(subset='dt', keep='first')
        .sort_values('dt')
    )
    df = df.assign(dt=df['dt'].apply(str))

    if freq.endswith("min"):
        # Drop the empty 09:30 bars.
        df = df[~df['dt'].str.endswith("09:30:00")]
    df = df.reset_index(drop=True)

    # Take an explicit copy: assigning into a column-subset slice is what
    # triggered pandas' SettingWithCopyWarning in the original version.
    k = df[['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol']].copy()

    for col in ['open', 'close', 'high', 'low']:
        k[col] = k[col].apply(round, args=(2,))
    return k

In [3]:
# Fetch three years of daily bars for 600584.SH and display the resulting frame.
kline = get_kline(
    ts_code="600584.SH",
    start_date="20170710",
    end_date="20200710",
    freq="D",
)
kline

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,symbol,dt,open,close,high,low,vol
0,600584.SH,20170710,17.14,16.75,17.14,16.59,189919.24
1,600584.SH,20170711,16.67,16.02,16.68,16.00,208657.76
2,600584.SH,20170712,16.00,15.98,16.18,15.60,143915.65
3,600584.SH,20170713,16.00,15.64,16.07,15.58,121048.21
4,600584.SH,20170714,15.63,15.95,16.03,15.35,169544.33
...,...,...,...,...,...,...,...
719,600584.SH,20200706,35.90,37.51,37.51,35.08,1240966.75
720,600584.SH,20200707,40.00,38.34,41.26,38.00,2030649.43
721,600584.SH,20200708,38.94,41.62,42.00,37.71,1676926.20
722,600584.SH,20200709,40.91,43.33,44.39,40.29,1417807.11


使用 KlineAnalyze 进行分析

In [4]:
# Build the analyzer from the K-line DataFrame produced by get_kline.
ka = KlineAnalyze(kline)

In [5]:
# NOTE(review): presumably renders the analysis as an HTML chart — confirm
# against the czsc KlineAnalyze API.
ka.to_html()

  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)


In [6]:
# Display both counters as one tuple.
# NOTE(review): presumably the counts of upward / downward "zs" structures
# found by the analyzer — confirm against the czsc KlineAnalyze API.
ka.up_zs_number(),ka.down_zs_number()

(1, 1)

In [7]:
# Inspect the analyzer's `zs` attribute (shown below as a list of dicts).
ka.zs

[{'ZD': 16.42,
  'ZG': 24.45,
  'G': 16.99,
  'GG': 35.18,
  'D': 19.53,
  'DD': 8.04,
  'points': [{'dt': '20170726', 'fx_mark': 'd', 'xd': 14.75},
   {'dt': '20171113', 'fx_mark': 'g', 'xd': 26.12},
   {'dt': '20180207', 'fx_mark': 'd', 'xd': 16.42},
   {'dt': '20180425', 'fx_mark': 'g', 'xd': 24.45},
   {'dt': '20190104', 'fx_mark': 'd', 'xd': 8.04},
   {'dt': '20190312', 'fx_mark': 'g', 'xd': 16.99},
   {'dt': '20190711', 'fx_mark': 'd', 'xd': 11.52},
   {'dt': '20200225', 'fx_mark': 'g', 'xd': 35.18},
   {'dt': '20200330', 'fx_mark': 'd', 'xd': 19.53}]}]

In [9]:
# Re-display the K-line frame; nothing is recomputed here.
kline

Unnamed: 0,symbol,dt,open,close,high,low,vol
0,600584.SH,20170710,17.14,16.75,17.14,16.59,189919.24
1,600584.SH,20170711,16.67,16.02,16.68,16.00,208657.76
2,600584.SH,20170712,16.00,15.98,16.18,15.60,143915.65
3,600584.SH,20170713,16.00,15.64,16.07,15.58,121048.21
4,600584.SH,20170714,15.63,15.95,16.03,15.35,169544.33
...,...,...,...,...,...,...,...
719,600584.SH,20200706,35.90,37.51,37.51,35.08,1240966.75
720,600584.SH,20200707,40.00,38.34,41.26,38.00,2030649.43
721,600584.SH,20200708,38.94,41.62,42.00,37.71,1676926.20
722,600584.SH,20200709,40.91,43.33,44.39,40.29,1417807.11


In [20]:
from functools import lru_cache

def ma(kline, params=(5, 10, 20, 60, 120, 250)):
    """Append simple moving averages of ``close`` to ``kline``.

    For every window ``p`` in ``params`` a column named ``ma<p>`` is written,
    holding the rolling mean of ``close`` rounded to 2 decimals. The frame is
    modified in place and also returned.

    :param kline: pd.DataFrame
        K-line data; must contain a ``close`` column
    :param params: tuple
        Rolling window lengths
    :return: pd.DataFrame
        The same ``kline`` object with the MA columns added
    """
    close = kline['close']
    for window in params:
        rolling_mean = close.rolling(window).mean()
        kline.loc[:, f"ma{window}"] = rolling_mean.apply(round, args=(2,))
    return kline

def macd(kline):
    """Append MACD indicator columns to ``kline``.

    Three columns are written, each rounded to 2 decimals after all of them
    have been computed from unrounded values:

    * ``diff`` - EMA(close, 12) minus EMA(close, 26)
    * ``dea``  - EMA(diff, 9)
    * ``macd`` - 2 * (diff - dea)

    The frame is modified in place and also returned.

    :param kline: pd.DataFrame
        K-line data; must contain a ``close`` column
    :return: pd.DataFrame
        The same ``kline`` object with diff, dea and macd columns added
    """
    fast_span, slow_span, signal_span = 12, 26, 9

    def _ema(series, span):
        # Recursive (adjust=False) exponential mean with alpha = 2 / (span + 1).
        return series.ewm(adjust=False, alpha=2 / (span + 1), ignore_na=True).mean()

    close = kline['close']
    diff = _ema(close, fast_span) - _ema(close, slow_span)
    dea = _ema(diff, signal_span)
    hist = 2 * (diff - dea)

    for name, values in (('diff', diff), ('dea', dea), ('macd', hist)):
        kline.loc[:, name] = values.apply(round, args=(2,))
    return kline

def boll(kline, mid_window=26, std_window=20, width=2):
    """Append Bollinger-band columns to ``kline``.

    Columns written (the frame is modified in place and also returned):

    * ``boll-mid``    - rolling mean of ``close`` over ``mid_window`` bars
    * ``boll-tmp2``   - rolling standard deviation of ``close`` over
      ``std_window`` bars (left unrounded)
    * ``boll-top``    - ``boll-mid + width * boll-tmp2``
    * ``boll-bottom`` - ``boll-mid - width * boll-tmp2``

    ``boll-mid``, ``boll-top`` and ``boll-bottom`` are rounded to 2 decimals.

    NOTE(review): the defaults keep the previously hard-coded values, in which
    the mean window (26) and the deviation window (20) differ. Pass equal
    windows for a conventional single-window band.

    :param kline: pd.DataFrame
        K-line data; must contain a ``close`` column
    :param mid_window: int
        Rolling window for the middle band
    :param std_window: int
        Rolling window for the standard deviation
    :param width: int or float
        Number of standard deviations between the middle band and each outer band
    :return: pd.DataFrame
        The same ``kline`` object with the BOLL columns added
    """
    close = kline['close']
    kline.loc[:, 'boll-mid'] = close.rolling(mid_window).mean()
    kline.loc[:, 'boll-tmp2'] = close.rolling(std_window).std()

    # Outer bands are derived from the unrounded middle band and deviation.
    band = width * kline['boll-tmp2']
    kline.loc[:, 'boll-top'] = kline['boll-mid'] + band
    kline.loc[:, 'boll-bottom'] = kline['boll-mid'] - band

    for col in ['boll-mid', 'boll-top', 'boll-bottom']:
        kline.loc[:, col] = kline[col].apply(round, args=(2,))
    return kline

def create_df(ka, ma_params=(5, 20, 120, 250), use_macd=True, use_boll=True):
    """Build a DataFrame from ``ka.kline`` and append indicator columns to it.

    The result is rebuilt on every call. The earlier ``lru_cache`` wrapper was
    removed because it keyed on the analyzer object itself: a later change to
    ``ka.kline`` would not have been reflected in the returned frame, every
    caller received the same mutable DataFrame, and an unhashable
    ``ma_params`` (e.g. a list) raised ``TypeError``.

    :param ka: object exposing a ``kline`` attribute accepted by ``pd.DataFrame``
    :param ma_params: iterable of int
        Moving-average windows forwarded to ``ma``
    :param use_macd: bool
        When true, add the columns produced by ``macd``
    :param use_boll: bool
        When true, add the columns produced by ``boll``
    :return: pd.DataFrame
        K-line data with the requested indicator columns
    """
    df = pd.DataFrame(ka.kline)
    df = ma(df, params=ma_params)
    if use_macd:
        df = macd(df)
    if use_boll:
        df = boll(df)
    return df

#ka.kline
def get_ka_feature(ka):
    """Collect a flat dict of 0/1 features from a KlineAnalyze object.

    This is only a sample; anyone building a multi-factor model can extend the
    feature set considerably.

    Features cover the ``fx_mark`` of the last element of ``ka.fx``, ``ka.bi``
    and ``ka.xd``, the results of ``ka.bi_bei_chi()`` / ``ka.xd_bei_chi()``
    combined with that mark, the last close versus each moving average, and the
    relation between the last ``diff`` and ``dea`` values. Every key is prefixed
    with ``ka.name``.

    :param ka: analyzer exposing ``fx``, ``bi``, ``xd``, ``bi_bei_chi()``,
        ``xd_bei_chi()``, ``name`` and whatever ``create_df`` reads
    :return: dict mapping prefixed feature name to 0 or 1
    """
    def as_flag(condition):
        # Collapse any truthy / falsy value to the integers 1 / 0.
        return 1 if condition else 0

    feature = {
        "分型标记": as_flag(ka.fx[-1]['fx_mark'] == 'g'),
        "笔标记": as_flag(ka.bi[-1]['fx_mark'] == 'g'),
        "线段标记": as_flag(ka.xd[-1]['fx_mark'] == 'g'),
        # The divergence check is only invoked when the mark matches,
        # thanks to short-circuit evaluation of `and`.
        '向上笔背驰': as_flag(ka.bi[-1]['fx_mark'] == 'g' and ka.bi_bei_chi()),
        '向下笔背驰': as_flag(ka.bi[-1]['fx_mark'] == 'd' and ka.bi_bei_chi()),
        '向上线段背驰': as_flag(ka.xd[-1]['fx_mark'] == 'g' and ka.xd_bei_chi()),
        '向下线段背驰': as_flag(ka.xd[-1]['fx_mark'] == 'd' and ka.xd_bei_chi()),
    }

    ma_params = (5, 20, 120, 250)
    last = create_df(ka, ma_params).iloc[-1].to_dict()
    feature.update(
        ('收于MA%i上方' % p, as_flag(last['close'] > last['ma%i' % p]))
        for p in ma_params
    )

    feature["MACD金叉"] = as_flag(last['diff'] > last['dea'])
    feature["MACD死叉"] = as_flag(last['diff'] < last['dea'])

    return dict((ka.name + key, value) for key, value in feature.items())

In [21]:
# Compute and display the feature dict for the analyzer `ka`.
get_ka_feature(ka)

{'本级别分型标记': 0,
 '本级别笔标记': 0,
 '本级别线段标记': 0,
 '本级别向上笔背驰': 0,
 '本级别向下笔背驰': 0,
 '本级别向上线段背驰': 0,
 '本级别向下线段背驰': 0,
 '本级别收于MA5上方': 1,
 '本级别收于MA20上方': 1,
 '本级别收于MA120上方': 1,
 '本级别收于MA250上方': 1,
 '本级别MACD金叉': 1,
 '本级别MACD死叉': 0}