In [1]:
import pandas as pd
import numpy as np

from datasource import *

# pandas， swifter 加速
# pip install swifter 
# pip install swifter[groupby] 

In [2]:
def agg_ticks_to_klines(source_df: pd.DataFrame, target_interval='5S'):
    # 确保'time'列是datetime类型
    source_df['datetime'] = pd.to_datetime(source_df['datetime'])

    # 将标的和时间作为分组关键字，使用groupby聚合
    grouped_df = source_df.groupby(['code', pd.Grouper(key='datetime', freq=target_interval)])

    # 对每个分组应用聚合操作，例如计算OHLC和总成交量
    kline_df = grouped_df.agg({
        'price': ['first', 'max', 'min', 'last'],  # open, high, low, close
        'volume': 'sum'
    })

    # 重置索引，将分组关键字还原为列
    kline_df = kline_df.reset_index()

    # 打印结果
    print(kline_df)

    return kline_df


In [3]:
code_list = [
    ['518880.XSHG','518880.XSHG'], #黄金ETF  518880 159934
    ['513100.XSHG','159941.XSHG'], #纳指ETF  159941 513100 
    ['399006.XSHE','159915.XSHE'], #创业板 159915 399006 159915 159952 512900
    # ['513050.XSHG','159607.XSHG'], #中概互联 ETF 513050 159607
    # ['399673.XSHE','159949.XSHE'], #创业板50 399673 159949
    ['000016.XSHG','510050.XSHG'], #上证50 000016 510050 510710 510850
    # ['000300.XSHG','510300.XSHG'], #沪深300
    # ['000905.XSHG','510500.XSHG'] #中证500
    # ['399975.XSHE','399975.XSHE'], #证劵公司 399975 512880 512000 512900
    # ['399987.XSHE','399987.XSHE'], #中证酒
    # ['399932.XSHE','159928.XSHE'], #消费ETF
    # ['000913.XSHG','000913.XSHG'], #300医药 000913 399913 000913
    # ['515000.XSHG','515000.XSHG'], #科技 515000
    # ['000015.XSHG','510880.XSHG'], #红利ETF
]

code_list_ = [l[1].split(".")[0] for l in code_list]

def get_source_df():
    fund_etf_hist_min_em_df = fund_etf_hist_min_em(
        symbol = code_list_[2],
        period="1",
        adjust="qfq",
        start_date="2024-01-02 09:30:00",
        end_date="2024-01-08 15:00:00",
    )

    fund_etf_hist_min_em_df.columns = [
        "date",
        "open",
        "close",
        "high",
        "low",
        "volume",
        "amount",
        "price",
    ]

    source_df = fund_etf_hist_min_em_df
    source_df["date"] = pd.to_datetime(source_df["date"])
    # source_df = fund_etf_hist_min_em_df.set_index(["date"], drop=True, inplace=False)
    print(source_df)
    return source_df

# source_df = get_source_df()

In [4]:
def tick_data_mock(code):
    # 设置交易时间范围
    trading_hours_1 = pd.date_range("2024-01-01 09:30:00", "2024-01-01 11:30:00", freq='S')
    trading_hours_2 = pd.date_range("2024-01-01 13:00:00", "2024-01-01 15:00:00", freq='S')
    trading_hours = trading_hours_1.union(trading_hours_2)

    # 生成tick数据
    tick_data = pd.DataFrame({
        'datetime': trading_hours,
        'code': [code] * len(trading_hours),
        'price': np.random.rand(len(trading_hours)) * 10 + 100,  # 生成随机价格，假设在100到110之间
        'volume': np.random.randint(1, 100, size=len(trading_hours))  # 生成随机成交量
    })

    # 打印生成的tick数据
    print(tick_data)
    return tick_data

def tick_data_list_mock():
    df = pd.concat([tick_data_mock(code=code) for code in code_list_])
    return df

In [5]:
df_all = tick_data_list_mock()

                 datetime    code       price  volume
0     2024-01-01 09:30:00  518880  104.820917      29
1     2024-01-01 09:30:01  518880  108.129601      64
2     2024-01-01 09:30:02  518880  100.916099      15
3     2024-01-01 09:30:03  518880  107.842745      76
4     2024-01-01 09:30:04  518880  106.521618      43
...                   ...     ...         ...     ...
14397 2024-01-01 14:59:56  518880  105.825826      61
14398 2024-01-01 14:59:57  518880  109.858508      86
14399 2024-01-01 14:59:58  518880  106.236324      73
14400 2024-01-01 14:59:59  518880  102.979709      97
14401 2024-01-01 15:00:00  518880  100.784758       1

[14402 rows x 4 columns]
                 datetime    code       price  volume
0     2024-01-01 09:30:00  159941  103.758102      30
1     2024-01-01 09:30:01  159941  105.833534      33
2     2024-01-01 09:30:02  159941  101.492909      84
3     2024-01-01 09:30:03  159941  104.753024      47
4     2024-01-01 09:30:04  159941  101.578283      58
..

In [6]:
new_5s_df_all = agg_ticks_to_klines(df_all)

         code            datetime       price                          \
                                        first         max         min   
0      159915 2024-01-01 09:30:00  108.281930  108.281930  104.151050   
1      159915 2024-01-01 09:30:05  106.972438  106.972438  103.032018   
2      159915 2024-01-01 09:30:10  100.986174  109.697448  100.986174   
3      159915 2024-01-01 09:30:15  109.268868  109.268868  102.906309   
4      159915 2024-01-01 09:30:20  101.440198  107.259539  101.149417   
...       ...                 ...         ...         ...         ...   
11523  518880 2024-01-01 14:59:40  109.027898  109.803560  101.416578   
11524  518880 2024-01-01 14:59:45  106.623890  108.217638  101.008764   
11525  518880 2024-01-01 14:59:50  103.086865  108.896717  103.086865   
11526  518880 2024-01-01 14:59:55  103.614110  109.858508  102.979709   
11527  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

                  volume  
             last    su

In [35]:
from data_repo_v1 import get_new_cycle_records

new_5s_df_all.columns = [
    "code",
    "date",
    "open",
    "high",
    "low",      
    "close",
    "volume",
]

def agg_klines_to_klines_v1(df_all, target_cycle = 60 * 1000):
    """
    df_all 5s kline
    """
    df_all['date'] = pd.to_datetime(df_all['date'])

    
    # 将标的和时间作为分组关键字，使用groupby聚合
    # grouped_df = source_df.groupby(['code', pd.Grouper(key='datetime', freq=target_interval)])

    # 对每个分组应用聚合操作，例如计算OHLC和总成交量
    # kline_df = grouped_df.agg({
    #     'price': ['first', 'max', 'min', 'last'],  # open, high, low, close
    #     'volume': 'sum'
    # })


    grouped_df = df_all.groupby(['code'])
    # new_1m_df = get_new_cycle_records(new_5s_df_all, target_cycle=1000*60)
    def sub_klines_to_klines(df):
        print(df)
        return get_new_cycle_records(df, target_cycle=target_cycle)
    kline_df_all = grouped_df.apply(sub_klines_to_klines)

    # 重置索引，将分组关键字还原为列
    kline_df_all = kline_df_all.reset_index()

    # 打印结果
    print(kline_df_all)

    return kline_df_all

def agg_klines_to_klines_v2(df_all, freq = "15S"):
    """
    df_all to  freq 15s kline
    """

    df_all.columns = [
        "code",
        "datetime",
        "open",
        "high",
        "low",      
        "close",
        "volume",
    ]
    df_all['datetime'] = pd.to_datetime(df_all['datetime'])

    
    # 将标的和时间作为分组关键字，使用groupby聚合
    grouped_df = df_all.groupby(['code', pd.Grouper(key='datetime', freq=freq)])

    # 对每个分组应用聚合操作，例如计算OHLC和总成交量
    kline_df_all = grouped_df.agg({
        "open": "first",
        "high": "max",
        "low": "min",      
        "close": "last",
        # 'price': ['first', 'max', 'min', 'last'],  # open, high, low, close
        'volume': 'sum'
    })


    # grouped_df = df_all.groupby(['code'])
    # # new_1m_df = get_new_cycle_records(new_5s_df_all, target_cycle=1000*60)
    # def sub_klines_to_klines(df):
    #     print(df)
    #     return get_new_cycle_records(df, target_cycle=target_cycle)
    # kline_df_all = grouped_df.apply(sub_klines_to_klines)

    # 重置索引，将分组关键字还原为列
    kline_df_all = kline_df_all.reset_index()

    # 打印结果
    print(kline_df_all)

    return kline_df_all


In [33]:
new_15s_df = agg_klines_to_klines_v1(new_5s_df_all, target_cycle=1000*15)

        code                date        open        high         low  \
0     159915 2024-01-01 09:30:00  108.281930  108.281930  104.151050   
1     159915 2024-01-01 09:30:05  106.972438  106.972438  103.032018   
2     159915 2024-01-01 09:30:10  100.986174  109.697448  100.986174   
3     159915 2024-01-01 09:30:15  109.268868  109.268868  102.906309   
4     159915 2024-01-01 09:30:20  101.440198  107.259539  101.149417   
...      ...                 ...         ...         ...         ...   
2877  159915 2024-01-01 14:59:40  104.960517  109.462693  101.638793   
2878  159915 2024-01-01 14:59:45  100.108100  109.937868  100.108100   
2879  159915 2024-01-01 14:59:50  102.364260  106.537088  101.181140   
2880  159915 2024-01-01 14:59:55  109.789644  109.789644  101.341659   
2881  159915 2024-01-01 15:00:00  105.229317  105.229317  105.229317   

           close  volume  
0     105.755330     198  
1     104.357204     204  
2     109.697448     265  
3     107.417859     242  


In [14]:
new_15s_df

Unnamed: 0,code,level_1,date,open,close,high,low,volume
0,159915,0,2024-01-01 09:30:00,108.281930,109.697448,109.697448,100.986174,667
1,159915,1,2024-01-01 09:30:15,109.268868,106.767949,109.268868,101.149417,909
2,159915,2,2024-01-01 09:30:30,109.918832,101.061850,109.918832,101.006673,898
3,159915,3,2024-01-01 09:30:45,106.676883,106.293320,109.217639,101.757676,661
4,159915,4,2024-01-01 09:31:00,106.189841,101.541890,109.916247,101.097781,601
...,...,...,...,...,...,...,...,...
3839,518880,956,2024-01-01 14:58:55,109.964771,104.501335,109.964771,100.571175,866
3840,518880,957,2024-01-01 14:59:10,104.494519,104.796209,109.589830,100.960637,836
3841,518880,958,2024-01-01 14:59:25,109.784511,108.991164,109.784511,100.742289,941
3842,518880,959,2024-01-01 14:59:40,109.027898,107.512691,109.803560,101.008764,616


In [28]:
new_5m_df = agg_klines_to_klines_v1(new_5s_df_all, target_cycle=1000*60*5)

        code                date        open        high         low  \
0     159915 2024-01-01 09:30:00  108.281930  108.281930  104.151050   
1     159915 2024-01-01 09:30:05  106.972438  106.972438  103.032018   
2     159915 2024-01-01 09:30:10  100.986174  109.697448  100.986174   
3     159915 2024-01-01 09:30:15  109.268868  109.268868  102.906309   
4     159915 2024-01-01 09:30:20  101.440198  107.259539  101.149417   
...      ...                 ...         ...         ...         ...   
2877  159915 2024-01-01 14:59:40  104.960517  109.462693  101.638793   
2878  159915 2024-01-01 14:59:45  100.108100  109.937868  100.108100   
2879  159915 2024-01-01 14:59:50  102.364260  106.537088  101.181140   
2880  159915 2024-01-01 14:59:55  109.789644  109.789644  101.341659   
2881  159915 2024-01-01 15:00:00  105.229317  105.229317  105.229317   

           close  volume  
0     105.755330     198  
1     104.357204     204  
2     109.697448     265  
3     107.417859     242  


In [29]:
new_5m_df

Unnamed: 0,code,level_1,date,open,close,high,low,volume
0,159915,0,2024-01-01 09:30:00,108.281930,100.148578,109.951549,100.015167,14181
1,159915,1,2024-01-01 09:35:00,101.190896,103.968200,109.983396,100.057096,15412
2,159915,2,2024-01-01 09:40:00,103.748235,106.559753,109.990869,100.004234,15832
3,159915,3,2024-01-01 09:45:00,106.635980,105.343275,109.944853,100.005239,15327
4,159915,4,2024-01-01 09:50:00,107.034955,106.529090,109.951516,100.049781,15607
...,...,...,...,...,...,...,...,...
191,518880,44,2024-01-01 14:39:55,106.573255,102.520820,109.984553,100.013882,15013
192,518880,45,2024-01-01 14:44:55,109.740178,109.795234,109.928223,100.006481,15144
193,518880,46,2024-01-01 14:49:55,100.733870,104.931846,109.976939,100.007135,15281
194,518880,47,2024-01-01 14:54:55,109.808354,107.512691,109.964771,100.058755,14891


In [31]:
# new_5m_df[new_5m_df["code"] == "159915"]["close"].plot()

In [40]:
new_1min_df = agg_klines_to_klines_v2(new_5s_df_all, freq="1min")

       code            datetime        open        high         low  \
0    159915 2024-01-01 09:30:00  108.281930  109.918832  100.986174   
1    159915 2024-01-01 09:31:00  106.189841  109.951549  100.126325   
2    159915 2024-01-01 09:32:00  103.898165  109.593650  100.171301   
3    159915 2024-01-01 09:33:00  105.873349  109.930893  100.015167   
4    159915 2024-01-01 09:34:00  108.755879  109.885800  100.148578   
..      ...                 ...         ...         ...         ...   
963  518880 2024-01-01 14:56:00  104.890689  109.802939  100.058755   
964  518880 2024-01-01 14:57:00  100.500252  109.787261  100.194953   
965  518880 2024-01-01 14:58:00  106.164687  109.964771  100.086899   
966  518880 2024-01-01 14:59:00  103.675244  109.858508  100.571175   
967  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

          close  volume  
0    106.293320    3135  
1    102.702859    2425  
2    100.645316    2857  
3    108.234478    3028  
4    100.148578  

In [41]:
new_3min_df = agg_klines_to_klines_v2(new_5s_df_all, freq="3min")

       code            datetime        open        high         low  \
0    159915 2024-01-01 09:30:00  108.281930  109.951549  100.126325   
1    159915 2024-01-01 09:33:00  105.873349  109.930893  100.015167   
2    159915 2024-01-01 09:36:00  102.780226  109.983396  100.097172   
3    159915 2024-01-01 09:39:00  101.496587  109.977363  100.057096   
4    159915 2024-01-01 09:42:00  107.948428  109.990869  100.004234   
..      ...                 ...         ...         ...         ...   
323  518880 2024-01-01 14:48:00  101.596969  109.976939  100.231936   
324  518880 2024-01-01 14:51:00  106.498984  109.890346  100.007135   
325  518880 2024-01-01 14:54:00  101.785838  109.897896  100.058755   
326  518880 2024-01-01 14:57:00  100.500252  109.964771  100.086899   
327  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

          close  volume  
0    100.645316    8417  
1    108.037717    8653  
2    106.929200    9124  
3    104.271265   10100  
4    106.559753  

In [42]:
new_5mim_df = agg_klines_to_klines_v2(new_5s_df_all, freq="5min")

       code            datetime        open        high         low  \
0    159915 2024-01-01 09:30:00  108.281930  109.951549  100.015167   
1    159915 2024-01-01 09:35:00  101.190896  109.983396  100.057096   
2    159915 2024-01-01 09:40:00  103.748235  109.990869  100.004234   
3    159915 2024-01-01 09:45:00  106.635980  109.944853  100.005239   
4    159915 2024-01-01 09:50:00  107.034955  109.951516  100.049781   
..      ...                 ...         ...         ...         ...   
195  518880 2024-01-01 14:40:00  101.336930  109.984553  100.013882   
196  518880 2024-01-01 14:45:00  101.033617  109.928223  100.006481   
197  518880 2024-01-01 14:50:00  104.357534  109.976939  100.007135   
198  518880 2024-01-01 14:55:00  104.390404  109.964771  100.058755   
199  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

          close  volume  
0    100.148578   14181  
1    103.968200   15412  
2    106.559753   15832  
3    105.343275   15327  
4    106.529090  

In [43]:
new_15mim_df = agg_klines_to_klines_v2(new_5s_df_all, freq="15min")

      code            datetime        open        high         low  \
0   159915 2024-01-01 09:30:00  108.281930  109.990869  100.004234   
1   159915 2024-01-01 09:45:00  106.635980  109.983550  100.001548   
2   159915 2024-01-01 10:00:00  105.009032  109.991803  100.009960   
3   159915 2024-01-01 10:15:00  101.242598  109.999759  100.006505   
4   159915 2024-01-01 10:30:00  100.689183  109.998179  100.001945   
..     ...                 ...         ...         ...         ...   
67  518880 2024-01-01 14:00:00  104.345361  109.998027  100.024642   
68  518880 2024-01-01 14:15:00  109.177029  109.992368  100.004032   
69  518880 2024-01-01 14:30:00  101.419830  109.996758  100.003442   
70  518880 2024-01-01 14:45:00  101.033617  109.976939  100.006481   
71  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

         close  volume  
0   106.559753   45425  
1   106.617953   45865  
2   108.066213   46288  
3   104.811198   45015  
4   102.676304   44496  
..       

In [44]:
new_15S_df = agg_klines_to_klines_v2(new_5s_df_all, freq="15S")

        code            datetime        open        high         low  \
0     159915 2024-01-01 09:30:00  108.281930  109.697448  100.986174   
1     159915 2024-01-01 09:30:15  109.268868  109.268868  101.149417   
2     159915 2024-01-01 09:30:30  109.918832  109.918832  101.006673   
3     159915 2024-01-01 09:30:45  106.676883  109.217639  101.757676   
4     159915 2024-01-01 09:31:00  106.189841  109.916247  101.097781   
...      ...                 ...         ...         ...         ...   
3843  518880 2024-01-01 14:59:00  103.675244  108.442058  100.571175   
3844  518880 2024-01-01 14:59:15  108.107696  109.784511  100.742289   
3845  518880 2024-01-01 14:59:30  106.601769  109.803560  101.416578   
3846  518880 2024-01-01 14:59:45  106.623890  109.858508  101.008764   
3847  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

           close  volume  
0     109.697448     667  
1     106.767949     909  
2     101.061850     898  
3     106.293320     661  


In [45]:
new_30S_df = agg_klines_to_klines_v2(new_5s_df_all, freq="30S")

        code            datetime        open        high         low  \
0     159915 2024-01-01 09:30:00  108.281930  109.697448  100.986174   
1     159915 2024-01-01 09:30:30  109.918832  109.918832  101.006673   
2     159915 2024-01-01 09:31:00  106.189841  109.916247  101.003089   
3     159915 2024-01-01 09:31:30  107.762900  109.951549  100.126325   
4     159915 2024-01-01 09:32:00  103.898165  109.576168  100.171301   
...      ...                 ...         ...         ...         ...   
1923  518880 2024-01-01 14:58:00  106.164687  109.836173  100.170089   
1924  518880 2024-01-01 14:58:30  100.086899  109.964771  100.086899   
1925  518880 2024-01-01 14:59:00  103.675244  109.784511  100.571175   
1926  518880 2024-01-01 14:59:30  106.601769  109.858508  101.008764   
1927  518880 2024-01-01 15:00:00  100.784758  100.784758  100.784758   

           close  volume  
0     106.767949    1576  
1     106.293320    1559  
2     108.257222    1150  
3     102.702859    1275  
