In [13]:
import sys
sys.path.append('../')

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import akshare as ak
import sqlite3
import matplotlib.pyplot as plt
%matplotlib inline

from datetime import datetime, date
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
from database.downloader.downloader_base import DownloaderBase
import database.database_config as db_config

pd.options.display.max_rows=None
pd.options.display.max_columns=None

In [14]:
def plot_series_dist(series):
    """Plot a 60-bin histogram of ``series`` with pyplot and show it.

    Args:
        series: Values to histogram; handed to ``plt.hist`` unchanged.
    """
    hist_style = {'bins': 60, 'edgecolor': 'k', 'alpha': 0.7}
    plt.hist(series, **hist_style)
    # Axis labels and title are fixed, generic strings.
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Histogram of Data')
    plt.show()

class PreProcessing:
    """Assemble per-stock modelling frames (prices + factors + label).

    All raw tables are fetched through the injected ``db_downloader``.
    """

    def __init__(self, db_downloader:DownloaderBase) -> None:
        self.db_downloader = db_downloader

    def _build_label(self, stock_dataframe, holding_days=5):
        """Build the ``future_return`` label for one stock.

        The label for row ``t`` is ``close[t + holding_days] / open[t + 1] - 1``.
        NOTE(review): the original comment described selling at the open of day
        N+1, while the code uses the close of day ``t + N`` -- confirm which one
        is intended.

        Args:
            stock_dataframe: Frame with at least ``datetime``, ``open``, ``close``,
                ``high`` and ``low`` columns; it is not modified.
            holding_days: Number of rows to look ahead for the exit price.

        Returns:
            Frame with the columns ``datetime`` and ``future_return``.
        """
        df = stock_dataframe.copy()
        df['future_return'] = df['close'].shift(-holding_days) / df['open'].shift(-1) - 1
        # Winsorise the label at its 1% / 99% quantiles.
        # NOTE(review): the quantiles are computed over every row passed in, so
        # later rows influence the clipping of earlier ones.
        lower_bound = np.nanquantile(df['future_return'], 0.01)
        upper_bound = np.nanquantile(df['future_return'], 0.99)
        df['future_return'] = np.clip(df['future_return'], lower_bound, upper_bound)
        # Drop rows whose next bar has high == low (next-day one-price limit bar).
        df = df[df['high'].shift(-1) != df['low'].shift(-1)]
        return df[['datetime', 'future_return']]

    def _process_one_stock(self, stock_code, start_date, end_date, stock_factor_date=None):
        """Merge history, label, date factors and qlib factors for one stock.

        Args:
            stock_code: Code passed to the downloader.
            start_date: Start of the requested range, passed to the downloader.
            end_date: End of the requested range, passed to the downloader.
            stock_factor_date: Optional pre-fetched date-factor frame. When omitted
                it is downloaded here, which keeps the original call signature working.

        Returns:
            The merged frame with every row containing a NaN removed.
        """
        stock_base = self.db_downloader._download_stock_history_info(stock_code, start_date, end_date)
        if stock_factor_date is None:
            stock_factor_date = self.db_downloader._download_stock_factor_date_info()
        stock_factor_qlib = self.db_downloader._download_stock_factor_qlib_info(stock_code, start_date, end_date)
        stock_label = self._build_label(stock_base)
        stock_df = (
            stock_base
            .merge(stock_label, on=['datetime'])
            .merge(stock_factor_date, on=['datetime'])
            .merge(stock_factor_qlib, on=['stock_code', 'datetime'])
        )
        return stock_df.dropna()

    def _process_all_stock(self, code_type, start_date, end_date):
        """Build and concatenate the frames of every constituent of an index.

        Args:
            code_type: Index code given to ``ak.index_stock_cons`` (the original
                comments mention "000905" for CSI 500 and "000300" for CSI 300).
            start_date: Start of the requested range.
            end_date: End of the requested range.

        Returns:
            Concatenation of all non-empty per-stock frames.

        Raises:
            ValueError: If no constituent produced any row.
        """
        stock_code_list = list(ak.index_stock_cons(code_type)['品种代码'].unique())
        # The date-factor download takes no arguments, so fetch it once and
        # reuse it for every stock instead of once per stock.
        stock_factor_date = self.db_downloader._download_stock_factor_date_info()
        stock_df_list = []
        for stock_code in tqdm(stock_code_list, desc=f'Process: {code_type} ...'):
            stock_df = self._process_one_stock(
                stock_code, start_date, end_date, stock_factor_date=stock_factor_date
            )
            if not stock_df.empty:
                stock_df_list.append(stock_df)
        if not stock_df_list:
            # pd.concat([]) would raise a ValueError too, but without any context.
            raise ValueError(
                f"No data available for index {code_type} between {start_date} and {end_date}"
            )
        return pd.concat(stock_df_list)

In [15]:
# Open the project's SQLite database and wrap it in the downloader used by PreProcessing.
# NOTE(review): nothing visible in this notebook closes db_conn.
db_conn = sqlite3.connect('../database/hh_quant.db')
db_downloader = DownloaderBase(db_conn, db_config)

# NOTE(review): "proprocessor" looks like a typo for "preprocessor"; the name is referenced by the training cell.
proprocessor = PreProcessing(db_downloader=db_downloader)

## 使用Tensorflow

In [16]:
# 使用tensorflow处理原始数据
import numpy as np
import pandas as pd
import tensorflow as tf
print(tf.__version__)

2.15.0


In [17]:
class Senet(tf.keras.layers.Layer):
    """Squeeze-and-excitation style re-weighting of a list of field tensors.

    The input is a list of tensors (one per field). Each field is averaged over
    its last axis, passed through a two-layer bottleneck, and the resulting
    per-field weight rescales the field; the unscaled input is added back.
    """

    def __init__(self, reduction_ratio=3, seed=1024, **kwargs):
        super().__init__(**kwargs)
        self.reduction_ratio = reduction_ratio
        self.seed = seed

    def build(self, input_shape):
        # input_shape has one entry per incoming field tensor.
        num_fields = len(input_shape)
        self.field_size = num_fields
        self.reduction_size = max(1, num_fields // self.reduction_ratio)
        self.scale_layer = tf.keras.layers.Dense(units=self.reduction_size, activation='relu')
        self.expand_layer = tf.keras.layers.Dense(units=num_fields, activation='relu')
        super().build(input_shape)

    def call(self, inputs, training=False):
        # Stack the fields along a new axis 1.
        stacked = tf.concat([tf.expand_dims(field, axis=1) for field in inputs], axis=1)  # [B, N, dim]
        squeezed = tf.reduce_mean(stacked, axis=-1)  # [B, N]
        bottleneck = self.scale_layer(squeezed, training=training)  # [B, X]
        field_weights = self.expand_layer(bottleneck, training=training)  # [B, N]
        reweighted = tf.multiply(stacked, tf.expand_dims(field_weights, axis=-1))
        # Skip connection: keep the original signal next to the re-weighted one.
        return reweighted + stacked  # [B, N, dim]

    def get_config(self):
        layer_config = super().get_config()
        layer_config.update({
            'reduction_ratio': self.reduction_ratio,
            'seed': self.seed
        })
        return layer_config

class Dnn(tf.keras.layers.Layer):
    """Stack of Dense -> (optional BatchNormalization) -> Dropout blocks.

    Args:
        hidden_units: Output width of each Dense layer, in order.
        activation: Activation passed to every Dense layer.
        dropout_rate: Rate passed to every Dropout layer.
        use_bn: Whether a BatchNormalization layer follows each Dense layer.
        seed: Seed passed to every Dropout layer.
    """

    def __init__(self, hidden_units=(64, 32), activation="relu", dropout_rate=0.2, use_bn=True, seed=1024, **kwargs):
        super(Dnn, self).__init__(**kwargs)
        # Copy into a fresh list so neither a shared default nor the caller's
        # list can be mutated through this layer.
        self.hidden_units = list(hidden_units)
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.seed = seed
        self.dense_layers = []
        self.dropout_layers = []
        self.bn_layers = []

    def build(self, input_shape):
        # Only create sub-layers once: a second explicit build() call would
        # otherwise append a duplicate set to the lists.
        if len(self.dense_layers) == 0:
            for units in self.hidden_units:
                self.dense_layers.append(tf.keras.layers.Dense(units=units, activation=self.activation))
                self.dropout_layers.append(tf.keras.layers.Dropout(rate=self.dropout_rate, seed=self.seed))
                if self.use_bn:
                    self.bn_layers.append(tf.keras.layers.BatchNormalization())
        super(Dnn, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, training=False):
        x = inputs
        for i in range(len(self.hidden_units)):
            x = self.dense_layers[i](x)
            if self.use_bn:
                # Batch norm is applied after the activated Dense output.
                x = self.bn_layers[i](x, training=training)
            x = self.dropout_layers[i](x, training=training)
        return x

    def get_config(self):
        config = super(Dnn, self).get_config()
        config.update({
            # Return a plain list copy so callers cannot alter the layer's state.
            'hidden_units': list(self.hidden_units),
            'activation': self.activation,
            'dropout_rate': self.dropout_rate,
            'use_bn': self.use_bn,
            'seed': self.seed
        })
        return config
    
class QuantModel(tf.keras.Model):
    """Embedding model with one Senet + Dnn tower per task.

    Every input feature is mapped to an integer index by a lookup layer
    (Discretization, IntegerLookup or StringLookup) and then embedded. The list
    of embeddings is fed to each task tower, which ends in a single-unit Dense
    layer.

    Args:
        config: Dict of model settings. Keys read here:
            ``numeric_features_with_boundaries``,
            ``integer_categorical_features_with_vocab``,
            ``string_categorical_features_with_vocab`` (each optional, mapping
            feature name -> boundaries / vocabulary), ``feature_embedding_dims``,
            ``task_type`` (required list of task names), ``reduction_ratio``,
            ``seed``, ``dnn_hidden_units``, ``dnn_activation``, ``dnn_dropout``
            and ``dnn_use_bn``.
    """

    def __init__(self, config, **kwargs):
        super(QuantModel, self).__init__(**kwargs)
        self.config = config

        # Per-feature lookup and embedding layers, keyed by feature name.
        self.lookup_layers = {}
        self.embedding_layers = {}
        embedding_dim = self.config.get("feature_embedding_dims", 6)

        # A missing feature group is treated as "no features of that kind"
        # instead of failing on None.items().
        # Continuous features: bucketize by boundaries, then embed the bucket index.
        for feature_name, boundaries in (self.config.get("numeric_features_with_boundaries") or {}).items():
            self.lookup_layers[feature_name] = tf.keras.layers.Discretization(bin_boundaries=boundaries, output_mode='int', name=f'{feature_name}_lookup')
            self.embedding_layers[feature_name] = tf.keras.layers.Embedding(input_dim=len(boundaries) + 1, output_dim=embedding_dim, name=f'{feature_name}_embedding')
        # Integer categorical features: vocabulary lookup, then embedding.
        for feature_name, vocab in (self.config.get("integer_categorical_features_with_vocab") or {}).items():
            self.lookup_layers[feature_name] = tf.keras.layers.IntegerLookup(vocabulary=vocab, name=f'{feature_name}_lookup')
            self.embedding_layers[feature_name] = tf.keras.layers.Embedding(input_dim=len(vocab) + 1, output_dim=embedding_dim, name=f'{feature_name}_embedding')
        # String categorical features: vocabulary lookup, then embedding.
        for feature_name, vocab in (self.config.get("string_categorical_features_with_vocab") or {}).items():
            self.lookup_layers[feature_name] = tf.keras.layers.StringLookup(vocabulary=vocab, name=f'{feature_name}_lookup')
            self.embedding_layers[feature_name] = tf.keras.layers.Embedding(input_dim=len(vocab) + 1, output_dim=embedding_dim, name=f'{feature_name}_embedding')

        # One tower per task.
        self.task_tower_list = []
        for task_type in self.config['task_type']:
            task_tower = tf.keras.Sequential([
                Senet(reduction_ratio=self.config.get('reduction_ratio', 3), seed=self.config.get('seed', 1024)),
                tf.keras.layers.Flatten(),
                Dnn(
                    hidden_units=self.config.get('dnn_hidden_units', [64,32]),
                    activation=self.config.get('dnn_activation', 'relu'),
                    dropout_rate=self.config.get('dnn_dropout', 0.2),
                    use_bn=self.config.get('dnn_use_bn', True),
                    seed=self.config.get('seed', 1024),
                ),
                tf.keras.layers.Dense(1, activation=None, name=task_type)
            ])
            self.task_tower_list.append(task_tower)

    def call(self, inputs, training=False):
        """Encode every input feature and run each task tower.

        Args:
            inputs: Dict mapping feature name -> tensor. Every key must have
                been registered in ``__init__``; an unknown key raises KeyError.
            training: Forwarded explicitly to each tower so dropout and batch
                norm follow the caller's mode.

        Returns:
            List with one output tensor per task, in ``task_type`` order.

        Raises:
            ValueError: If ``inputs`` is not a dict.
        """
        if not isinstance(inputs, dict):
            raise ValueError('The inputs to the model should be a dictionary where keys are feature names.')
        encoded_features = []
        for feature_name, feature_value in inputs.items():
            lookup_layer = self.lookup_layers[feature_name]
            embedding_layer = self.embedding_layers[feature_name]
            encoded_features.append(embedding_layer(lookup_layer(feature_value)))

        logits_list = []
        for task_tower in self.task_tower_list:
            logits_list.append(task_tower(encoded_features, training=training))
        return logits_list

    def get_config(self):
        """Return the base config extended with this model's ``config`` dict."""
        config = super(QuantModel, self).get_config()
        config.update({
            # Assumes self.config is serializable -- TODO confirm before relying on model.save().
            'config': self.config
        })
        return config

In [18]:
def extract_train_val_data(df, train_start_date, train_end_date, val_start_date, val_end_date):
    """Split ``df`` into training and validation rows by its ``datetime`` column.

    Both ranges are inclusive at both ends. The row counts are printed.

    Args:
        df: Frame with a ``datetime`` column parseable by ``pd.to_datetime``.
        train_start_date: First day of the training range.
        train_end_date: Last day of the training range.
        val_start_date: First day of the validation range.
        val_end_date: Last day of the validation range.

    Returns:
        Tuple ``(train_data, val_data)`` of row subsets of ``df``.
    """
    train_start_date = pd.to_datetime(train_start_date)
    train_end_date = pd.to_datetime(train_end_date)
    val_start_date = pd.to_datetime(val_start_date)
    val_end_date = pd.to_datetime(val_end_date)

    # Parse the column once instead of once per comparison.
    row_dates = pd.to_datetime(df['datetime'])
    train_mask = (row_dates >= train_start_date) & (row_dates <= train_end_date)
    val_mask = (row_dates >= val_start_date) & (row_dates <= val_end_date)
    train_data = df[train_mask]
    val_data = df[val_mask]

    print(f"train_data_size: {train_data.shape}")
    print(f"validation_data_size: {val_data.shape}")
    return train_data, val_data

def transfer_data_type(df, columns, dtype):
    """Cast the listed columns of ``df`` to ``dtype`` in place.

    Args:
        df: Frame to modify.
        columns: Iterable of column names to convert.
        dtype: Target dtype, passed to ``Series.astype``.

    Returns:
        The same ``df`` object, after conversion.
    """
    for column_name in columns:
        converted = df[column_name].astype(dtype)
        df[column_name] = converted
    return df

def get_numeric_boundaries(series, num_bins=30):
    """Compute sorted bin boundaries for a numeric series.

    Missing values are ignored. With fewer than ``num_bins`` distinct values,
    the distinct values themselves are the boundaries; otherwise the edges of
    ``num_bins`` equal-frequency bins from ``pd.qcut`` are used, with duplicate
    edges dropped.

    Args:
        series: Numeric pandas Series.
        num_bins: Requested number of quantile bins.

    Returns:
        Ascending list of boundaries as plain Python numbers; empty when the
        series has no non-missing value.
    """
    # Drop NaN first: nunique() ignores NaN but unique() does not, and a NaN
    # inside sorted() yields an unreliable order plus a NaN boundary.
    values = series.dropna()
    if values.nunique() < num_bins:
        boundaries = sorted(values.unique().tolist())
    else:
        boundaries = pd.qcut(values, num_bins, retbins=True, duplicates='drop')[1].tolist()
    return boundaries

def df_to_dataset(dataframe, feature_cols, label_cols, shuffle=True, batch_size=32, seed=None):
    """Convert a DataFrame into a batched ``tf.data.Dataset``.

    Each element is ``(features, labels)`` where ``features`` is a dict keyed by
    feature column name and ``labels`` is a tuple with one entry per label column.

    Args:
        dataframe: Source frame.
        feature_cols: List of feature column names.
        label_cols: List of label column names.
        shuffle: Whether to shuffle rows, using a buffer covering the whole frame.
        batch_size: Number of rows per batch.
        seed: Optional shuffle seed for reproducible ordering.

    Returns:
        The batched, prefetching dataset.
    """
    features = dataframe[feature_cols]
    labels = tuple(dataframe[label_col] for label_col in label_cols)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if shuffle:
        # The shuffle buffer must be at least 1, even for an empty frame.
        ds = ds.shuffle(buffer_size=max(len(features), 1), seed=seed)
    ds = ds.batch(batch_size)
    # After batch() the prefetch unit is a whole batch; let tf.data size the
    # buffer instead of holding `batch_size` batches in memory.
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

In [19]:
# Rolling back-test settings; dates are 'YYYYMMDD' strings.
backtest_start_date = '20200101'
backtest_end_date = '20240101'
train_period = 6 # years: length of each training window
update_period = 6 # months: interval between model updates

def get_rolling_date_period(backtest_start_date, backtest_end_date, training_period, update_period):
    """Generate rolling train/validation date windows.

    Starting at ``backtest_start_date``, a window is emitted every
    ``update_period`` months for as long as the window start is strictly before
    ``backtest_end_date``. The validation range spans ``update_period`` months
    from the window start; the training range is the ``training_period`` years
    ending the day before the window start. The last validation end date is not
    clipped to ``backtest_end_date``.

    Args:
        backtest_start_date: First validation start, as a 'YYYYMMDD' string.
        backtest_end_date: Exclusive upper bound for window starts, 'YYYYMMDD'.
        training_period: Training window length in years.
        update_period: Step between windows in months; must be positive.

    Returns:
        List of ``[train_start, train_end, val_start, val_end]`` 'YYYYMMDD' strings.

    Raises:
        ValueError: If ``update_period`` is not positive, since the window would
            never advance.
    """
    if update_period <= 0:
        raise ValueError(f"update_period must be a positive number of months, got {update_period}")

    window_start = datetime.strptime(backtest_start_date, '%Y%m%d')
    backtest_end = datetime.strptime(backtest_end_date, '%Y%m%d')
    step = relativedelta(months=update_period)
    one_day = relativedelta(days=1)

    result = []
    while window_start < backtest_end:
        train_start = window_start - relativedelta(years=training_period)
        train_end = window_start - one_day
        val_end = window_start + step - one_day
        result.append([
            train_start.strftime("%Y%m%d"),
            train_end.strftime("%Y%m%d"),
            window_start.strftime("%Y%m%d"),
            val_end.strftime("%Y%m%d")
            ])
        window_start += step
    return result

# Build the list of [train_start, train_end, val_start, val_end] windows and display it.
rolling_period = get_rolling_date_period(backtest_start_date, backtest_end_date, train_period, update_period)
rolling_period

[['20140101', '20191231', '20200101', '20200630'],
 ['20140701', '20200630', '20200701', '20201231'],
 ['20150101', '20201231', '20210101', '20210630'],
 ['20150701', '20210630', '20210701', '20211231'],
 ['20160101', '20211231', '20220101', '20220630'],
 ['20160701', '20220630', '20220701', '20221231'],
 ['20170101', '20221231', '20230101', '20230630'],
 ['20170701', '20230630', '20230701', '20231231']]

In [22]:
feature_config = {
    "target_feature_name": ["future_return"],
    "numeric_features": ['KMID', 'KLEN', 'KMID2', 'KUP', 'KUP2', 'KLOW', 'KLOW2', 'KSFT', 'KSFT2', 'OPEN0', 'OPEN1', 'OPEN2', 'OPEN3', 'OPEN4', 'HIGH0', 'HIGH1', 'HIGH2', 'HIGH3', 'HIGH4', 'LOW0', 'LOW1', 'LOW2', 'LOW3', 'LOW4', 'CLOSE0', 'CLOSE1', 'CLOSE2', 'CLOSE3', 'CLOSE4', 'VOLUME0', 'VOLUME1', 'VOLUME2', 'VOLUME3', 'VOLUME4', 'ROC5', 'ROC10', 'ROC20', 'ROC30', 'ROC60', 'MAX5', 'MAX10', 'MAX20', 'MAX30', 'MAX60', 'MIN5', 'MIN10', 'MIN20', 'MIN30', 'MIN60', 'MA5', 'MA10', 'MA20', 'MA30', 'MA60', 'STD5', 'STD10', 'STD20', 'STD30', 'STD60', 'BETA5', 'BETA10', 'BETA20', 'BETA30', 'BETA60', 'RSQR5', 'RSQR10', 'RSQR20', 'RSQR30', 'RSQR60', 'RESI5', 'RESI10', 'RESI20', 'RESI30', 'RESI60', 'QTLU5', 'QTLU10', 'QTLU20', 'QTLU30', 'QTLU60', 'QTLD5', 'QTLD10', 'QTLD20', 'QTLD30', 'QTLD60', 'TSRANK5', 'TSRANK10', 'TSRANK20', 'TSRANK30', 'TSRANK60', 'RSV5', 'RSV10', 'RSV20', 'RSV30', 'RSV60', 'IMAX5', 'IMAX10', 'IMAX20', 'IMAX30', 'IMAX60', 'IMIN5', 'IMIN10', 'IMIN20', 'IMIN30', 'IMIN60', 'IMXD5', 'IMXD10', 'IMXD20', 'IMXD30', 'IMXD60', 'CORR5', 'CORR10', 'CORR20', 'CORR30', 'CORR60', 'CORD5', 'CORD10', 'CORD20', 'CORD30', 'CORD60', 'CNTP5', 'CNTP10', 'CNTP20', 'CNTP30', 'CNTP60', 'CNTN5', 'CNTN10', 'CNTN20', 'CNTN30', 'CNTN60', 'CNTD5', 'CNTD10', 'CNTD20', 'CNTD30', 'CNTD60', 'SUMP5', 'SUMP10', 'SUMP20', 'SUMP30', 'SUMP60', 'SUMN5', 'SUMN10', 'SUMN20', 'SUMN30', 'SUMN60', 'SUMD5', 'SUMD10', 'SUMD20', 'SUMD30', 'SUMD60', 'VMA5', 'VMA10', 'VMA20', 'VMA30', 'VMA60', 'VSTD5', 'VSTD10', 'VSTD20', 'VSTD30', 'VSTD60', 'WVMA5', 'WVMA10', 'WVMA20', 'WVMA30', 'WVMA60', 'VSUMP5', 'VSUMP10', 'VSUMP20', 'VSUMP30', 'VSUMP60', 'VSUMN5', 'VSUMN10', 'VSUMN20', 'VSUMN30', 'VSUMN60', 'VSUMD5', 'VSUMD10', 'VSUMD20', 'VSUMD30', 'VSUMD60'],
    "integer_categorical_features": ['weekday', 'day_of_month', 'month'],
    "string_categorical_features": ['day_of_week', 'season']
}
full_feature_names = feature_config.get('numeric_features', []) + feature_config.get('integer_categorical_features', []) + feature_config.get('string_categorical_features', [])
target_feature_names = feature_config.get('target_feature_name', [])
stock_pool = '000016' # index code whose constituents form the training universe
batch_size = 128

# Training hyper-parameters, gathered here so they are easy to tune.
random_seed = 1024
max_epochs = 50
early_stopping_patience = 10
learning_rate = 1e-4

for date_period in tqdm(rolling_period, desc='Rolling Training...'):
    train_start_date, train_end_date, val_start_date, val_end_date = date_period
    print(f"train_start: {train_start_date}, train_end: {train_end_date}, val_start: {val_start_date}, val_end: {val_end_date}")

    # Start every window from a clean, seeded state: release the previous
    # window's Keras state and make initialisation / shuffling reproducible.
    tf.keras.backend.clear_session()
    tf.keras.utils.set_random_seed(random_seed)

    # 1. Load every stock of the pool for the train + validation range.
    df = proprocessor._process_all_stock(code_type=stock_pool, start_date=train_start_date, end_date=val_end_date)
    # 2. Split into training and validation rows.
    train_data, val_data = extract_train_val_data(df, train_start_date, train_end_date, val_start_date, val_end_date)
    if train_data.empty or val_data.empty:
        # Nothing to fit or to predict for this window.
        print(f"Skipping window {val_start_date}-{val_end_date}: empty training or validation data")
        continue
    # 3. Build the tf.data pipelines (validation order must stay aligned with val_data).
    train_ds = df_to_dataset(train_data, full_feature_names, target_feature_names, shuffle=True, batch_size=batch_size)
    val_ds = df_to_dataset(val_data, full_feature_names, target_feature_names, shuffle=False, batch_size=batch_size)
    # 4. Model configuration; boundaries and vocabularies come from training rows only.
    model_config = {
        "seed": random_seed,
        "reduction_ratio": 3,
        "dnn_hidden_units": [64,32],
        "dnn_activation": 'relu',
        "dnn_dropout": 0.1,
        "dnn_use_bn": True,
        "numeric_features_with_boundaries": {k: list(get_numeric_boundaries(train_data[k])) for k in feature_config.get('numeric_features', [])},
        "integer_categorical_features_with_vocab": {k: list(train_data[k].unique()) for k in feature_config.get('integer_categorical_features', [])},
        "string_categorical_features_with_vocab": {k: list(train_data[k].unique()) for k in feature_config.get('string_categorical_features', [])},
        "feature_embedding_dims": 6,
        "task_type": ['reg'],
    }
    # 5. Create the model.
    model = QuantModel(model_config)
    # 6. Early stopping on validation loss.
    # NOTE(review): the same validation window is later used as the back-test
    # prediction set, so model selection sees the evaluation period.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        verbose=2,
        patience=early_stopping_patience,
        mode='min',
        restore_best_weights=True,
    )
    # 7. TensorBoard logging, one directory per run.
    log_dir = "./logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # 8. Optimizer and loss.
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate) # for Mac M1/M2
    # optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) # for intel
    loss = [tf.keras.losses.MeanSquaredError()]
    model.compile(optimizer=optimizer, loss=loss)
    model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=max_epochs,
            verbose=2,
            callbacks=[tensorboard_callback, early_stopping]
    )
    # 9. Optional: persist the trained model for this window.
    # model_save_path = f'./models/saved_model/model_of_{val_start_date}'
    # model.save(model_save_path)

    # 10. Store the validation predictions for the back-test.
    model_red_result = model.predict(val_ds)
    # The model returns one output per task; take the first (only) task.
    first_task_pred = model_red_result[0] if isinstance(model_red_result, (list, tuple)) else model_red_result
    # Copy so that adding a column does not write into a slice of val_data.
    output_df = val_data[['stock_code', 'stock_name', 'datetime', 'future_return']].copy()
    # Flatten the (n, 1) prediction array to one value per row.
    output_df['future_return_pred'] = np.asarray(first_task_pred).reshape(-1)
    output_file_path = f'../../Offline/backtest/backtest_data/stock_selection_results_{val_start_date}.pkl'
    output_df.to_pickle(output_file_path)

Rolling Training...:   0%|          | 0/8 [00:00<?, ?it/s]

train_start: 20140101, train_end: 20191231, val_start: 20200101, val_end: 20200630


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 24.90it/s]


train_data_size: (59264, 199)
validation_data_size: (5000, 199)
Epoch 1/50
463/463 - 12s - loss: 0.7604 - val_loss: 0.1998 - 12s/epoch - 27ms/step
Epoch 2/50
463/463 - 6s - loss: 0.3056 - val_loss: 0.1021 - 6s/epoch - 12ms/step
Epoch 3/50
463/463 - 6s - loss: 0.1898 - val_loss: 0.0552 - 6s/epoch - 12ms/step
Epoch 4/50
463/463 - 6s - loss: 0.1365 - val_loss: 0.0308 - 6s/epoch - 12ms/step
Epoch 5/50
463/463 - 6s - loss: 0.1020 - val_loss: 0.0188 - 6s/epoch - 13ms/step
Epoch 6/50
463/463 - 5s - loss: 0.0807 - val_loss: 0.0122 - 5s/epoch - 12ms/step
Epoch 7/50
463/463 - 6s - loss: 0.0634 - val_loss: 0.0085 - 6s/epoch - 13ms/step
Epoch 8/50
463/463 - 6s - loss: 0.0513 - val_loss: 0.0063 - 6s/epoch - 13ms/step
Epoch 9/50
463/463 - 6s - loss: 0.0406 - val_loss: 0.0049 - 6s/epoch - 12ms/step
Epoch 10/50
463/463 - 6s - loss: 0.0325 - val_loss: 0.0041 - 6s/epoch - 12ms/step
Epoch 11/50
463/463 - 6s - loss: 0.0256 - val_loss: 0.0036 - 6s/epoch - 12ms/step
Epoch 12/50
463/463 - 5s - loss: 0.0205 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20200101/assets




Rolling Training...:  12%|█▎        | 1/8 [04:25<30:58, 265.57s/it]

train_start: 20140701, train_end: 20200630, val_start: 20200701, val_end: 20201231


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 24.62it/s]


train_data_size: (59964, 199)
validation_data_size: (5567, 199)
Epoch 1/50
469/469 - 12s - loss: 0.6161 - val_loss: 0.3057 - 12s/epoch - 27ms/step
Epoch 2/50
469/469 - 5s - loss: 0.2459 - val_loss: 0.0925 - 5s/epoch - 12ms/step
Epoch 3/50
469/469 - 5s - loss: 0.1557 - val_loss: 0.0496 - 5s/epoch - 11ms/step
Epoch 4/50
469/469 - 5s - loss: 0.1089 - val_loss: 0.0269 - 5s/epoch - 12ms/step
Epoch 5/50
469/469 - 5s - loss: 0.0802 - val_loss: 0.0158 - 5s/epoch - 11ms/step
Epoch 6/50
469/469 - 5s - loss: 0.0613 - val_loss: 0.0102 - 5s/epoch - 11ms/step
Epoch 7/50
469/469 - 5s - loss: 0.0471 - val_loss: 0.0070 - 5s/epoch - 11ms/step
Epoch 8/50
469/469 - 5s - loss: 0.0368 - val_loss: 0.0053 - 5s/epoch - 11ms/step
Epoch 9/50
469/469 - 6s - loss: 0.0290 - val_loss: 0.0045 - 6s/epoch - 12ms/step
Epoch 10/50
469/469 - 5s - loss: 0.0226 - val_loss: 0.0039 - 5s/epoch - 12ms/step
Epoch 11/50
469/469 - 5s - loss: 0.0176 - val_loss: 0.0035 - 5s/epoch - 12ms/step
Epoch 12/50
469/469 - 5s - loss: 0.0135 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20200701/assets




Rolling Training...:  25%|██▌       | 2/8 [09:06<27:27, 274.62s/it]

train_start: 20150101, train_end: 20201231, val_start: 20210101, val_end: 20210630


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 24.28it/s]


train_data_size: (60799, 199)
validation_data_size: (5307, 199)
Epoch 1/50
475/475 - 12s - loss: 0.7925 - val_loss: 0.0484 - 12s/epoch - 25ms/step
Epoch 2/50
475/475 - 5s - loss: 0.3231 - val_loss: 0.1080 - 5s/epoch - 11ms/step
Epoch 3/50
475/475 - 5s - loss: 0.2067 - val_loss: 0.0557 - 5s/epoch - 11ms/step
Epoch 4/50
475/475 - 5s - loss: 0.1469 - val_loss: 0.0301 - 5s/epoch - 11ms/step
Epoch 5/50
475/475 - 5s - loss: 0.1124 - val_loss: 0.0180 - 5s/epoch - 11ms/step
Epoch 6/50
475/475 - 5s - loss: 0.0875 - val_loss: 0.0119 - 5s/epoch - 11ms/step
Epoch 7/50
475/475 - 5s - loss: 0.0708 - val_loss: 0.0084 - 5s/epoch - 11ms/step
Epoch 8/50
475/475 - 5s - loss: 0.0569 - val_loss: 0.0065 - 5s/epoch - 11ms/step
Epoch 9/50
475/475 - 5s - loss: 0.0455 - val_loss: 0.0053 - 5s/epoch - 11ms/step
Epoch 10/50
475/475 - 5s - loss: 0.0354 - val_loss: 0.0046 - 5s/epoch - 11ms/step
Epoch 11/50
475/475 - 5s - loss: 0.0280 - val_loss: 0.0042 - 5s/epoch - 11ms/step
Epoch 12/50
475/475 - 5s - loss: 0.0220 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20210101/assets




Rolling Training...:  38%|███▊      | 3/8 [13:30<22:28, 269.64s/it]

train_start: 20150701, train_end: 20210630, val_start: 20210701, val_end: 20211231


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 24.21it/s]


train_data_size: (61763, 199)
validation_data_size: (5723, 199)
Epoch 1/50
483/483 - 13s - loss: 0.5469 - val_loss: 0.0405 - 13s/epoch - 26ms/step
Epoch 2/50
483/483 - 6s - loss: 0.2170 - val_loss: 0.0683 - 6s/epoch - 12ms/step
Epoch 3/50
483/483 - 6s - loss: 0.1359 - val_loss: 0.0364 - 6s/epoch - 12ms/step
Epoch 4/50
483/483 - 6s - loss: 0.0969 - val_loss: 0.0213 - 6s/epoch - 12ms/step
Epoch 5/50
483/483 - 6s - loss: 0.0724 - val_loss: 0.0136 - 6s/epoch - 12ms/step
Epoch 6/50
483/483 - 6s - loss: 0.0565 - val_loss: 0.0095 - 6s/epoch - 12ms/step
Epoch 7/50
483/483 - 6s - loss: 0.0441 - val_loss: 0.0072 - 6s/epoch - 12ms/step
Epoch 8/50
483/483 - 6s - loss: 0.0341 - val_loss: 0.0057 - 6s/epoch - 12ms/step
Epoch 9/50
483/483 - 6s - loss: 0.0259 - val_loss: 0.0049 - 6s/epoch - 12ms/step
Epoch 10/50
483/483 - 6s - loss: 0.0196 - val_loss: 0.0043 - 6s/epoch - 12ms/step
Epoch 11/50
483/483 - 6s - loss: 0.0149 - val_loss: 0.0039 - 6s/epoch - 12ms/step
Epoch 12/50
483/483 - 6s - loss: 0.0111 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20210701/assets




Rolling Training...:  50%|█████     | 4/8 [17:55<17:52, 268.04s/it]

train_start: 20160101, train_end: 20211231, val_start: 20220101, val_end: 20220630


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 23.42it/s]


train_data_size: (62931, 199)
validation_data_size: (5482, 199)
Epoch 1/50
492/492 - 13s - loss: 0.7202 - val_loss: 0.1032 - 13s/epoch - 26ms/step
Epoch 2/50
492/492 - 5s - loss: 0.2818 - val_loss: 0.0827 - 5s/epoch - 11ms/step
Epoch 3/50
492/492 - 5s - loss: 0.1722 - val_loss: 0.0369 - 5s/epoch - 11ms/step
Epoch 4/50
492/492 - 5s - loss: 0.1234 - val_loss: 0.0188 - 5s/epoch - 11ms/step
Epoch 5/50
492/492 - 5s - loss: 0.0948 - val_loss: 0.0111 - 5s/epoch - 11ms/step
Epoch 6/50
492/492 - 5s - loss: 0.0753 - val_loss: 0.0075 - 5s/epoch - 11ms/step
Epoch 7/50
492/492 - 5s - loss: 0.0596 - val_loss: 0.0056 - 5s/epoch - 11ms/step
Epoch 8/50
492/492 - 5s - loss: 0.0480 - val_loss: 0.0044 - 5s/epoch - 11ms/step
Epoch 9/50
492/492 - 5s - loss: 0.0374 - val_loss: 0.0037 - 5s/epoch - 11ms/step
Epoch 10/50
492/492 - 5s - loss: 0.0294 - val_loss: 0.0033 - 5s/epoch - 11ms/step
Epoch 11/50
492/492 - 5s - loss: 0.0227 - val_loss: 0.0030 - 5s/epoch - 11ms/step
Epoch 12/50
492/492 - 5s - loss: 0.0178 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20220101/assets




Rolling Training...:  62%|██████▎   | 5/8 [22:11<13:10, 263.63s/it]

train_start: 20160701, train_end: 20220630, val_start: 20220701, val_end: 20221231


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 23.00it/s]


train_data_size: (63834, 199)
validation_data_size: (5909, 199)
Epoch 1/50
499/499 - 12s - loss: 0.8581 - val_loss: 0.2136 - 12s/epoch - 24ms/step
Epoch 2/50
499/499 - 6s - loss: 0.3462 - val_loss: 0.1097 - 6s/epoch - 11ms/step
Epoch 3/50
499/499 - 6s - loss: 0.2199 - val_loss: 0.0546 - 6s/epoch - 11ms/step
Epoch 4/50
499/499 - 6s - loss: 0.1573 - val_loss: 0.0302 - 6s/epoch - 11ms/step
Epoch 5/50
499/499 - 6s - loss: 0.1203 - val_loss: 0.0183 - 6s/epoch - 11ms/step
Epoch 6/50
499/499 - 5s - loss: 0.0932 - val_loss: 0.0119 - 5s/epoch - 11ms/step
Epoch 7/50
499/499 - 6s - loss: 0.0728 - val_loss: 0.0079 - 6s/epoch - 11ms/step
Epoch 8/50
499/499 - 6s - loss: 0.0574 - val_loss: 0.0057 - 6s/epoch - 11ms/step
Epoch 9/50
499/499 - 6s - loss: 0.0442 - val_loss: 0.0043 - 6s/epoch - 11ms/step
Epoch 10/50
499/499 - 6s - loss: 0.0338 - val_loss: 0.0033 - 6s/epoch - 11ms/step
Epoch 11/50
499/499 - 6s - loss: 0.0260 - val_loss: 0.0028 - 6s/epoch - 11ms/step
Epoch 12/50
499/499 - 6s - loss: 0.0195 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20220701/assets




Rolling Training...:  75%|███████▌  | 6/8 [26:46<08:54, 267.31s/it]

train_start: 20170101, train_end: 20221231, val_start: 20230101, val_end: 20230630


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 20.11it/s]


train_data_size: (64971, 199)
validation_data_size: (5649, 199)
Epoch 1/50
508/508 - 14s - loss: 0.8346 - val_loss: 0.1498 - 14s/epoch - 28ms/step
Epoch 2/50
508/508 - 7s - loss: 0.3116 - val_loss: 0.0946 - 7s/epoch - 14ms/step
Epoch 3/50
508/508 - 6s - loss: 0.1976 - val_loss: 0.0458 - 6s/epoch - 12ms/step
Epoch 4/50
508/508 - 6s - loss: 0.1416 - val_loss: 0.0251 - 6s/epoch - 11ms/step
Epoch 5/50
508/508 - 6s - loss: 0.1089 - val_loss: 0.0155 - 6s/epoch - 12ms/step
Epoch 6/50
508/508 - 6s - loss: 0.0858 - val_loss: 0.0103 - 6s/epoch - 11ms/step
Epoch 7/50
508/508 - 6s - loss: 0.0676 - val_loss: 0.0074 - 6s/epoch - 11ms/step
Epoch 8/50
508/508 - 6s - loss: 0.0527 - val_loss: 0.0055 - 6s/epoch - 11ms/step
Epoch 9/50
508/508 - 6s - loss: 0.0417 - val_loss: 0.0043 - 6s/epoch - 11ms/step
Epoch 10/50
508/508 - 6s - loss: 0.0323 - val_loss: 0.0035 - 6s/epoch - 11ms/step
Epoch 11/50
508/508 - 6s - loss: 0.0242 - val_loss: 0.0029 - 6s/epoch - 11ms/step
Epoch 12/50
508/508 - 6s - loss: 0.0180 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20230101/assets




Rolling Training...:  88%|████████▊ | 7/8 [31:36<04:34, 274.73s/it]

train_start: 20170701, train_end: 20230630, val_start: 20230701, val_end: 20231231


Process: 000016 ...: 100%|██████████| 50/50 [00:02<00:00, 22.25it/s]


train_data_size: (66185, 199)
validation_data_size: (5950, 199)
Epoch 1/50
518/518 - 12s - loss: 0.8566 - val_loss: 0.1103 - 12s/epoch - 23ms/step
Epoch 2/50
518/518 - 6s - loss: 0.3121 - val_loss: 0.0882 - 6s/epoch - 11ms/step
Epoch 3/50
518/518 - 6s - loss: 0.2020 - val_loss: 0.0435 - 6s/epoch - 11ms/step
Epoch 4/50
518/518 - 6s - loss: 0.1456 - val_loss: 0.0238 - 6s/epoch - 11ms/step
Epoch 5/50
518/518 - 6s - loss: 0.1120 - val_loss: 0.0137 - 6s/epoch - 11ms/step
Epoch 6/50
518/518 - 6s - loss: 0.0868 - val_loss: 0.0085 - 6s/epoch - 11ms/step
Epoch 7/50
518/518 - 6s - loss: 0.0676 - val_loss: 0.0056 - 6s/epoch - 12ms/step
Epoch 8/50
518/518 - 6s - loss: 0.0543 - val_loss: 0.0038 - 6s/epoch - 11ms/step
Epoch 9/50
518/518 - 6s - loss: 0.0409 - val_loss: 0.0028 - 6s/epoch - 12ms/step
Epoch 10/50
518/518 - 6s - loss: 0.0320 - val_loss: 0.0022 - 6s/epoch - 11ms/step
Epoch 11/50
518/518 - 6s - loss: 0.0244 - val_loss: 0.0018 - 6s/epoch - 11ms/step
Epoch 12/50
518/518 - 6s - loss: 0.0184 -

INFO:tensorflow:Assets written to: ./models/saved_model/model_of_20230701/assets




Rolling Training...: 100%|██████████| 8/8 [36:41<00:00, 275.22s/it]


In [34]:
# %tensorboard --logdir ./logs/fit