In [12]:
import warnings
warnings.filterwarnings("ignore")

import math

import pandas as pd
import pandas_ta as ta
import akshare as ak
import numpy as np

from datetime import datetime, timedelta
from tqdm import tqdm

pd.options.display.max_rows=None
pd.options.display.max_columns=None

In [3]:
# 1. 获取中证50（000016）的股票列表
stock_code_list = ak.index_stock_cons('000016')['品种代码'].to_list()
stock_code_list[:5]

['688981', '688041', '601988', '601601', '600150']

In [13]:
def generate_order_signal(dataframe):
    df = dataframe[['datetime', 'close']].copy()
    df = df.sort_values(by=['datetime'])

    # 计算时机
    hold_period = 30
    gap_period = 3
    return_threshold = 0.1

    # 初始化操作列
    df['signal'] = 0  # 默认为0（无操作）

    # 初始化下一个可能的买入时间，用于确保我们不会在hold_period内再次买入
    next_possible_buy = 0

    for start_index in range(len(df) - hold_period):
        if start_index >= next_possible_buy:
            # 获取当前价格和未来hold_period天内的价格数据
            current_price = df.iloc[start_index]['close']
            future_window = df.iloc[start_index:start_index + hold_period]
            future_prices = future_window['close']
            
            # 如果未来hold_period天内有满足收益阈值的价格，确定买入和卖出时机
            max_future_price = future_prices.max()
            if max_future_price / current_price - 1 >= return_threshold:
                # 寻找最高卖出点
                sell_index = future_prices.idxmax()
                
                # 寻找买入点，它应该是在当前时间点之后且在最高卖出点之前的最低点
                min_price_within_hold = future_prices[:sell_index].min()
                buy_index = future_prices[future_prices == min_price_within_hold].index[0]

                # 确保买入时间早于卖出时间
                if buy_index < sell_index:
                    # 标记买入和卖出信号
                    df.at[buy_index, 'signal'] = 1
                    df.at[sell_index, 'signal'] = -1
                    
                    # 更新next_possible_buy，以确保在此之前不会再次买入
                    next_possible_buy = sell_index + 1  # 确保下次买入至少在当前卖出之后

    # 针对卖出后立即进行买入的情况进行修正
    processed_signal = df['signal'].copy()
    # 使用滑动窗口遍历信号
    for i in range(len(processed_signal) - gap_period + 1):
        # 当前窗口
        window = processed_signal[i:i+gap_period]
        # 检查窗口中的值是否满足条件
        if set(window) == {0, 1, -1}:
            # 如果满足条件，则将窗口内的所有信号赋值为0
            processed_signal[i:i+gap_period] = 0
    df['signal'] = processed_signal
    return df[['datetime', 'signal']]

In [24]:
stock_code = '600011'
# 4. 获取历史信息
stock_history_info = ak.stock_zh_a_hist(symbol=stock_code, adjust='hfq').rename(
            columns={
                "日期": "datetime",
                "开盘": "open",
                "最高": "high",
                "最低": "low",
                "收盘": "close",
                "成交量": "volume",
                "成交额": "turnover",
                "振幅": "amplitude",
                "涨跌幅": "change_pct",
                "涨跌额": "change_amount",
                "换手率": "turnover_rate",
            }
        )
stock_history_info.insert(0, 'stock_code', stock_code)

In [25]:
stock_label_info = generate_order_signal(stock_history_info)

In [155]:
# 2. 构建单一案例




# # 5. 生成Target信息
# def generate_market_timing_target(dataframe):
#     dataframe = dataframe.sort_values(by=["datetime"])
#     # 计算过去M=10天收益率的（mean & std）
#     dataframe["daily_return"] = dataframe["close"].pct_change()
#     dataframe["mean_return"] = dataframe["daily_return"].transform(lambda x: x.rolling(10).mean())
#     dataframe["std_return"] = dataframe["daily_return"].transform(lambda x: x.rolling(10).std())
#     # 计算未来N=5天的收益率
#     dataframe["close_in_5_days"] = dataframe["close"].shift(-5)
#     dataframe["return_5_days"] = dataframe["close_in_5_days"] / dataframe["close"] - 1
#     # 构建Target
#     dataframe["target"] = 0  # 默认设置为0
#     dataframe.loc[dataframe["return_5_days"] > dataframe["mean_return"] + 2 * dataframe["std_return"], "target"] = 1 # 买入信号
#     dataframe.loc[dataframe["return_5_days"] < dataframe["mean_return"] - 2 * dataframe["std_return"], "target"] = 2 # 卖出信号
#     # # 删除有NaN的值
#     dataframe.dropna(subset=["mean_return", "std_return", "close_in_5_days"], inplace=True)
#     # # 生成最终的Label表
#     dataframe = dataframe[["datetime", "target"]]
#     return dataframe
# stock_target_info = generate_market_timing_target(stock_history_info[['datetime', 'close']].copy())

# # 6. 生成时间特征
# def extract_time_features(datetime_series):
#     dataframe = pd.DataFrame()
#     dataframe['datetime'] = datetime_series
#     datetime_series = pd.to_datetime(datetime_series)
#     dataframe['weekday'] = datetime_series.dt.weekday  # 星期几（0=星期一，6=星期日）
#     dataframe['day_of_week'] = datetime_series.dt.day_name()  # 星期几的名称
#     dataframe['day_of_month'] = datetime_series.dt.day  # 一个月中的第几天
#     dataframe['month'] = datetime_series.dt.month  # 月份
#     dataframe['season'] = datetime_series.dt.month.map(lambda x: {
#         1: 'Winter', 2: 'Winter', 3: 'Spring', 4: 'Spring', 5: 'Spring',
#         6: 'Summer', 7: 'Summer', 8: 'Summer', 9: 'Autumn', 10: 'Autumn',
#         11: 'Autumn', 12: 'Winter'
#     }.get(x))
#     return dataframe
# stock_time_info = extract_time_features(stock_history_info['datetime'].copy())

# # 7. 生成价格特征
# def extract_price_features(dataframe):
#     dataframe.set_index(pd.DatetimeIndex(dataframe['datetime']), inplace=True)
#     dataframe.ta.cores = 0
#     dataframe.ta.strategy()
#     dataframe = dataframe[[i for i in dataframe.columns if i not in ['open', 'high', 'low', 'close', 'volume']]]
#     dataframe = dataframe.reset_index(drop=True)
#     return dataframe
# stock_price_info = extract_price_features(stock_history_info[['datetime', 'open', 'high', 'low', 'close', 'volume']].copy())

# # 8. 特征整合wide表
# stock_wide_info = stock_individual_info.merge(stock_history_info, on=['stock_code'], how='left').merge(stock_target_info, on=['datetime'], how='inner').merge(stock_time_info, on=['datetime'], how='inner').merge(stock_price_info, on=['datetime'], how='inner')
# stock_wide_info.fillna(0, inplace=True)

# # 9. wide表数据保存
# stock_wide_info.to_pickle(f'./wide_data/{stock_code}.pkl')


In [160]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# def plot_label(df):
#     # Plot the close prices
#     plt.figure(figsize=(14, 7))
#     plt.plot(df['datetime'], df['close'], label='Close Price', color='blue')
#     # Plot the buy signals
#     buy_signals = df[df['signal'] == 1]
#     plt.scatter(buy_signals['datetime'], buy_signals['close'], label='Buy Signal', marker='^', color='green', alpha=1)
#     # Plot the sell signals
#     sell_signals = df[df['signal'] == -1]
#     plt.scatter(sell_signals['datetime'], sell_signals['close'], label='Sell Signal', marker='v', color='red', alpha=1)
#     # Add title and labels
#     plt.title('Stock Price with Buy/Sell Signals')
#     plt.xlabel('Date')
#     plt.ylabel('Price')
#     # Rotate date labels for better readability
#     plt.xticks(rotation=45)
#     # Show the legend
#     plt.legend()
#     # Show the plot
#     plt.show()

# plot_label(df)

In [22]:
# 使用tensorflow处理原始数据
import numpy as np
import pandas as pd
import tensorflow as tf
print(tf.__version__)

2024-02-19 18:25:19.970168: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.15.0


In [26]:
df = stock_label_info.merge(stock_history_info[['datetime', 'open', 'close', 'high', 'low', 'volume']], on=['datetime'], how='left')
df['datetime'] = pd.to_datetime(df['datetime'])
df.sort_values('datetime', inplace=True)

In [83]:
# 6. 选择固定时间区间的数据
train_start_date = pd.to_datetime('2000-01-01')
train_end_date = pd.to_datetime('2020-12-31')
val_start_date = pd.to_datetime('2021-01-01')
val_end_date = pd.to_datetime('2021-12-31')
test_start_date = pd.to_datetime('2022-01-01')
test_end_date = pd.to_datetime('2022-12-31')

train_df = df[(pd.to_datetime(df['datetime']) >= train_start_date) & (pd.to_datetime(df['datetime']) <= train_end_date)]
val_df = df[(pd.to_datetime(df['datetime']) >= val_start_date) & (pd.to_datetime(df['datetime']) <= val_end_date)]
test_df = df[(pd.to_datetime(df['datetime']) >= test_start_date) & (pd.to_datetime(df['datetime']) <= test_end_date)]

In [127]:
train_df[['open', 'close', 'high', 'low', 'volume', 'signal']].head()

Unnamed: 0,open,close,high,low,volume,signal
0,13.3,13.64,14.07,13.3,1234381,0
1,13.6,13.11,13.68,13.03,212267,0
2,13.02,13.18,13.28,12.76,111541,0
3,13.1,12.96,13.3,12.95,56542,0
4,12.9,13.46,13.73,12.87,188862,0


In [134]:
data = np.array(train_df[['open', 'close', 'high', 'low', 'volume', 'signal']], dtype=np.float32)

In [142]:
class WindowGenerator():
  def __init__(self, input_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,feature_columns=[],
               label_columns='signal'):
    # Store the raw data.
    self.train_df = train_df[feature_columns]
    self.val_df = val_df[feature_columns]
    self.test_df = test_df[feature_columns]

    # Work out the label column indices.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {name: i for i, name in
                                    enumerate(label_columns)} # {'signal': 0}
      
    self.column_indices = {name: i for i, name in
                           enumerate(self.train_df.columns)} # {'open': 0, 'close': 1, 'high': 2, 'low': 3, 'volume': 4, 'signal': 5}

    # Work out the window parameters.
    self.input_width = input_width
    self.shift = shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.input_width)[self.input_slice]

  def __repr__(self):
    return '\n'.join([
        f'Input Width: {self.input_width}',
        f'Input indices: {self.input_indices}',
        f'Label column name(s): {self.label_columns}'])
  
  # def split_window(self, features):
  #   inputs = features[:, self.input_slice, :]
  #   labels = features[:, self.labels_slice, :]
  #   if self.label_columns is not None:
  #     labels = tf.stack(
  #         [labels[:, :, self.column_indices[name]] for name in self.label_columns],
  #         axis=-1)

  #   # Slicing doesn't preserve static shape information, so set the shapes
  #   # manually. This way the `tf.data.Datasets` are easier to inspect.
  #   inputs.set_shape([None, self.input_width, None])
  #   labels.set_shape([None, self.label_width, None])

  #   return inputs, labels
  
  def make_dataset(self, data):
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data[:, :-1],
        targets=data[:, -1],
        sequence_length=self.input_width,
        sequence_stride=1,
        shuffle=False,
        batch_size=32,)

    # ds = ds.map(self.split_window)

    return ds

  @property
  def train(self):
    return self.make_dataset(self.train_df)

  @property
  def val(self):
    return self.make_dataset(self.val_df)

  @property
  def test(self):
    return self.make_dataset(self.test_df)

  @property
  def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
      # No example batch was found, so get one from the `.train` dataset
      result = next(iter(self.train))
      # And cache it for next time
      self._example = result
    return result


In [143]:
w2 = WindowGenerator(input_width=30, shift=1, feature_columns=['open', 'close', 'high', 'low', 'volume', 'signal'], label_columns=['signal'])
w2

Input Width: 30
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]
Label column name(s): ['signal']

In [146]:
w2.train.take(1)

<_TakeDataset element_spec=(TensorSpec(shape=(None, None, 5), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [147]:
for example_inputs, example_labels in w2.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')

Inputs shape (batch, time, features): (32, 30, 5)
Labels shape (batch, time, features): (32,)


In [102]:
# example_inputs

In [33]:
train_0, train_1, train_2 = np.bincount(train_data['target'])
train_total = train_0 + train_1 + train_2
print('Train:\nTotal: {}, Normal: {},Positive: {}, Negative:{} \n'.format(train_total, train_0, train_1, train_2))

val_0, val_1, val_2 = np.bincount(validation_data['target'])
val_total = val_0 + val_1 + val_2
print('Validation:\nTotal: {}, Normal: {},Positive: {}, Negative:{} \n'.format(val_total, val_0, val_1, val_2))

test_0, test_1, test_2 = np.bincount(test_data['target'])
test_total = test_0 + test_1 + test_2
print('Test:\nTotal: {}, Normal: {},Positive: {}, Negative:{} \n'.format(test_total, test_0, test_1, test_2))


# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
weight_for_0 = (1 / train_0) * (train_total / 2.0)
weight_for_1 = (1 / train_1) * (train_total / 2.0)
weight_for_2 = (1 / train_2) * (train_total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))
print('Weight for class 2: {:.2f}'.format(weight_for_2))

Train:
Total: 151865, Normal: 92464,Positive: 31636, Negative:27765 

Validation:
Total: 11619, Normal: 7301,Positive: 2230, Negative:2088 

Test:
Total: 11936, Normal: 7498,Positive: 2120, Negative:2318 

Weight for class 0: 0.82
Weight for class 1: 2.40
Weight for class 2: 2.73


In [125]:
def get_numeric_boundaries(series, num_bins=20):
    return pd.qcut(series, num_bins, retbins=True)[1].tolist()

TARGET_FEATURE_NAME = "target"
TARGET_FEATURE_LABELS = ["0", "1", "2"]
TARGET_FEATURE_LENGTH = len(TARGET_FEATURE_LABELS)

# 连续特征分桶
NUMERIC_FEATURES_WITH_BOUNDARIES = {
    'open': get_numeric_boundaries(train_data['open']),
    'close': get_numeric_boundaries(train_data['close']),
    'high': get_numeric_boundaries(train_data['high']),
    'low': get_numeric_boundaries(train_data['low']),
    'volume': get_numeric_boundaries(train_data['volume']),
    'turnover': get_numeric_boundaries(train_data['turnover']),
    'amplitude': get_numeric_boundaries(train_data['amplitude']),
    'change_pct': get_numeric_boundaries(train_data['change_pct']),
    'change_amount': get_numeric_boundaries(train_data['change_amount']),
    'turnover_rate': get_numeric_boundaries(train_data['turnover_rate'])
}
NUMERIC_FEATURE_NAMES = list(NUMERIC_FEATURES_WITH_BOUNDARIES.keys())

# 离散特征embedding
CATEGORICAL_FEATURES_WITH_VOCABULARY = {
    "industry": sorted(list(train_data["industry"].unique())),
}
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())

FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES

In [121]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop(TARGET_FEATURE_NAME)
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

train_ds = df_to_dataset(train_data)
val_ds = df_to_dataset(test_data)
test_ds = df_to_dataset(test_data)

In [122]:
def create_model_inputs():
    inputs = {}
    for feature_name in FEATURE_NAMES:
        if feature_name in NUMERIC_FEATURE_NAMES:
            inputs[feature_name] = tf.keras.layers.Input(
                name=feature_name, shape=(), dtype="float32"
            )
        else:
            inputs[feature_name] = tf.keras.layers.Input(
                name=feature_name, shape=(), dtype="string"
            )
    return inputs

def encode_inputs(inputs):
    encoded_features = []
    embedding_dim = 4
    for feature_name in inputs:
        if feature_name in NUMERIC_FEATURE_NAMES: # 处理连续特征
            embedding_size = len(NUMERIC_FEATURES_WITH_BOUNDARIES[feature_name]) * 2
            embedding = tf.keras.layers.Embedding(
                input_dim=embedding_size, output_dim=embedding_dim
            )
            lookup_layer = tf.keras.layers.Discretization(bin_boundaries=NUMERIC_FEATURES_WITH_BOUNDARIES[feature_name],output_mode='int')
            encoded_feature = embedding(lookup_layer(inputs[feature_name]))
        elif feature_name in CATEGORICAL_FEATURE_NAMES: # 处理类别特征
            embedding_size = len(CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]) * 2
            embedding = tf.keras.layers.Embedding(
                input_dim=embedding_size, output_dim=embedding_dim
            )
            lookup_layer = tf.keras.layers.Hashing(num_bins=embedding_size)
            encoded_feature = embedding(lookup_layer(inputs[feature_name]))
        
        # print(encoded_feature)
        encoded_features.append(encoded_feature)
    
    all_features = tf.keras.layers.concatenate(encoded_features)
    return all_features

In [130]:
LEARNING_RATE = 5e-3
NUM_EPOCH = 50

def run_experiment(model, train_ds, val_ds, test_ds):
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
    )

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_sparse_categorical_accuracy',
        verbose=1,
        patience=10,
        mode='max',
        restore_best_weights=True
    )
    print("Start training the model...")
    history = model.fit(
        train_ds, 
        epochs=NUM_EPOCH, 
        validation_data=val_ds, 
        verbose=2,
        callbacks=[early_stopping],
        class_weight=class_weight
    )
    print("Model training finished")

    # loss, auc = model.evaluate(test_ds, verbose=0)
    # print(f"Test AUC::{round(auc * 100, 2)}%")

In [131]:
dropout_rate = 0.2
hidden_units = [64, 32]

In [132]:
def create_baseline_model(output_bias=None):
    inputs = create_model_inputs()
    features = encode_inputs(inputs)

    for units in hidden_units:
        features = tf.keras.layers.Dense(units)(features)
        features = tf.keras.layers.BatchNormalization()(features)
        features = tf.keras.layers.ReLU()(features)
        features = tf.keras.layers.Dropout(dropout_rate)(features)

    # outputs = tf.keras.layers.Dense(units=1, activation="sigmoid")(features)
    outputs = tf.keras.layers.Dense(units=TARGET_FEATURE_LENGTH, activation="softmax")(features)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

baseline_model = create_baseline_model()
# tf.keras.utils.plot_model(baseline_model, show_shapes=True, rankdir="LR")
run_experiment(baseline_model, train_ds, val_ds, test_ds)

Start training the model...
Epoch 1/20
4746/4746 - 4s - loss: 1.6707 - sparse_categorical_accuracy: 0.3124 - val_loss: 1.0877 - val_sparse_categorical_accuracy: 0.3660 - 4s/epoch - 939us/step
Epoch 2/20
4746/4746 - 4s - loss: 1.6441 - sparse_categorical_accuracy: 0.3202 - val_loss: 1.1016 - val_sparse_categorical_accuracy: 0.2666 - 4s/epoch - 794us/step
Epoch 3/20
4746/4746 - 4s - loss: 1.6419 - sparse_categorical_accuracy: 0.3321 - val_loss: 1.0973 - val_sparse_categorical_accuracy: 0.3288 - 4s/epoch - 797us/step
Epoch 4/20
4746/4746 - 4s - loss: 1.6400 - sparse_categorical_accuracy: 0.3355 - val_loss: 1.1016 - val_sparse_categorical_accuracy: 0.2805 - 4s/epoch - 796us/step
Epoch 5/20
4746/4746 - 4s - loss: 1.6392 - sparse_categorical_accuracy: 0.3363 - val_loss: 1.0998 - val_sparse_categorical_accuracy: 0.2874 - 4s/epoch - 793us/step
Epoch 6/20
4746/4746 - 4s - loss: 1.6374 - sparse_categorical_accuracy: 0.3379 - val_loss: 1.1015 - val_sparse_categorical_accuracy: 0.2721 - 4s/epoch -

In [133]:
def create_wide_and_deep_model():
    inputs = create_model_inputs()
    wide = encode_inputs(inputs)
    wide = tf.keras.layers.BatchNormalization()(wide)

    deep = encode_inputs(inputs)
    for units in hidden_units:
        deep = tf.keras.layers.Dense(units)(deep)
        deep = tf.keras.layers.BatchNormalization()(deep)
        deep = tf.keras.layers.ReLU()(deep)
        deep = tf.keras.layers.Dropout(dropout_rate)(deep)

    merged = tf.keras.layers.concatenate([wide, deep])
    # outputs = tf.keras.layers.Dense(units=1)(merged)
    outputs = tf.keras.layers.Dense(units=TARGET_FEATURE_LENGTH, activation="softmax")(merged)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model


wide_and_deep_model = create_wide_and_deep_model()
# keras.utils.plot_model(wide_and_deep_model, show_shapes=True, rankdir="LR")

run_experiment(wide_and_deep_model,train_ds, val_ds, test_ds)

Start training the model...
Epoch 1/20
4746/4746 - 6s - loss: 1.6681 - sparse_categorical_accuracy: 0.3398 - val_loss: 1.1178 - val_sparse_categorical_accuracy: 0.2932 - 6s/epoch - 1ms/step
Epoch 2/20
4746/4746 - 5s - loss: 1.6443 - sparse_categorical_accuracy: 0.3438 - val_loss: 1.1131 - val_sparse_categorical_accuracy: 0.2723 - 5s/epoch - 955us/step
Epoch 3/20
4746/4746 - 5s - loss: 1.6425 - sparse_categorical_accuracy: 0.3458 - val_loss: 1.0958 - val_sparse_categorical_accuracy: 0.3335 - 5s/epoch - 965us/step
Epoch 4/20
4746/4746 - 5s - loss: 1.6409 - sparse_categorical_accuracy: 0.3446 - val_loss: 1.0939 - val_sparse_categorical_accuracy: 0.3311 - 5s/epoch - 958us/step
Epoch 5/20
4746/4746 - 5s - loss: 1.6398 - sparse_categorical_accuracy: 0.3436 - val_loss: 1.1007 - val_sparse_categorical_accuracy: 0.3138 - 5s/epoch - 968us/step
Epoch 6/20
4746/4746 - 5s - loss: 1.6381 - sparse_categorical_accuracy: 0.3452 - val_loss: 1.0999 - val_sparse_categorical_accuracy: 0.2998 - 5s/epoch - 9

In [134]:
wide_and_deep_model.save('./hh_quant_tf_wdl_model')
reloaded_model = tf.keras.models.load_model('./hh_quant_tf_wdl_model')

INFO:tensorflow:Assets written to: ./hh_quant_tf_wdl_model/assets


INFO:tensorflow:Assets written to: ./hh_quant_tf_wdl_model/assets


In [135]:
samples = df_to_dataset(test_data.iloc[:100, :], shuffle=False, batch_size=10)

In [143]:
predictions = reloaded_model.predict(samples)
prob = tf.nn.softmax(tf.squeeze(predictions))

# print(
#     "This particular pet had a %.4f percent probability "
#     "of getting adopted." % (100 * prob)
# )

 1/10 [==>...........................] - ETA: 0s



In [146]:
prob.numpy().argmax(axis=-1)

array([1, 1, 0, 0, 2, 1, 1, 2, 0, 0, 0, 1, 1, 1, 2, 0, 2, 1, 0, 0, 0, 1,
       2, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 2, 1, 1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 1,
       1, 2, 1, 2, 1, 1, 2, 1, 0, 1, 1, 2])