In [3]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
import einops
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import SparseCategoricalCrossentropy
# 启用 XLA
tf.config.optimizer.set_jit(True)
TARGET_FRAMES = 20  # 每个视频的关键帧数
HEIGHT, WIDTH = 224, 224  # 每帧的大小
class Conv2Plus1D(layers.Layer):
    def __init__(self, filters, kernel_size, padding, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.padding = padding
        self.seq = tf.keras.Sequential([
            layers.Conv3D(filters=filters, kernel_size=(1, kernel_size[1], kernel_size[2]), padding=padding),
            layers.Conv3D(filters=filters, kernel_size=(kernel_size[0], 1, 1), padding=padding)
        ])

    def call(self, x):
        return self.seq(x)

    def get_config(self):
        # 确保返回所有参数，包括自定义的
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'padding': self.padding
        })
        return config

    @classmethod
    def from_config(cls, config):
        # 通过从配置字典中解构来创建类实例
        return cls(**config)

class ResidualMain(layers.Layer):
    def __init__(self, filters, kernel_size, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.seq = tf.keras.Sequential([
            Conv2Plus1D(filters=filters, kernel_size=kernel_size, padding='same'),
            layers.LayerNormalization(),
            layers.ReLU(),
            Conv2Plus1D(filters=filters, kernel_size=kernel_size, padding='same'),
            layers.LayerNormalization()
        ])

    def call(self, x):
        return self.seq(x)

    def get_config(self):
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

def add_residual_block(input, filters, kernel_size):
  """
    Add residual blocks to the model. If the last dimensions of the input data
    and filter size does not match, project it such that last dimension matches.
  """
  out = ResidualMain(filters, 
                     kernel_size)(input)

  res = input
  # Using the Keras functional APIs, project the last dimension of the tensor to
  # match the new filter size
  if out.shape[-1] != input.shape[-1]:
    res = Project(out.shape[-1])(res)

  return layers.add([res, out])


class Project(layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.seq = tf.keras.Sequential([
            layers.Dense(units),
            layers.LayerNormalization()
        ])

    def call(self, x):
        return self.seq(x)

    def get_config(self):
        config = super().get_config()
        config.update({
            'units': self.units
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)


class ResizeVideo(layers.Layer):
    def __init__(self, height, width, **kwargs):
        super().__init__(**kwargs)
        self.height = height
        self.width = width
        self.resizing_layer = layers.Resizing(self.height, self.width)

    def call(self, video):
        old_shape = einops.parse_shape(video, 'b t h w c')
        images = einops.rearrange(video, 'b t h w c -> (b t) h w c')
        images = self.resizing_layer(images)
        videos = einops.rearrange(images, '(b t) h w c -> b t h w c', t=old_shape['t'])
        return videos

    def get_config(self):
        config = super().get_config()
        config.update({
            'height': self.height,
            'width': self.width
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

input_shape = (None, 20, HEIGHT, WIDTH, 3)  # 输入视频的形状，None 表示批次大小不固定
input = layers.Input(shape=(input_shape[1:]))  # 定义输入层，形状为 (时间步数, 高度, 宽度, 通道数)
x = input

# 初始卷积层：执行 2+1D 卷积操作（空间 + 时间分解）
x = Conv2Plus1D(filters=16, kernel_size=(3, 7, 7), padding='same')(x)
x = layers.BatchNormalization()(x)  # 批量归一化层，规范化每批次的特征
x = layers.ReLU()(x)  # 激活函数 ReLU
x = ResizeVideo(HEIGHT//2, WIDTH//4 )(x)  # 调整视频帧的尺寸到 (HEIGHT/2, WIDTH/2)

# Block 1: 添加第一个残差块并调整尺寸
x = add_residual_block(x, 16, (3, 3, 3))  # 添加残差块，过滤器数为 16，卷积核大小为 3x3x3
x = ResizeVideo(HEIGHT // 4, WIDTH // 4)(x)  # 调整尺寸到 (HEIGHT/4, WIDTH/4)

# Block 2: 添加第二个残差块并调整尺寸
x = add_residual_block(x, 32, (3, 3, 3))  # 过滤器数为 32
x = ResizeVideo(HEIGHT // 8, WIDTH // 8)(x)  # 调整尺寸到 (HEIGHT/8, WIDTH/8)

# Block 3: 添加第三个残差块并调整尺寸
x = add_residual_block(x, 64, (3, 3, 3))  # 过滤器数为 64
x = ResizeVideo(HEIGHT // 16, WIDTH // 16)(x)  # 调整尺寸到 (HEIGHT/16, WIDTH/16)

# Block 4: 添加第四个残差块
x = add_residual_block(x, 128, (3, 3, 3))  # 过滤器数为 128

# 全局平均池化和分类
x = layers.GlobalAveragePooling3D()(x)  # 对时间、空间维度进行全局平均池化，生成特征向量
x = layers.Flatten()(x)  # 展平为 1D 向量
x = layers.Dense(15)(x)  # 全连接层输出 10 个分类

# 定义模型
model = keras.Model(input, x)
# 修改损失函数的 reduction 参数
loss = SparseCategoricalCrossentropy(from_logits=False, reduction='sum_over_batch_size')
# 使用 Keras 提供的工具绘制模型结构
keras.utils.plot_model(
    model,               # 目标模型
    expand_nested=True,  # 展开嵌套的层，例如子模块或自定义层
    dpi=60,              # 设置图片分辨率
    show_shapes=True     # 显示每一层输出的形状
)



# 加载模型函数
def load_my_model(model_path):
    # 定义自定义层
    custom_objects = {
        'Conv2Plus1D': Conv2Plus1D,
        'ResizeVideo': ResizeVideo,
        'ResidualMain': ResidualMain,
        'Project': Project  # 确保 Project 也被添加到 custom_objects 中
    }
    
    # 使用 Keras 加载模型
    model = load_model(model_path, custom_objects=custom_objects)
    
    # 编译模型
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['accuracy']
    )
    
    return model


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [4]:
# 调用加载模型的函数，假设您给的路径是 model_path
model_path = r'C:\data\result\model\200grey_model.h5'  # 替换为您实际的模型文件路径
model = load_my_model(model_path)

In [6]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image

def sobel_custom(image_array):
    sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])
    
    grad_x = np.zeros_like(image_array)
    grad_y = np.zeros_like(image_array)
    
    height, width = image_array.shape
    
    for i in range(1, height-1):
        for j in range(1, width-1):
            grad_x[i, j] = np.sum(image_array[i-1:i+2, j-1:j+2] * sobel_x)
            grad_y[i, j] = np.sum(image_array[i-1:i+2, j-1:j+2] * sobel_y)
    
    return grad_x, grad_y

def load_and_preprocess_frames_custom(frame_paths, target_height, target_width, target_frames):
    frames = []
    for frame_path in sorted(frame_paths):
        image = Image.open(frame_path).convert('L')  # 转为灰度图
        image = image.resize((target_width, target_height))
        image_array = np.array(image, dtype=np.float32) / 255.0
        
        grad_x, grad_y = sobel_custom(image_array)
        
        grad_x = (grad_x - grad_x.min()) / (grad_x.max() - grad_x.min() + 1e-6)
        grad_y = (grad_y - grad_y.min()) / (grad_y.max() - grad_y.min() + 1e-6)
        
        three_channel_frame = np.stack([image_array, grad_x, grad_y], axis=-1)
        frames.append(three_channel_frame)

    if len(frames) > target_frames:
        frames = frames[:target_frames]
    elif len(frames) < target_frames:
        padding = target_frames - len(frames)
        padding_frame = np.zeros((target_height, target_width, 3))
        frames.extend([padding_frame] * padding)

    return np.stack(frames, axis=0)

def predict_from_folder_custom(model, folder_path, height, width, frames):
    frame_paths = sorted([
        os.path.join(folder_path, frame).replace("\\", "/")
        for frame in os.listdir(folder_path)
        if frame.endswith((".jpg", ".png"))
    ])
    
    if not frame_paths:
        raise ValueError(f"文件夹 {folder_path} 中没有找到帧文件！")
    
    processed_frames = load_and_preprocess_frames_custom(frame_paths, height, width, frames)
    processed_frames = np.expand_dims(processed_frames, axis=0)

    probabilities = model.predict(processed_frames)
    return probabilities

def create_dataset_with_predictions_custom(model, output_dir, video_dir, height, width, frames):
    data_list = []
    labels_list = []

    for folder_name in os.listdir(output_dir):
        folder_path = os.path.join(output_dir, folder_name).replace("\\", "/")
        if not os.path.isdir(folder_path):
            continue

        for reference_folder in os.listdir(folder_path):
            reference_folder_path = os.path.join(folder_path, reference_folder).replace("\\", "/")
            if not os.path.isdir(reference_folder_path):
                continue

            # 提取父文件夹名称（如 0-两手托天理三焦（八段锦））
            parent_folder_name = folder_name.split("\\")[-1]
            
            # 查找对应的 MP4 文件路径
            video_folder_path = os.path.join(video_dir, parent_folder_name)
            if not os.path.isdir(video_folder_path):
                print(f"未找到视频文件夹：{video_folder_path}")
                continue
            
            video_files = [
                f for f in os.listdir(video_folder_path) if f.endswith(".mp4")
            ]
            if not video_files:
                print(f"在 {video_folder_path} 中没有找到视频文件")
                continue
            
            # 假设文件名格式为 reference_0_0.871667.mp4，提取标签
            video_file = video_files[0]
            try:
                label = float(video_file.split("_")[-1].replace(".mp4", ""))
            except ValueError:
                print(f"文件名中未找到分数标签: {video_file}")
                continue

            try:
                probabilities = predict_from_folder_custom(model, reference_folder_path, height, width, frames)
            except ValueError as e:
                print(e)
                continue

            data_list.append(probabilities[0])
            labels_list.append(label)

    data_tensor = tf.convert_to_tensor(np.array(data_list), dtype=tf.float32)
    labels_tensor = tf.convert_to_tensor(np.array(labels_list), dtype=tf.float32)
    return tf.data.Dataset.from_tensor_slices((data_tensor, labels_tensor))

# 参数配置
OUTPUT_DIR = "C:/data/result/scene_detector"  # 关键帧存放目录
VIDEO_DIR = "C:/data/video"  # 原视频文件目录
HEIGHT, WIDTH = 224, 224  # 每帧的大小
FRAMES = 20  # 每个视频的关键帧数
MODEL = model  # 加载您的 TensorFlow 模型

# 构建数据集
dataset = create_dataset_with_predictions_custom(MODEL, OUTPUT_DIR, VIDEO_DIR, HEIGHT, WIDTH, FRAMES)

# 检查数据集
for data, label in dataset.take(1):
    print("数据向量 (1×14):", data.numpy())
    print("标签值:", label.numpy())


文件夹 C:/data/result/scene_detector/0-两手托天理三焦（八段锦）/动作0-10-31 中没有找到帧文件！


KeyboardInterrupt: 

In [3]:
import os 
import numpy as np
import tensorflow as tf
from PIL import Image

def sobel_custom(image_array):
    sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])
    
    grad_x = np.zeros_like(image_array)
    grad_y = np.zeros_like(image_array)
    
    height, width = image_array.shape
    
    for i in range(1, height-1):
        for j in range(1, width-1):
            grad_x[i, j] = np.sum(image_array[i-1:i+2, j-1:j+2] * sobel_x)
            grad_y[i, j] = np.sum(image_array[i-1:i+2, j-1:j+2] * sobel_y)
    
    return grad_x, grad_y

def load_and_preprocess_frames_custom(frame_paths, target_height, target_width, target_frames):
    frames = []
    for frame_path in sorted(frame_paths):
        image = Image.open(frame_path).convert('L')  # 转为灰度图
        image = image.resize((target_width, target_height))
        image_array = np.array(image, dtype=np.float32) / 255.0
        
        grad_x, grad_y = sobel_custom(image_array)
        
        grad_x = (grad_x - grad_x.min()) / (grad_x.max() - grad_x.min() + 1e-6)
        grad_y = (grad_y - grad_y.min()) / (grad_y.max() - grad_y.min() + 1e-6)
        
        three_channel_frame = np.stack([image_array, grad_x, grad_y], axis=-1)
        frames.append(three_channel_frame)

    if len(frames) > target_frames:
        frames = frames[:target_frames]
    elif len(frames) < target_frames:
        padding = target_frames - len(frames)
        padding_frame = np.zeros((target_height, target_width, 3))
        frames.extend([padding_frame] * padding)

    return np.stack(frames, axis=0)

def predict_from_folder_custom(model, folder_path, height, width, frames):
    frame_paths = sorted([os.path.join(folder_path, frame).replace("\\", "/") for frame in os.listdir(folder_path) if frame.endswith((".jpg", ".png"))])
    
    if not frame_paths:
        raise ValueError(f"文件夹 {folder_path} 中没有找到帧文件！")
    
    processed_frames = load_and_preprocess_frames_custom(frame_paths, height, width, frames)
    processed_frames = np.expand_dims(processed_frames, axis=0)

    probabilities = model.predict(processed_frames)
    return probabilities

def create_dataset_with_predictions_custom(model, output_dir, video_dir, height, width, frames, log_file_path):
    data_list = []
    labels_list = []
    
    # 打开日志文件进行写入
    with open(log_file_path, 'a') as log_file:

        for folder_name in os.listdir(output_dir):
            folder_path = os.path.join(output_dir, folder_name).replace("\\", "/")
            if not os.path.isdir(folder_path):
                continue

            for reference_folder in os.listdir(folder_path):
                reference_folder_path = os.path.join(folder_path, reference_folder).replace("\\", "/")
                if not os.path.isdir(reference_folder_path):
                    continue

                # 提取父文件夹名称（如 0-两手托天理三焦（八段锦））
                parent_folder_name = folder_name.split("\\")[-1]
                
                # 查找对应的 MP4 文件路径
                video_folder_path = os.path.join(video_dir, parent_folder_name)
                if not os.path.isdir(video_folder_path):
                    print(f"未找到视频文件夹：{video_folder_path}")
                    continue
                
                video_files = [f for f in os.listdir(video_folder_path) if f.endswith(".mp4")]
                if not video_files:
                    print(f"在 {video_folder_path} 中没有找到视频文件")
                    continue
                
                # 假设文件名格式为 reference_0_0.871667.mp4，提取标签
                video_file = video_files[0]
                try:
                    label = float(video_file.split("_")[-1].replace(".mp4", ""))
                except ValueError:
                    print(f"文件名中未找到分数标签: {video_file}")
                    continue

                try:
                    probabilities = predict_from_folder_custom(model, reference_folder_path, height, width, frames)
                except ValueError as e:
                    print(e)
                    continue

                data_list.append(probabilities[0])
                labels_list.append(label)

                # 记录日志到文件（包括标签和预测结果）
                log_file.write(f"{parent_folder_name}_{reference_folder} (标签: {label})\n")  # 写入文件夹名和标签
                log_file.write(" ".join(map(str, probabilities[0])) + "\n")  # 写入14个概率值

    data_tensor = tf.convert_to_tensor(np.array(data_list), dtype=tf.float32)
    labels_tensor = tf.convert_to_tensor(np.array(labels_list), dtype=tf.float32)
    return tf.data.Dataset.from_tensor_slices((data_tensor, labels_tensor))

# 参数配置
OUTPUT_DIR = "C:/data/result/scene_detector"  # 关键帧存放目录
VIDEO_DIR = "C:/data/video"  # 原视频文件目录
LOG_FILE_PATH = "C:/data/result/txt/predictions_log.txt"  # 预测结果日志文件路径
HEIGHT, WIDTH = 224, 224  # 每帧的大小
FRAMES = 20  # 每个视频的关键帧数
MODEL = model  # 加载您的 TensorFlow 模型

# 构建数据集
dataset = create_dataset_with_predictions_custom(MODEL, OUTPUT_DIR, VIDEO_DIR, HEIGHT, WIDTH, FRAMES, LOG_FILE_PATH)

# 检查数据集
for data, label in dataset.take(1):
    print("数据向量 (1×14):", data.numpy())
    print("标签值:", label.numpy())


文件夹 C:/data/result/scene_detector/0-两手托天理三焦（八段锦）/动作0-10-31 中没有找到帧文件！
数据向量 (1×14): [9.95986879e-01 8.18129047e-06 8.77484126e-06 5.78877994e-08
 4.54797510e-06 3.13687133e-06 4.72669015e-09 9.77272401e-04
 1.68164005e-03 1.30030964e-09 1.32916577e-03 6.90987578e-09
 3.64507400e-07 1.03542185e-07]
标签值: 0.871667


In [100]:
import os
import numpy as np
import tensorflow as tf

# 文件路径
log_file = r"C:\data\result\txt\predictions_log.txt"
video_base_path = r"C:\data\video"

# 初始化特征和标签列表
features, labels = [], []

# 读取文件并解析数据
with open(log_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

i = 0
while i < len(lines) - 1:  # 遍历文件
    label_line = lines[i].strip()
    data_line = lines[i + 1].strip()
    
    # 提取关键信息（例如 `0-两手托天理三焦（八段锦）_reference_0`）
    if "(标签:" in label_line:
        end_idx = label_line.find(" (标签:")
        key_info = label_line[:end_idx]  # 提取文件名关键部分
    
        # 解析文件路径
        action_folder = key_info.split("_")[0]
        filename_pattern = key_info.split("_", 1)[1]  # 提取 `reference_0`
        
        folder_path = os.path.join(video_base_path, action_folder)
        # 查找匹配的文件
        matching_files = [f for f in os.listdir(folder_path) if filename_pattern in f]
        if matching_files:
            # 提取文件名中的标签值
            file_name = matching_files[0]  # 假设只有一个匹配文件
            label_start = file_name.rfind("_") + 1
            label_end = file_name.rfind(".mp4")
            label_value = float(file_name[label_start:label_end])  # 提取标签值
            
            # 提取 14 维向量数据
            data_values = list(map(float, data_line.split()))
            
            # 检查数据和标签范围
            if len(data_values) == 14 and all(0 <= v <= 1 for v in data_values) and 0 <= label_value <= 1:
                features.append(data_values)
                labels.append(label_value)
    
    i += 2  # 跳过标签行和数据行

# 转换为 NumPy 数组
features = np.array(features)
labels = np.array(labels)

# 检查数据范围
print("Feature values range:", np.min(features), np.max(features))
print("Label values range:", np.min(labels), np.max(labels))

# 构建 TensorFlow 数据集
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
train_size = int(0.7 * len(dataset))
train_dataset = dataset.take(train_size).batch(128)
val_dataset = dataset.skip(train_size).batch(128)


print(f"数据集构建完成：总样本数 {len(labels)}")
print(f"训练集样本数 {train_size}, 验证集样本数 {len(labels) - train_size}")


Feature values range: 7.3049926e-15 0.9993037
Label values range: 0.35125 1.0
数据集构建完成：总样本数 699
训练集样本数 489, 验证集样本数 210


In [101]:
import tensorflow as tf
def save_dataset(dataset, save_path):
    """
    使用最新方法保存 TensorFlow 数据集。
    
    Args:
        dataset: tf.data.Dataset 对象。
        save_path: 保存路径。
    """
    dataset.save(save_path)
    print(f"数据集已保存到: {save_path}")

# 保存数据集
save_path = "C:/data/result/saved_dataset_score_true"
save_dataset(dataset, save_path)



数据集已保存到: C:/data/result/saved_dataset_score_true


In [7]:
import tensorflow as tf
def load_dataset(save_path):
    """
    加载保存的 tf.data.Dataset 对象。
    
    Args:
        save_path: 保存的路径。
    
    Returns:
        加载的 tf.data.Dataset 对象。
    """
    dataset = tf.data.Dataset.load(save_path)
    print(f"数据集已从: {save_path} 加载")
    return dataset
save_path = "C:/data/result/saved_dataset_score_true"
# 加载数据集
dataset = load_dataset(save_path)

# 检查加载后的数据集
for features, labels in loaded_dataset.take(1):
    print("Features:", features.numpy())
    print("Labels:", labels.numpy())


数据集已从: C:/data/result/saved_dataset_score_true 加载
Features: [9.95986900e-01 8.18129050e-06 8.77484100e-06 5.78878000e-08
 4.54797500e-06 3.13687130e-06 4.72669000e-09 9.77272400e-04
 1.68164000e-03 1.30030960e-09 1.32916580e-03 6.90987600e-09
 3.64507400e-07 1.03542185e-07]
Labels: 0.871667


In [5]:
import os

OUTPUT_DIR = "C:/data/result/scene_detector"  # 替换为实际路径

# 获取文件夹中的文件和文件夹列表
folder_list = os.listdir(OUTPUT_DIR)

# 输出原始顺序
print("原始读取顺序：")
for i, folder_name in enumerate(folder_list):
    print(f"{i + 1}: {folder_name}")


原始读取顺序：
1: 0-两手托天理三焦（八段锦）
2: 1-左右开弓似射雕（八段锦）
3: 10鹿抵（五禽戏）
4: 11鹿奔（五禽戏）
5: 12鸟伸（五禽戏）
6: 13鸟飞（五禽戏）
7: 14其他
8: 2-调理脾胃单臂举（八段锦）
9: 3-五劳七伤往后瞧（八段锦）
10: 4-摇头摆尾去心火（八段锦）
11: 5-两手攀足固肾腰（八段锦）
12: 6-攒拳怒目增气力（八段锦）
13: 7背后七颠百病消（八段锦）
14: 8虎举（五禽戏）
15: 9虎扑（五禽戏）


In [102]:
# 检查数据集
for data, label in dataset.take(1):
    print("数据向量 (1×14):", data.numpy())
    print("标签值:", label.numpy())

数据向量 (1×14): [9.95986900e-01 8.18129050e-06 8.77484100e-06 5.78878000e-08
 4.54797500e-06 3.13687130e-06 4.72669000e-09 9.77272400e-04
 1.68164000e-03 1.30030960e-09 1.32916580e-03 6.90987600e-09
 3.64507400e-07 1.03542185e-07]
标签值: 0.871667


In [12]:
import numpy as np
# 提取训练集和验证集中的特征和标签
train_features, train_labels = [], []
val_features, val_labels = [], []

# 遍历训练集
for features, labels in train_dataset:
    train_features.append(features.numpy())  # 转换为 NumPy 数组
    train_labels.append(labels.numpy())

# 遍历验证集
for features, labels in val_dataset:
    val_features.append(features.numpy())
    val_labels.append(labels.numpy())

# 拼接所有批次
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)
val_features = np.concatenate(val_features, axis=0)
val_labels = np.concatenate(val_labels, axis=0)

# 统计每个 14 维向量的最大值
train_feature_max = np.max(train_features, axis=1)  # 训练集每个向量的最大值
val_feature_max = np.max(val_features, axis=1)      # 验证集每个向量的最大值

# 找到这些最大值的最大值和最小值
overall_max_max = max(np.max(train_feature_max), np.max(val_feature_max))  # 所有最大值的最大值
overall_min_max = min(np.min(train_feature_max), np.min(val_feature_max))  # 所有最大值的最小值

# 打印结果
print("Training features range:", np.min(train_features), np.max(train_features))
print("Training labels range:", np.min(train_labels), np.max(train_labels))
print("Validation features range:", np.min(val_features), np.max(val_features))
print("Validation labels range:", np.min(val_labels), np.max(val_labels))
print("Maximum of max values across all features:", overall_max_max)
print("Minimum of max values across all features:", overall_min_max)


Training features range: 7.3049926e-15 0.9993037
Training labels range: 0.35125 1.0
Validation features range: 8.8999656e-14 0.9768819
Validation labels range: 0.56 1.0
Maximum of max values across all features: 0.9993037
Minimum of max values across all features: 0.23587419


In [9]:
# 直接将 dataset 划分为训练集和测试集
train_size = int(0.9 * len(dataset))  # 80% 用于训练
train_dataset = dataset.take(train_size).batch(128)
val_dataset = dataset.skip(train_size).batch(128)


In [118]:
from tensorflow.keras import layers, Model

# 定义 Wide & Deep 模型
def build_wide_and_deep_model(input_dim):
    # 输入层
    inputs = layers.Input(shape=(input_dim,))

    # Wide 部分（线性模型）
    wide_output = layers.Dense(1, activation="linear", name="wide")(inputs)

    # Deep 部分，尝试去掉正则化
    deep = layers.Dense(128, activation="relu")(inputs)  # 增加神经元数量
    deep = layers.Dense(64, activation="relu")(deep)
    deep_output = layers.Dense(1, activation="linear", name="deep")(deep)

    # 合并 Wide 和 Deep 部分
    combined = layers.Add()([wide_output, deep_output])

    # 输出层，使用sigmoid激活函数，确保输出在[0, 1]范围内
    outputs = layers.Activation("sigmoid")(combined)

    # 构建模型
    model = Model(inputs=inputs, outputs=outputs, name="Wide_Deep_Model")
    return model

# 获取输入维度（14维）
input_dim = 14
model = build_wide_and_deep_model(input_dim)

# 查看模型结构
model.summary()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
              loss="mean_squared_error",
              metrics=["mean_squared_error"])


Model: "Wide_Deep_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_34 (InputLayer)          [(None, 14)]         0           []                               
                                                                                                  
 dense_71 (Dense)               (None, 128)          1920        ['input_34[0][0]']               
                                                                                                  
 dense_72 (Dense)               (None, 64)           8256        ['dense_71[0][0]']               
                                                                                                  
 wide (Dense)                   (None, 1)            15          ['input_34[0][0]']               
                                                                                    

In [105]:
# 启动训练
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,  # 训练50轮
    verbose=1
)



In [92]:
print("Training labels range:", np.min(y_train), np.max(y_train))
print("Validation labels range:", np.min(y_val), np.max(y_val))


NameError: name 'y_train' is not defined

In [61]:
import tensorflow as tf

# 定义路径
best_model_path = "c:/data/result/model/score/best_model.h5"

# 早停回调：连续20轮验证MSE未改善停止训练
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_mean_squared_error',  # 监控验证集MSE
    patience=50,                      # 容忍连续20轮无改善
    mode='min',                       # 最小化MSE
    verbose=1
)

# 模型检查点回调：保存验证MSE最小的模型
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=best_model_path,         # 保存路径
    monitor='val_mean_squared_error', # 监控验证集MSE
    save_best_only=True,              # 仅保存验证MSE最小的模型
    mode='min',                       # 最小化MSE
    verbose=1
)

# 启动训练
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=500,  # 最大训练轮次
    callbacks=[early_stopping, model_checkpoint],  # 加入回调
    verbose=1
)


Epoch 1/500
Epoch 1: val_mean_squared_error improved from inf to 0.66471, saving model to c:/data/result/model/score\best_model.h5
Epoch 2/500
Epoch 2: val_mean_squared_error improved from 0.66471 to 0.65499, saving model to c:/data/result/model/score\best_model.h5
Epoch 3/500
1/5 [=====>........................] - ETA: 0s - loss: 0.8932 - mean_squared_error: 0.8932
Epoch 3: val_mean_squared_error improved from 0.65499 to 0.64542, saving model to c:/data/result/model/score\best_model.h5
Epoch 4/500
1/5 [=====>........................] - ETA: 0s - loss: 0.8773 - mean_squared_error: 0.8773
Epoch 4: val_mean_squared_error improved from 0.64542 to 0.63599, saving model to c:/data/result/model/score\best_model.h5
Epoch 5/500
1/5 [=====>........................] - ETA: 0s - loss: 0.8616 - mean_squared_error: 0.8616
Epoch 5: val_mean_squared_error improved from 0.63599 to 0.62676, saving model to c:/data/result/model/score\best_model.h5
Epoch 6/500
1/5 [=====>........................] - ETA: 

In [120]:
import numpy as np
import tensorflow as tf

class CombinedMetricEarlyStopping(tf.keras.callbacks.Callback):
    def __init__(self, filepath, patience=50, verbose=1):
        super(CombinedMetricEarlyStopping, self).__init__()
        self.filepath = filepath
        self.patience = patience
        self.verbose = verbose
        self.best_combined_score = np.inf  # 最优综合分数
        self.best_epoch = -1
        self.no_improvement_count = 0  # 未改善的轮数

    def on_epoch_end(self, epoch, logs=None):
        # 获取训练集和验证集的 MSE
        train_mse = logs.get('mean_squared_error')  # 假设模型有 MSE 指标
        val_mse = logs.get('val_mean_squared_error')

        # 定义综合评分规则（训练 MSE + 验证 MSE）
        combined_score = train_mse + val_mse

        # 如果综合评分改善
        if combined_score < self.best_combined_score:
            self.best_combined_score = combined_score
            self.best_epoch = epoch
            self.no_improvement_count = 0  # 重置未改善计数器

            # 保存模型
            self.model.save(self.filepath)
            if self.verbose > 0:
                print(f"Epoch {epoch + 1}: Combined score improved to {combined_score:.4f}. Saving model to {self.filepath}.")
        else:
            self.no_improvement_count += 1
            if self.verbose > 0:
                print(f"Epoch {epoch + 1}: Combined score did not improve from {self.best_combined_score:.4f}. "
                      f"No improvement count: {self.no_improvement_count}/{self.patience}.")

        # 如果未改善的轮数达到耐心值，停止训练
        if self.no_improvement_count >= self.patience:
            print(f"Early stopping triggered at epoch {epoch + 1}. Best combined score: {self.best_combined_score:.4f}.")
            self.model.stop_training = True

# 定义路径和早停回调
filepath = "c:/data/result/model/score/best_model.h5"
early_stopping = CombinedMetricEarlyStopping(filepath=filepath, patience=200, verbose=1)

# 开始训练
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1200,  # 最大训练轮次
    callbacks=[early_stopping],
    verbose=1
)


Epoch 1/1200
1/5 [=====>........................] - ETA: 0s - loss: 0.0083 - mean_squared_error: 0.0083Epoch 1: Combined score improved to 0.0146. Saving model to c:/data/result/model/score/best_model.h5.
Epoch 2/1200
1/5 [=====>........................] - ETA: 0s - loss: 0.0083 - mean_squared_error: 0.0083Epoch 2: Combined score improved to 0.0146. Saving model to c:/data/result/model/score/best_model.h5.
Epoch 3/1200
1/5 [=====>........................] - ETA: 0s - loss: 0.0083 - mean_squared_error: 0.0083Epoch 3: Combined score improved to 0.0146. Saving model to c:/data/result/model/score/best_model.h5.
Epoch 4/1200
1/5 [=====>........................] - ETA: 0s - loss: 0.0083 - mean_squared_error: 0.0083Epoch 4: Combined score improved to 0.0146. Saving model to c:/data/result/model/score/best_model.h5.
Epoch 5/1200
1/5 [=====>........................] - ETA: 0s - loss: 0.0083 - mean_squared_error: 0.0083Epoch 5: Combined score improved to 0.0146. Saving model to c:/data/result/mo