In [13]:
import pandas as pd
import re
import numpy as np
import os
from datetime import datetime

# 定义预期列名
column_names = ['timestamp', 'sensor_name', 'ax', 'ay', 'az', 'wx', 'wy', 'wz']

def validate_input_data(file_path, expected_cols, skip_invalid=True):
    """
    验证输入 CSV 文件的格式和完整性，特别处理时间戳格式 MM:SS.s。

    参数：
    file_path (str): 输入文件路径
    expected_cols (list): 预期的列名列表
    skip_invalid (bool): 是否跳过无效时间戳行（默认 True）

    返回：
    pd.DataFrame: 验证后的数据框
    """
    # 1. 检查文件存在性
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"输入文件 {file_path} 不存在")

    # 2. 读取数据并验证列数
    try:
        df = pd.read_csv(file_path, header=0, sep=',', engine='python')
        if df.shape[1] != len(expected_cols):
            raise ValueError(f"文件 {file_path} 列数 ({df.shape[1]}) 与预期 ({len(expected_cols)}) 不符")
        df.columns = expected_cols
    except Exception as e:
        raise ValueError(f"读取文件 {file_path} 失败: {e}")

    # 3. 检查缺失值
    if df.isna().any().any():
        print(f"警告：文件 {file_path} 包含缺失值")
        df = df.dropna().reset_index(drop=True)
    '''
    # 4. 删除完全重复的行
    initial_rows = len(df)
    df = df.drop_duplicates().reset_index(drop=True)
    if len(df) < initial_rows:
        print(f"警告：文件 {file_path} 包含 {initial_rows - len(df)} 行重复数据，已删除")
    '''
    # 5. 验证时间戳格式 (MM:SS.s)
    def parse_timestamp(ts):
        if not isinstance(ts, str):
            return pd.NaT

        # 检查时间戳是否包含日期部分
        date_pattern = r'\d{4}-\d{2}-\d{2}'
        if re.search(date_pattern, ts):
            # 如果包含日期部分，直接解析
            return pd.to_datetime(ts, errors='coerce')
        else:
            # 如果不包含日期部分，添加默认日期并解析 MM:SS.s 格式
            try:
                return pd.to_datetime(f"1900-01-01 {ts}", format='%Y-%m-%d %M:%S.%f', errors='coerce')
            except ValueError:
                return pd.NaT

    # 保存原始时间戳以便调试
    original_timestamps = df['timestamp'].copy()
    df['timestamp'] = df['timestamp'].apply(parse_timestamp)

    # 检查无效时间戳
    invalid_rows = df[df['timestamp'].isna()]
    if not invalid_rows.empty:
        print(f"警告：文件 {file_path} 包含 {len(invalid_rows)} 个无效时间戳，样例：")
        invalid_sample = pd.DataFrame({
            'row_index': invalid_rows.index,
            'original_timestamp': original_timestamps[invalid_rows.index]
        })
        print(invalid_sample.head(10))
        if not skip_invalid:
            raise ValueError(f"文件 {file_path} 包含 {len(invalid_rows)} 个无效时间戳")
        else:
            print(f"跳过 {len(invalid_rows)} 个无效时间戳行")
            df = df.dropna(subset=['timestamp']).reset_index(drop=True)

    # 6. 验证时间戳范围
    if not df.empty:
        minutes = df['timestamp'].dt.minute
        seconds = df['timestamp'].dt.second + df['timestamp'].dt.microsecond / 1e6
        if (minutes >= 60).any() or (seconds >= 60).any():
            print(f"警告：文件 {file_path} 包含异常时间戳（分钟或秒超出范围）")
            invalid_range = df[(minutes >= 60) | (seconds >= 60)][['timestamp']]
            print(f"异常时间戳样例：\n{invalid_range.head()}")
            if not skip_invalid:
                raise ValueError(f"文件 {file_path} 包含无效时间戳范围")
            df = df[(minutes < 60) & (seconds < 60)].reset_index(drop=True)

    # 7. 验证数值列
    numeric_cols = expected_cols[2:]  # ax, ay, az, wx, wy, wz
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    if df[numeric_cols].isna().any().any():
        print(f"警告：文件 {file_path} 的数值列 {numeric_cols} 包含非数值数据")
        df = df.dropna().reset_index(drop=True)

    # 8. 检查时间戳递增
    if not df.empty and not df['timestamp'].is_monotonic_increasing:
        print(f"警告：文件 {file_path} 的时间戳不是单调递增的，将进行排序")
        df = df.sort_values(by='timestamp').reset_index(drop=True)

    # 9. 检查传感器名称
    unique_sensors = df['sensor_name'].unique()
    if len(unique_sensors) == 0:
        raise ValueError(f"文件 {file_path} 没有有效的传感器名称")
    print(f"文件 {file_path} 包含传感器: {unique_sensors}")

    # 10. 检查数据范围（加速度 ±20g，角速度 ±2000 deg/s）
    for col in numeric_cols:
        max_val = df[col].abs().max()
        threshold = 20 if col.startswith('a') else 2000
        if max_val > threshold:
            print(f"警告：文件 {file_path} 的 {col} 列数据范围异常，最大值: {max_val}")

    # 11. 检查数据量
    if len(df) < 10:
        raise ValueError(f"文件 {file_path} 数据量不足，仅有 {len(df)} 行")

    return df

# 单位转换函数
def convert_units(data):
    gyro_cols = ['wx', 'wy', 'wz']
    data[gyro_cols] = data[gyro_cols] * np.pi / 180
    return data

# 数据处理流程
def process_data(df):
    df_sorted = df.sort_values(by='timestamp')
    groups = df_sorted.groupby('sensor_name', sort=False)
    processed = []
    for name, group in groups:
        try:
            downsampled = group.iloc[::1]  # 未降采样
            processed.append(downsampled)
        except Exception as e:
            print(f"设备 {name} 处理失败: {e}")
            continue
    final_df = pd.concat(processed).reset_index(drop=True)
    final_df = final_df.sort_values(by=['sensor_name', 'timestamp']).reset_index(drop=True)
    keep_cols = ['timestamp', 'sensor_name', 'ax', 'ay', 'az', 'wx', 'wy', 'wz']
    return final_df[keep_cols]

# 执行验证和处理
file_path = 'pre/raw/YWQZ_728_2.csv'
try:
    df = validate_input_data(file_path, column_names, skip_invalid=True)
    df = convert_units(df)
    processed_data = process_data(df)
    processed_data.to_csv('test.csv', index=False)
    print("数据处理完成，保存为 test.csv")
    print("处理后数据样例：")
    print(processed_data.head(3))
except Exception as e:
    print(f"数据验证或处理失败: {e}")
    exit()

# 第二部分：重新组织数据
try:
    df = pd.read_csv('test.csv', header=0)
    if df.empty:
        raise ValueError("文件 'test.csv' 为空")
    groups = df.groupby('sensor_name', sort=False)
    group_names = list(groups.groups.keys())
    if not group_names:
        raise ValueError("CSV 文件中未找到任何设备分组")

    min_rows = min(len(groups.get_group(name)) for name in group_names)
    final_result = []
    for idx in range(min_rows):
        row_data = []
        for name in group_names:
            group_df = groups.get_group(name)
            row_data.extend(group_df.iloc[idx, 2:8].tolist())
        final_result.append(row_data)

    pd.DataFrame(final_result).to_csv('pre/test/qup02.csv', index=False, header=False)
    print(f"文件已成功保存，有效行数：{min_rows}")
except FileNotFoundError:
    raise FileNotFoundError("文件 'test.csv' 未找到，请检查路径是否正确")
except Exception as e:
    raise ValueError(f"数据处理失败: {e}")

文件 pre/raw/YWQZ_728_2.csv 包含传感器: ['WTNB_BLE_06' 'WTNB_BLE_02' 'WTNB_BLE_05' 'WTNB_BLE_03' 'WTNB_BLE_01'
 'WTNB_BLE_04']
数据处理完成，保存为 test.csv
处理后数据样例：
                   timestamp  sensor_name    ax    ay    az        wx  \
0 2025-07-23 22:32:19.712155  WTNB_BLE_01  1.66  9.26  2.56  0.479442   
1 2025-07-23 22:32:19.712461  WTNB_BLE_01  1.74  9.18  2.60  0.601790   
2 2025-07-23 22:32:19.712705  WTNB_BLE_01  1.94  9.40  1.62  0.859749   

         wy        wz  
0 -0.205600  0.124617  
1 -0.186401  0.118159  
2 -0.153414  0.104371  
文件已成功保存，有效行数：2280


特征数据处理

In [6]:
import numpy as np
import pandas as pd
import os
from data_features import *  # 包含 featureRMS, featureMAV, featureWL, featureZC, featureSSC

# 文件和标签映射
file_labels = {
    'data/s1/dunaid.csv': 0,
    'data/s2/upaid.csv': 1,
    'data/s3/qupaid.csv': 2,
}

def validate_feature_input(file_path):
    """
    验证特征提取阶段的输入文件。

    参数：
    file_path (str): 输入文件路径

    返回：
    pd.DataFrame: 验证后的数据框
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"文件 {file_path} 不存在")

    try:
        df = pd.read_csv(file_path, header=None)
        expected_cols = 36  # 6 传感器 * 6 列 (ax, ay, az, wx, wy, wz)
        if df.shape[1] != expected_cols:
            raise ValueError(f"文件 {file_path} 列数 ({df.shape[1]}) 不等于预期 ({expected_cols})")
    except Exception as e:
        raise ValueError(f"读取文件 {file_path} 失败: {e}")

    df = df.apply(pd.to_numeric, errors='coerce')
    if df.isna().any().any():
        print(f"警告：文件 {file_path} 包含非数值数据，已移除")
        df = df.dropna().reset_index(drop=True)

    if len(df) < 2:
        raise ValueError(f"文件 {file_path} 数据量不足，仅有 {len(df)} 行")

    return df

# 特征提取
featureData = []
labels = []
timeWindow = 5
strideWindow = 2

for file_name, label in file_labels.items():
    print(f"处理文件: {file_name}")
    try:
        df = validate_feature_input(file_name)
        if df.empty:
            print(f"错误：文件 {file_name} 的数据框为空")
            continue

        df_values = df.values
        length = df_values.shape[0]
        print(f"文件 {file_name} 包含 {length} 行数据")

        for j in range(0, length - timeWindow + 1, strideWindow):
            window_data = df_values[j:j + timeWindow, :]
            if window_data.shape[0] < timeWindow:
                print(f"警告：文件 {file_name} 的窗口 {j} 数据不足，跳过")
                continue

            try:
                rms = featureRMS(window_data)
                mav = featureMAV(window_data)
                wl = featureWL(window_data)
                zc = featureZC(window_data)
                ssc = featureSSC(window_data)
                featureStack = np.hstack((rms, mav, wl, zc, ssc))
                if featureStack.shape[0] != 180:  # 5 特征 * 36 通道
                    print(f"警告：文件 {file_name} 窗口 {j} 特征维度 {featureStack.shape[0]} 不正确")
                    continue
                featureData.append(featureStack)
                labels.append(label)
            except Exception as e:
                print(f"文件 {file_name} 的窗口 {j} 特征提取失败: {e}")
                continue
    except Exception as e:
        print(f"处理文件 {file_name} 失败: {e}")
        continue

featureData = np.array(featureData)
labels = np.array(labels)

if len(featureData) != len(labels):
    raise ValueError(f"特征数据 ({len(featureData)}) 和标签 ({len(labels)}) 长度不匹配")
if featureData.size == 0:
    raise ValueError("没有提取到任何特征数据")
if labels.size == 0:
    raise ValueError("没有提取到任何标签数据")

pd.DataFrame(featureData).to_csv('featuresdata.csv', index=False, header=False)
pd.DataFrame(labels).to_csv('labelsdata.csv', index=False, header=False)

print("Feature data shape:", featureData.shape)
print("Labels shape:", labels.shape)

处理文件: data/s1/dunaid.csv
文件 data/s1/dunaid.csv 包含 15367 行数据
处理文件: data/s2/upaid.csv
文件 data/s2/upaid.csv 包含 9382 行数据
处理文件: data/s3/qupaid.csv
文件 data/s3/qupaid.csv 包含 16533 行数据
Feature data shape: (20636, 180)
Labels shape: (20636,)


训练预处理

In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
import os
import matplotlib.pyplot as plt
from datetime import datetime

# 全局超参数
LAMBDA_LOSS_AMOUNT = 0.0015
BATCH_SIZE = 64
EPOCHS = 24
LEARNING_RATE = 0.001
N_STEPS = 2
N_SIGNALS = 90
N_CLASSES = 3
N_HIDDEN = 256
TRAIN_DIR = "train"

def weighted_categorical_crossentropy(weights):
    """
    自定义加权交叉熵损失函数。

    参数：
    weights: 类别权重数组，形状 (N_CLASSES,)
    """
    weights = tf.constant(weights, dtype=tf.float32)

    def loss(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
        cross_entropy = -tf.reduce_sum(y_true * tf.math.log(y_pred) * weights, axis=-1)
        return tf.reduce_mean(cross_entropy)

    return loss

def LSTM_RNN(n_steps, n_input, n_hidden, n_classes, lambda_loss_amount, loss_function, weights=None):
    """
    构建 LSTM 模型，支持多种损失函数。
    """
    inputs = tf.keras.Input(shape=(n_steps, n_input), name='input_layer')

    lstm_layer_1 = tf.keras.layers.LSTM(
        units=n_hidden,
        return_sequences=True,
        dropout=0.3,
        recurrent_dropout=0.2,
        kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount)
    )(inputs)
    norm_layer_1 = tf.keras.layers.LayerNormalization()(lstm_layer_1)

    lstm_layer_2 = tf.keras.layers.LSTM(
        units=n_hidden // 2,
        return_sequences=False,
        dropout=0.3,
        recurrent_dropout=0.2,
        kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount)
    )(norm_layer_1)
    norm_layer_2 = tf.keras.layers.LayerNormalization()(lstm_layer_2)

    dense_layer_1 = tf.keras.layers.Dense(
        units=n_hidden // 4,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount)
    )(norm_layer_2)
    dropout_dense = tf.keras.layers.Dropout(0.3)(dense_layer_1)

    output_layer = tf.keras.layers.Dense(
        units=n_classes,
        activation='softmax',
        name='output_layer',
        kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount)
    )(dropout_dense)

    model = tf.keras.Model(inputs=inputs, outputs=output_layer)

    if loss_function == 'weighted_categorical_crossentropy':
        loss = weighted_categorical_crossentropy(weights)
    else:
        loss = loss_function

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss=loss,
        metrics=['accuracy']
    )
    return model

def extract_batch_size(_train, step, batch_size):
    """
    从训练数据中提取指定批次大小的数据。
    """
    if len(_train) < batch_size:
        raise ValueError(f"训练数据样本数 ({len(_train)}) 小于批次大小 ({batch_size})")

    shape = list(_train.shape)
    shape[0] = batch_size
    batch_s = np.empty(shape)

    for i in range(batch_size):
        index = ((step - 1) * batch_size + i) % len(_train)
        batch_s[i] = _train[index]

    return batch_s

def one_hot(y_, n_classes):
    """
    将标签转换为 one-hot 编码。
    """
    try:
        y_ = y_.reshape(-1).astype(np.int32)
        return np.eye(n_classes)[y_]
    except Exception as e:
        raise ValueError(f"one-hot 编码失败: {e}")

def load_X(input_csv_file):
    """
    加载特征数据并进行预处理。
    """
    if not os.path.exists(input_csv_file):
        raise FileNotFoundError(f"文件 {input_csv_file} 不存在")

    X = pd.read_csv(input_csv_file, header=None)
    X = X.apply(pd.to_numeric, errors='coerce')
    if X.isna().any().any():
        print(f"警告：文件 {input_csv_file} 包含非数值数据，已移除")
        X = X.dropna().reset_index(drop=True)

    if X.empty:
        raise ValueError(f"文件 {input_csv_file} 数据为空")

    if X.shape[1] != N_STEPS * N_SIGNALS:
        raise ValueError(f"特征数 {X.shape[1]} 不匹配 {N_STEPS} * {N_SIGNALS}")

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    print(f"特征数据形状: {X_scaled.shape}")
    return X_scaled, scaler

def load_y(labels_csv_file):
    """
    加载标签数据。
    """
    if not os.path.exists(labels_csv_file):
        raise FileNotFoundError(f"文件 {labels_csv_file} 不存在")

    y = pd.read_csv(labels_csv_file, header=None)
    y = y.squeeze().values
    y = y[np.char.isdigit(y.astype(str))]
    if len(y) == 0:
        raise ValueError(f"文件 {labels_csv_file} 没有有效标签")

    y = y.astype(np.int32)
    if not np.all(np.isin(y, range(N_CLASSES))):
        raise ValueError(f"标签值 {np.unique(y)} 超出预期类别范围 [0, {N_CLASSES-1}]")

    return y

def validate_data_shapes(X, y):
    """
    验证特征和标签数据形状是否匹配。
    """
    if len(X) != len(y):
        raise ValueError(f"特征数据 ({len(X)}) 和标签数据 ({len(y)}) 长度不匹配")
    print(f"数据验证通过：{len(X)} 样本")

def reshape_data(X, n_steps, n_signals):
    """
    重新组织数据形状为 (n_samples, n_steps, n_signals)。
    """
    expected_features = n_steps * n_signals
    if X.shape[1] != expected_features:
        raise ValueError(f"特征数 {X.shape[1]} 不匹配 {n_steps} * {n_signals}")
    return X.reshape(-1, n_steps, n_signals)

def compute_class_weights(y):
    """
    计算类权重以处理类别不平衡。
    """
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.arange(N_CLASSES),
        y=y
    )
    return dict(enumerate(class_weights)), class_weights

def plot_training_history(history, save_dir, loss_name):
    """
    绘制并保存训练过程中的损失和准确率曲线。
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # 损失曲线
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss ({loss_name})')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    loss_plot_path = os.path.join(save_dir, f'loss_{loss_name}_{timestamp}.png')
    plt.savefig(loss_plot_path)
    plt.close()
    print(f"损失曲线已保存至: {loss_plot_path}")

    # 准确率曲线
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Training and Validation Accuracy ({loss_name})')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    acc_plot_path = os.path.join(save_dir, f'accuracy_{loss_name}_{timestamp}.png')
    plt.savefig(acc_plot_path)
    plt.close()
    print(f"准确率曲线已保存至: {acc_plot_path}")

def save_scaler_params(scaler, save_dir):
    """
    使用 NumPy 保存 MinMaxScaler 的最大值和最小值。
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    min_path = os.path.join(save_dir, f'scaler_min_{timestamp}.npy')
    max_path = os.path.join(save_dir, f'scaler_max_{timestamp}.npy')

    np.save(min_path, scaler.data_min_)
    np.save(max_path, scaler.data_max_)
    print(f"Scaler 最小值已保存至: {min_path}")
    print(f"Scaler 最大值已保存至: {max_path}")

def convert_to_tflite(model_path, tflite_path):
    """
    将 Keras 模型转换为 TensorFlow Lite 格式。

    参数：
    model_path (str): 已保存的 Keras 模型路径（.h5 文件）
    tflite_path (str): 转换后的 TensorFlow Lite 模型保存路径（.tflite 文件）
    """
    try:
        # 确保模型文件存在
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"模型文件 {model_path} 不存在")

        # 加载 Keras 模型
        model = tf.keras.models.load_model(model_path)
        print(f"成功加载模型: {model_path}")

        # 创建 TFLite 转换器
        converter = tf.lite.TFLiteConverter.from_keras_model(model)

        # 可选：设置优化选项
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        # 转换为 TFLite 模型
        tflite_model = converter.convert()
        print("模型转换成功")

        # 保存 TFLite 模型
        os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
        with open(tflite_path, 'wb') as f:
            f.write(tflite_model)
        print(f"TFLite 模型已保存至: {tflite_path}")

    except Exception as e:
        print(f"转换失败: {e}")

def main():
    """
    主函数：加载数据、训练 LSTM 模型（多种损失函数）、保存模型和可视化结果。
    """
    try:
        # 加载数据
        X, scaler = load_X('featuresdata.csv')
        y = load_y('labelsdata.csv')

        # 验证数据形状
        validate_data_shapes(X, y)

        # 打印类别分布
        unique, counts = np.unique(y, return_counts=True)
        print(f"类别分布: {dict(zip(unique, counts))}")

        # 计算类权重
        class_weights_dict, class_weights_array = compute_class_weights(y)
        print(f"类权重: {class_weights_dict}")

        # 转换为 one-hot 编码
        y_one_hot = one_hot(y, N_CLASSES)

        # 重新组织数据形状
        X_reshaped = reshape_data(X, N_STEPS, N_SIGNALS)

        # 划分训练、验证和测试集
        X_temp, X_test, y_temp, y_test = train_test_split(
            X_reshaped, y_one_hot, test_size=0.15, random_state=42, stratify=y
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp, test_size=0.1765, random_state=42, stratify=np.argmax(y_temp, axis=1)
        )

        # 定义损失函数列表
        loss_functions = [
            ('categorical_crossentropy', None),
            ('categorical_hinge', None),
            ('weighted_categorical_crossentropy', class_weights_array)
        ]

        # 创建时间戳子文件夹
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_dir = os.path.join(TRAIN_DIR, timestamp)

        # 保存 scaler 参数
        save_scaler_params(scaler, save_dir)

        # 为每种损失函数训练模型
        for loss_name, weights in loss_functions:
            print(f"\n训练模型使用损失函数: {loss_name}")

            # 构建模型
            model = LSTM_RNN(
                n_steps=N_STEPS,
                n_input=N_SIGNALS,
                n_hidden=N_HIDDEN,
                n_classes=N_CLASSES,
                lambda_loss_amount=LAMBDA_LOSS_AMOUNT,
                loss_function=loss_name,
                weights=weights
            )

            # 定义回调函数
            early_stopping = tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            )
            reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-6
            )

            # 训练模型
            history = model.fit(
                X_train,
                y_train,
                batch_size=BATCH_SIZE,
                epochs=EPOCHS,
                validation_data=(X_val, y_val),
                callbacks=[early_stopping, reduce_lr],
                class_weight=class_weights_dict if loss_name != 'weighted_categorical_crossentropy' else None,
                verbose=1
            )

            # 可视化训练效果
            plot_training_history(history, save_dir, loss_name)

            # 评估模型
            test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
            print(f"测试集损失 ({loss_name}): {test_loss:.4f}, 测试集准确率: {test_accuracy:.4f}")

            # 生成分类报告
            y_pred = model.predict(X_test)
            y_pred_classes = np.argmax(y_pred, axis=1)
            y_test_classes = np.argmax(y_test, axis=1)
            print(f"\n分类报告 ({loss_name}):")
            print(classification_report(y_test_classes, y_pred_classes, target_names=['Class 0', 'Class 1', 'Class 2']))

            # 保存模型
            model_path = os.path.join(save_dir, f'lstm_model_{loss_name}.h5')
            model.save(model_path)
            print(f"模型已保存至: {model_path}")
            converter = tf.lite.TFLiteConverter.from_keras_model(model)
            converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
    ]
            tflite_model = converter.convert()

            # 保存转换后的 TensorFlow Lite 模型
            with open('my_model.tflite', 'wb') as f:
                f.write(tflite_model)
            print("TensorFlow Lite 模型已保存为 my_model.tflite")




    except Exception as e:
        print(f"训练失败: {e}")

if __name__ == "__main__":
    main()

# 部署端归一化示例
def deploy_normalize(X, min_path, max_path):
    """
    在部署端使用保存的最大值和最小值进行归一化。
    """
    data_min = np.load(min_path)
    data_max = np.load(max_path)

    denominator = data_max - data_min
    denominator[denominator == 0] = 1
    X_scaled = (X - data_min) / denominator
    return X_scaled

特征数据形状: (20636, 180)
数据验证通过：20636 样本
类别分布: {0: 7682, 1: 4689, 2: 8265}
类权重: {0: 0.8954265382278921, 1: 1.4669794554631408, 2: 0.8322645694696511}
Scaler 最小值已保存至: train\20250731_224748\scaler_min_20250731_224748.npy
Scaler 最大值已保存至: train\20250731_224748\scaler_max_20250731_224748.npy

训练模型使用损失函数: categorical_crossentropy
Epoch 1/24
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.7085 - loss: 1.3666 - val_accuracy: 0.9567 - val_loss: 0.5711 - learning_rate: 0.0010
Epoch 2/24
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8877 - loss: 0.6864 - val_accuracy: 0.9706 - val_loss: 0.4025 - learning_rate: 0.0010
Epoch 3/24
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9150 - loss: 0.5006 - val_accuracy: 0.9713 - val_loss: 0.3099 - learning_rate: 0.0010
Epoch 4/24
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9119 - loss: 0.4247 




分类报告 (categorical_crossentropy):
              precision    recall  f1-score   support

     Class 0       0.98      1.00      0.99      1153
     Class 1       1.00      0.99      1.00       703
     Class 2       0.99      0.98      0.99      1240

    accuracy                           0.99      3096
   macro avg       0.99      0.99      0.99      3096
weighted avg       0.99      0.99      0.99      3096

模型已保存至: train\20250731_224748\lstm_model_categorical_crossentropy.h5
INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmpw849j058\assets


INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmpw849j058\assets


Saved artifact at 'C:\Users\-\AppData\Local\Temp\tmpw849j058'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 2, 90), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  1857294364368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857480170896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294361296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294365712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294367248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294364560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294362256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857294366864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857298608784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857298605136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857298608016: T




分类报告 (categorical_hinge):
              precision    recall  f1-score   support

     Class 0       0.93      0.96      0.94      1153
     Class 1       1.00      0.99      0.99       703
     Class 2       0.96      0.93      0.94      1240

    accuracy                           0.95      3096
   macro avg       0.96      0.96      0.96      3096
weighted avg       0.95      0.95      0.95      3096

模型已保存至: train\20250731_224748\lstm_model_categorical_hinge.h5
INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmp2lywb2vd\assets


INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmp2lywb2vd\assets


Saved artifact at 'C:\Users\-\AppData\Local\Temp\tmp2lywb2vd'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 2, 90), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  1857534013712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857534009872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857534012752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857534014864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441916880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441922832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441923216: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441916112: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441921872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441923408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855441916496: T




分类报告 (weighted_categorical_crossentropy):
              precision    recall  f1-score   support

     Class 0       0.99      0.99      0.99      1153
     Class 1       1.00      1.00      1.00       703
     Class 2       0.99      0.99      0.99      1240

    accuracy                           0.99      3096
   macro avg       0.99      0.99      0.99      3096
weighted avg       0.99      0.99      0.99      3096

模型已保存至: train\20250731_224748\lstm_model_weighted_categorical_crossentropy.h5
INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmp116ahu7u\assets


INFO:tensorflow:Assets written to: C:\Users\-\AppData\Local\Temp\tmp116ahu7u\assets


Saved artifact at 'C:\Users\-\AppData\Local\Temp\tmp116ahu7u'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 2, 90), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  1855460472400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855460472784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855460470864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855518292688: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855518293648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855460467024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855518292304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855518293840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1855518293264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857534010832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1857534011216: T

转化

In [8]:
import tensorflow as tf

# 加载H5模型
model = tf.keras.models.load_model('lstm_model_categorical_crossentropy.h5')

# 创建TFLite转换器
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# 转换为TFLite模型
tflite_model = converter.convert()

# 保存TFLite模型
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'lstm_model_categorical_crossentropy.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)