In [1]:
#pip install optuna


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# 加载数据
data_full = pd.read_excel('C:/1作品/clothse_excel/train1641.xlsx')

# 填充缺失值
data_full['max_tem'].fillna(data_full['max_tem'].mean(), inplace=True)
data_full['sensible_temperature'].fillna(data_full['sensible_temperature'].mean(), inplace=True)
data_full['label1'].fillna('nothing', inplace=True)
data_full['label2'].fillna('nothing', inplace=True)
# 假设label3没有缺失值

# 特征和标签
features = data_full[['max_tem', 'min_tem', 'mean_tem', 'average_humidity(％)', 'average_wind_speed(m/s)', 'sensible_temperature']]
labels = data_full[['label1', 'label2', 'label3']]

# 标准化特征
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# 编码标签
encoder = OneHotEncoder(sparse=False)
labels_encoded = encoder.fit_transform(labels)

# 分割数据为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(features_standardized, labels_encoded, test_size=0.2, random_state=42)




In [11]:
import optuna
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np

# 假设 X_train, y_train 是您的训练数据和标签
# X_train, y_train = ...

def create_model(trial):
    # 为模型的各个超参数定义搜索空间
    num_layers = trial.suggest_int('num_layers', 1, 3)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(X_train.shape[1],)))

    # 添加隐藏层
    for i in range(num_layers):
        num_hidden = trial.suggest_int(f'num_hidden_{i}', 10, 200)
        model.add(keras.layers.Dense(num_hidden, activation='relu'))
        model.add(keras.layers.Dropout(dropout_rate))

    model.add(keras.layers.Dense(y_train.shape[1], activation='sigmoid'))

    # 编译模型
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

def objective(trial):
    # 创建模型
    model = create_model(trial)

    # 分割数据为训练集和验证集
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, y_train, test_size=0.2)

    # 训练模型
    model.fit(X_train_split, y_train_split, epochs=10, validation_data=(X_val_split, y_val_split), verbose=0)

    # 在验证集上评估模型性能
    loss, accuracy = model.evaluate(X_val_split, y_val_split, verbose=0)

    # 由于Optuna最小化目标函数，因此返回损失
    return loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10) # 可以调整 n_trials 来控制尝试的超参数组合数

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)


[I 2024-02-03 18:45:35,680] A new study created in memory with name: no-name-69b7a7f9-72b1-4cc9-b6b1-516106dd8c77
[I 2024-02-03 18:45:38,821] Trial 0 finished with value: 0.2974013686180115 and parameters: {'num_layers': 3, 'dropout_rate': 0.4247791971802093, 'learning_rate': 6.201203456724875e-05, 'num_hidden_0': 142, 'num_hidden_1': 43, 'num_hidden_2': 184}. Best is trial 0 with value: 0.2974013686180115.
[I 2024-02-03 18:45:41,515] Trial 1 finished with value: 0.1531328409910202 and parameters: {'num_layers': 1, 'dropout_rate': 0.1619855070806956, 'learning_rate': 0.018740258673417977, 'num_hidden_0': 159}. Best is trial 1 with value: 0.1531328409910202.
[I 2024-02-03 18:45:43,497] Trial 2 finished with value: 0.14888179302215576 and parameters: {'num_layers': 1, 'dropout_rate': 0.30307550914661086, 'learning_rate': 0.013651366116021269, 'num_hidden_0': 135}. Best is trial 2 with value: 0.14888179302215576.
[I 2024-02-03 18:45:46,270] Trial 3 finished with value: 0.14961135387420654

Number of finished trials: 10
Best trial: {'num_layers': 2, 'dropout_rate': 0.09196756340622869, 'learning_rate': 0.04305435574471603, 'num_hidden_0': 55, 'num_hidden_1': 24}


In [12]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(10, activation='relu'), # 第一隐藏层
    tf.keras.layers.Dropout(0.022), # Dropout层
    tf.keras.layers.Dense(77, activation='relu'), # 第二隐藏层
    tf.keras.layers.Dropout(0.022), # Dropout层
    tf.keras.layers.Dense(16, activation='relu'), # 第三隐藏层
    tf.keras.layers.Dropout(0.022), # Dropout层
    tf.keras.layers.Dense(y_train.shape[1], activation='sigmoid') # 输出层
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0253)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=2)


Epoch 1/100
33/33 - 1s - loss: 0.2393 - accuracy: 0.3184 - val_loss: 0.1662 - val_accuracy: 0.3308 - 1s/epoch - 45ms/step
Epoch 2/100
33/33 - 0s - loss: 0.1579 - accuracy: 0.3680 - val_loss: 0.1588 - val_accuracy: 0.3308 - 92ms/epoch - 3ms/step
Epoch 3/100
33/33 - 0s - loss: 0.1548 - accuracy: 0.3889 - val_loss: 0.1574 - val_accuracy: 0.3726 - 79ms/epoch - 2ms/step
Epoch 4/100
33/33 - 0s - loss: 0.1528 - accuracy: 0.3708 - val_loss: 0.1574 - val_accuracy: 0.3726 - 100ms/epoch - 3ms/step
Epoch 5/100
33/33 - 0s - loss: 0.1517 - accuracy: 0.3556 - val_loss: 0.1584 - val_accuracy: 0.3346 - 88ms/epoch - 3ms/step
Epoch 6/100
33/33 - 0s - loss: 0.1511 - accuracy: 0.3584 - val_loss: 0.1587 - val_accuracy: 0.3612 - 126ms/epoch - 4ms/step
Epoch 7/100
33/33 - 0s - loss: 0.1495 - accuracy: 0.3670 - val_loss: 0.1557 - val_accuracy: 0.3004 - 115ms/epoch - 3ms/step
Epoch 8/100
33/33 - 0s - loss: 0.1486 - accuracy: 0.3422 - val_loss: 0.1554 - val_accuracy: 0.2814 - 117ms/epoch - 4ms/step
Epoch 9/100
3

<keras.src.callbacks.History at 0x2296cc55870>

In [20]:
# 训练模

# 保存模型
model.save('C:/1作品/picture_myclothes/clothesrecommendationmodel.h5')


  saving_api.save_model(
