In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# 假设您已经加载了数据到data_full
data_full = pd.read_excel('C:/1作品/clothse_excel/train1800.xlsx')

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer



# 对分类特征进行编码
label_encoder = LabelEncoder()
data_full['wether'] = label_encoder.fit_transform(data_full['wether'])
data_full['season'] = label_encoder.fit_transform(data_full['season'])


features = data_full[['max_tem', 'min_tem', 'mean_tem', 'average_humidity', 'average_wind_speed(m/s)', 'sensible_temperature','season','wether']]

scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

labels = data_full[['label1', 'label2', 'label3']]

encoder = OneHotEncoder(sparse=False)
labels_encoded = encoder.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(features_standardized, labels_encoded, test_size=0.2, random_state=42)





In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

def build_model(input_shape, output_units, learning_rate=0.001, dropout_rate=0.25):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(output_units, activation='sigmoid')
    ])
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [5]:
import optuna

def objective(trial):
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)

    model = build_model(X_train.shape[1], y_train.shape[1], learning_rate, dropout_rate)
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    history = model.fit(X_train_split, y_train_split, epochs=30, validation_data=(X_val_split, y_val_split), verbose=0, batch_size=32)
    
    # 获取验证集上的最佳损失
    best_loss = min(history.history['val_loss'])
    return best_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20) # 根据需要调整试验次数

best_params = study.best_trial.params
print('Best parameters:', best_params)


[I 2024-02-13 12:42:17,528] A new study created in memory with name: no-name-300e5f91-0e33-4131-82da-5d72ac82513b
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-02-13 12:42:24,869] Trial 0 finished with value: 0.14937977492809296 and parameters: {'learning_rate': 0.007078668283306361, 'dropout_rate': 0.3940963456645584}. Best is trial 0 with value: 0.14937977492809296.
[I 2024-02-13 12:42:32,493] Trial 1 finished with value: 0.149396613240242 and parameters: {'learning_rate': 0.0009315911954880989, 'dropout_rate': 0.38753918326618286}. Best is trial 0 with value: 0.14937977492809296.
[I 2024-02-13 12:42:39,696] Trial 2 finished with value: 0.152899369597435 and parameters: {'learning_rate': 0.00021263151900675065, 'dropout_rate': 0.2135114356294884}. Best is trial 0 with value: 0.14937977492809296.
[I 2024-02-13 12:42:46,615] Trial 3 finished with value: 0.1489623337984085 and parameters: 

Best parameters: {'learning_rate': 0.006625361588434237, 'dropout_rate': 0.29961161187822294}


In [6]:
model = build_model(X_train.shape[1], y_train.shape[1], best_params['learning_rate'], best_params['dropout_rate'])
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=2, batch_size=32)


Epoch 1/100
36/36 - 1s - loss: 0.2441 - accuracy: 0.2502 - val_loss: 0.1616 - val_accuracy: 0.3646 - 1s/epoch - 28ms/step
Epoch 2/100
36/36 - 0s - loss: 0.1609 - accuracy: 0.3536 - val_loss: 0.1551 - val_accuracy: 0.3194 - 78ms/epoch - 2ms/step
Epoch 3/100
36/36 - 0s - loss: 0.1561 - accuracy: 0.3145 - val_loss: 0.1550 - val_accuracy: 0.3854 - 85ms/epoch - 2ms/step
Epoch 4/100
36/36 - 0s - loss: 0.1546 - accuracy: 0.3180 - val_loss: 0.1547 - val_accuracy: 0.3819 - 84ms/epoch - 2ms/step
Epoch 5/100
36/36 - 0s - loss: 0.1520 - accuracy: 0.3458 - val_loss: 0.1533 - val_accuracy: 0.3611 - 83ms/epoch - 2ms/step
Epoch 6/100
36/36 - 0s - loss: 0.1512 - accuracy: 0.3510 - val_loss: 0.1521 - val_accuracy: 0.3125 - 85ms/epoch - 2ms/step
Epoch 7/100
36/36 - 0s - loss: 0.1499 - accuracy: 0.3354 - val_loss: 0.1526 - val_accuracy: 0.3021 - 84ms/epoch - 2ms/step
Epoch 8/100
36/36 - 0s - loss: 0.1499 - accuracy: 0.3301 - val_loss: 0.1525 - val_accuracy: 0.3576 - 104ms/epoch - 3ms/step
Epoch 9/100
36/3

In [7]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_acc}, Test loss: {test_loss}')


12/12 - 0s - loss: 0.1432 - accuracy: 0.3389 - 156ms/epoch - 13ms/step
Test accuracy: 0.33888888359069824, Test loss: 0.14319536089897156


In [8]:
from sklearn.metrics import multilabel_confusion_matrix

# 假设y_test是测试集上的真实标签，y_pred是模型预测的标签
y_pred = model.predict(X_test) > 0.5  # 采用0.5作为分类阈值
confusion_matrices = multilabel_confusion_matrix(y_test, y_pred)

# 打印每个标签的混淆矩阵
for i, matrix in enumerate(confusion_matrices):
    print(f"标签 {i} 的混淆矩阵:\n{matrix}\n")


标签 0 的混淆矩阵:
[[356   0]
 [  4   0]]

标签 1 的混淆矩阵:
[[195  15]
 [ 44 106]]

标签 2 的混淆矩阵:
[[310   0]
 [ 50   0]]

标签 3 的混淆矩阵:
[[350   0]
 [ 10   0]]

标签 4 的混淆矩阵:
[[311   7]
 [ 31  11]]

标签 5 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 6 的混淆矩阵:
[[348   0]
 [ 12   0]]

标签 7 的混淆矩阵:
[[350   0]
 [ 10   0]]

标签 8 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 9 的混淆矩阵:
[[346   0]
 [ 14   0]]

标签 10 的混淆矩阵:
[[338   0]
 [ 22   0]]

标签 11 的混淆矩阵:
[[354   0]
 [  6   0]]

标签 12 的混淆矩阵:
[[358   0]
 [  2   0]]

标签 13 的混淆矩阵:
[[359   0]
 [  1   0]]

标签 14 的混淆矩阵:
[[345   0]
 [ 15   0]]

标签 15 的混淆矩阵:
[[359   0]
 [  1   0]]

标签 16 的混淆矩阵:
[[359   0]
 [  1   0]]

标签 17 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 18 的混淆矩阵:
[[340   0]
 [ 20   0]]

标签 19 的混淆矩阵:
[[322   0]
 [ 38   0]]

标签 20 的混淆矩阵:
[[328   0]
 [ 32   0]]

标签 21 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 22 的混淆矩阵:
[[335   0]
 [ 25   0]]

标签 23 的混淆矩阵:
[[359   0]
 [  1   0]]

标签 24 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 25 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 26 的混淆矩阵:
[[360   0]
 [  0   0]]

标签 27 的混淆矩阵

In [9]:
from sklearn.metrics import confusion_matrix
import numpy as np

# 假设 y_test 是真实的标签矩阵，y_pred 是预测的标签矩阵，且它们都是二进制的（0或1）
# 对于多标签问题，我们可以通过将所有标签的预测视为一个长向量来简化问题
y_test_flattened = y_test.flatten()
y_pred_flattened = y_pred.flatten()

# 计算整体混淆矩阵
conf_matrix = confusion_matrix(y_test_flattened, y_pred_flattened)

print("整体混淆矩阵:")
print(conf_matrix)


整体混淆矩阵:
[[16460   100]
 [  876   204]]


In [10]:
from sklearn.metrics import accuracy_score

# 计算整体精度
y_pred_binary = (y_pred > 0.5)  # 将模型的预测结果转换为二进制形式
overall_accuracy = accuracy_score(y_test, y_pred_binary)
print(f"整体精度: {overall_accuracy}")

# 分别计算每个标签的正确率
for i, label in enumerate(encoder.categories_[0]):
    label_accuracy = accuracy_score(y_test[:, i], y_pred_binary[:, i])
    print(f"{label} 的正确率: {label_accuracy}")


整体精度: 0.013888888888888888
Hoodie 的正确率: 0.9888888888888889
Unknown 的正确率: 0.8361111111111111
coat 的正确率: 0.8611111111111112
denim jacket 的正确率: 0.9722222222222222
jackets 的正确率: 0.8944444444444445
jeans 的正确率: 1.0
knitted coat 的正确率: 0.9666666666666667
long down jacket 的正确率: 0.9722222222222222
long-sleeved dress 的正确率: 1.0
shirts 的正确率: 0.9611111111111111
short down jacket 的正确率: 0.9388888888888889
short trench coat 的正确率: 0.9833333333333333
short woolen coat 的正确率: 0.9944444444444445
shorts 的正确率: 0.9972222222222222
suit jackets 的正确率: 0.9583333333333334
sweater 的正确率: 0.9972222222222222
t-shirts 的正确率: 0.9972222222222222
vest 的正确率: 1.0
windbreaker 的正确率: 0.9444444444444444


In [11]:
# model.save('C:/1作品/picture_myclothes/my_model.h5')
