In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import optuna
from sklearn.metrics import multilabel_confusion_matrix, confusion_matrix, accuracy_score
import numpy as np

In [5]:
# データをロードします
data_full = pd.read_excel('C:/1作品/clothse_excel/train1800.xlsx')

# カテゴリー特徴をエンコードします
label_encoder = LabelEncoder()
data_full['wether'] = label_encoder.fit_transform(data_full['wether'])
data_full['season'] = label_encoder.fit_transform(data_full['season'])

In [6]:
# 特徴を準備します
features = data_full[['max_tem', 'min_tem', 'mean_tem', 'average_humidity', 'average_wind_speed(m/s)', 'sensible_temperature', 'season', 'wether']]

# 特徴を標準化します
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# ラベルを準備します
labels = data_full[['label1', 'label2', 'label3']]

# ラベルをOneHotエンコードします
encoder = OneHotEncoder(sparse=False)
labels_encoded = encoder.fit_transform(labels)

# データセットを分割します
X_train, X_test, y_train, y_test = train_test_split(features_standardized, labels_encoded, test_size=0.2, random_state=42)

# モデルを構築します
def build_model(input_shape, output_units, learning_rate=0.001, dropout_rate=0.25):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(output_units, activation='sigmoid')
    ])
    
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model



In [7]:
# Optunaでハイパーパラメータを最適化します
def objective(trial):
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)

    model = build_model(X_train.shape[1], y_train.shape[1], learning_rate, dropout_rate)
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    history = model.fit(X_train_split, y_train_split, epochs=30, validation_data=(X_val_split, y_val_split), verbose=0, batch_size=32)
    
    best_loss = min(history.history['val_loss'])
    return best_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

best_params = study.best_trial.params
print('最適なパラメータ:', best_params)

[I 2024-02-13 11:37:41,783] A new study created in memory with name: no-name-6317a0fb-ec61-466a-b327-5201fb1844f9
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-02-13 11:37:45,778] Trial 0 finished with value: 0.1484425663948059 and parameters: {'learning_rate': 0.003122902388408672, 'dropout_rate': 0.3674624353040392}. Best is trial 0 with value: 0.1484425663948059.
[I 2024-02-13 11:37:50,294] Trial 1 finished with value: 0.15534457564353943 and parameters: {'learning_rate': 0.00017379618439706648, 'dropout_rate': 0.3501111175193946}. Best is trial 0 with value: 0.1484425663948059.
[I 2024-02-13 11:37:54,695] Trial 2 finished with value: 0.14843209087848663 and parameters: {'learning_rate': 0.0034753759060548615, 'dropout_rate': 0.25530860773381703}. Best is trial 2 with value: 0.14843209087848663.
[I 2024-02-13 11:37:58,912] Trial 3 finished with value: 0.15532656013965607 and parameters

最適なパラメータ: {'learning_rate': 0.0034658138415709348, 'dropout_rate': 0.10875736006256398}


In [8]:
# 最適なパラメータでモデルを訓練します
model = build_model(X_train.shape[1], y_train.shape[1], best_params['learning_rate'], best_params['dropout_rate'])
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, verbose=2, batch_size=32)

Epoch 1/100
36/36 - 2s - loss: 0.2736 - accuracy: 0.2728 - val_loss: 0.1653 - val_accuracy: 0.4167 - 2s/epoch - 45ms/step
Epoch 2/100
36/36 - 0s - loss: 0.1592 - accuracy: 0.3562 - val_loss: 0.1565 - val_accuracy: 0.4028 - 157ms/epoch - 4ms/step
Epoch 3/100
36/36 - 0s - loss: 0.1535 - accuracy: 0.3475 - val_loss: 0.1556 - val_accuracy: 0.3368 - 203ms/epoch - 6ms/step
Epoch 4/100
36/36 - 0s - loss: 0.1518 - accuracy: 0.3475 - val_loss: 0.1543 - val_accuracy: 0.3090 - 204ms/epoch - 6ms/step
Epoch 5/100
36/36 - 0s - loss: 0.1503 - accuracy: 0.3562 - val_loss: 0.1542 - val_accuracy: 0.3576 - 204ms/epoch - 6ms/step
Epoch 6/100
36/36 - 0s - loss: 0.1484 - accuracy: 0.3328 - val_loss: 0.1537 - val_accuracy: 0.4028 - 200ms/epoch - 6ms/step
Epoch 7/100
36/36 - 0s - loss: 0.1478 - accuracy: 0.3536 - val_loss: 0.1539 - val_accuracy: 0.3646 - 199ms/epoch - 6ms/step
Epoch 8/100
36/36 - 0s - loss: 0.1473 - accuracy: 0.3432 - val_loss: 0.1526 - val_accuracy: 0.3611 - 148ms/epoch - 4ms/step
Epoch 9/10

In [9]:
# モデルを評価します
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'テスト精度: {test_acc}, テスト損失: {test_loss}')

12/12 - 0s - loss: 0.1430 - accuracy: 0.3556 - 215ms/epoch - 18ms/step
テスト精度: 0.35555556416511536, テスト損失: 0.1429639756679535


In [10]:
# マルチラベル混同行列を計算します
y_pred = model.predict(X_test) > 0.5
confusion_matrices = multilabel_confusion_matrix(y_test, y_pred)

for i, matrix in enumerate(confusion_matrices):
    print(f"ラベル {i} の混同行列:\n{matrix}\n")

ラベル 0 の混同行列:
[[356   0]
 [  4   0]]

ラベル 1 の混同行列:
[[195  15]
 [ 44 106]]

ラベル 2 の混同行列:
[[310   0]
 [ 50   0]]

ラベル 3 の混同行列:
[[350   0]
 [ 10   0]]

ラベル 4 の混同行列:
[[311   7]
 [ 31  11]]

ラベル 5 の混同行列:
[[360   0]
 [  0   0]]

ラベル 6 の混同行列:
[[348   0]
 [ 12   0]]

ラベル 7 の混同行列:
[[350   0]
 [ 10   0]]

ラベル 8 の混同行列:
[[360   0]
 [  0   0]]

ラベル 9 の混同行列:
[[346   0]
 [ 14   0]]

ラベル 10 の混同行列:
[[338   0]
 [ 22   0]]

ラベル 11 の混同行列:
[[354   0]
 [  6   0]]

ラベル 12 の混同行列:
[[358   0]
 [  2   0]]

ラベル 13 の混同行列:
[[359   0]
 [  1   0]]

ラベル 14 の混同行列:
[[345   0]
 [ 15   0]]

ラベル 15 の混同行列:
[[359   0]
 [  1   0]]

ラベル 16 の混同行列:
[[359   0]
 [  1   0]]

ラベル 17 の混同行列:
[[360   0]
 [  0   0]]

ラベル 18 の混同行列:
[[340   0]
 [ 20   0]]

ラベル 19 の混同行列:
[[322   0]
 [ 38   0]]

ラベル 20 の混同行列:
[[328   0]
 [ 32   0]]

ラベル 21 の混同行列:
[[360   0]
 [  0   0]]

ラベル 22 の混同行列:
[[335   0]
 [ 25   0]]

ラベル 23 の混同行列:
[[359   0]
 [  1   0]]

ラベル 24 の混同行列:
[[360   0]
 [  0   0]]

ラベル 25 の混同行列:
[[360   0]
 [  0   0]]

ラベル 26 の混同行列:
[[360   

In [11]:
# 全体の混同行列を計算します
y_test_flattened = y_test.flatten()
y_pred_flattened = y_pred.flatten()
conf_matrix = confusion_matrix(y_test_flattened, y_pred_flattened)
print("全体の混同行列:")
print(conf_matrix)

全体の混同行列:
[[16434   126]
 [  858   222]]


In [12]:
# 全体の精度を計算します
y_pred_binary = (y_pred > 0.5)
overall_accuracy = accuracy_score(y_test, y_pred_binary)
print(f"全体の精度: {overall_accuracy}")

# 各ラベルの正確さを計算します
for i, label in enumerate(encoder.categories_[0]):
    label_accuracy = accuracy_score(y_test[:, i], y_pred_binary[:, i])
    print(f"{label} の正確さ: {label_accuracy}")


全体の精度: 0.013888888888888888
Hoodie の正確さ: 0.9888888888888889
Unknown の正確さ: 0.8361111111111111
coat の正確さ: 0.8611111111111112
denim jacket の正確さ: 0.9722222222222222
jackets の正確さ: 0.8944444444444445
jeans の正確さ: 1.0
knitted coat の正確さ: 0.9666666666666667
long down jacket の正確さ: 0.9722222222222222
long-sleeved dress の正確さ: 1.0
shirts の正確さ: 0.9611111111111111
short down jacket の正確さ: 0.9388888888888889
short trench coat の正確さ: 0.9833333333333333
short woolen coat の正確さ: 0.9944444444444445
shorts の正確さ: 0.9972222222222222
suit jackets の正確さ: 0.9583333333333334
sweater の正確さ: 0.9972222222222222
t-shirts の正確さ: 0.9972222222222222
vest の正確さ: 1.0
windbreaker の正確さ: 0.9444444444444444


In [18]:
import os

# 指定的模型保存路径，确保以.h5结尾
model_path = 'C:/1作品/picture_myclothes/my_model.h5'

# 获取目录路径
directory = os.path.dirname(model_path)

# 如果目录不存在，则创建它
if not os.path.exists(directory):
    os.makedirs(directory)

# 现在可以安全地保存模型了
model.save(model_path)


  saving_api.save_model(
