## train model1

In [None]:
!pip install tensorflow




In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 設定檔案路徑
data_path = "/content/drive/My Drive/colab/PlantCLEF_Subset/PlantCLEF_Subset/"
train_path = data_path + "train/"
val_path = data_path + "val/"
test_path = data_path + "test/"
label_file = data_path + "labels.txt"

# 載入標籤
with open(label_file, 'r') as file:
    labels = [line.strip() for line in file]


In [None]:
labels

['ash',
 'beech',
 'cattail',
 'cedar',
 'clover',
 'cyprus',
 'daisy',
 'dandelion',
 'dogwood',
 'elm',
 'fern',
 'fig',
 'fir',
 'juniper',
 'maple',
 'poison_ivy',
 'sweetgum',
 'sycamore',
 'trout_lily',
 'tulip_tree']

In [None]:
# 資料收集
def load_data(directory, max_images=100):
    data = []
    labels = []
    for label in os.listdir(directory):
        label_path = os.path.join(directory, label)
        count = 0
        for filename in os.listdir(label_path):
            if count >= max_images:
                break
            img_path = os.path.join(label_path, filename)
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            data.append(img_array)
            labels.append(label)
            count += 1
    return np.array(data), np.array(labels)

In [None]:
# 資料預處理
def preprocess_data(data, labels):
    data = data.astype('float') / 255.0
    labels = LabelEncoder().fit_transform(labels)
    labels = to_categorical(labels)
    return data, labels

In [None]:
# 加載訓練和驗證數據
X_train, y_train = load_data(train_path)
X_val, y_val = load_data(val_path)

In [None]:
# 資料預處理
X_train, y_train = preprocess_data(X_train, y_train)
X_val, y_val = preprocess_data(X_val, y_val)

In [None]:
# 特徵提取 (使用MobileNetV2模型)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(len(labels), activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [None]:
# 凍結預訓練層
for layer in base_model.layers:
    layer.trainable = False

# 模型訓練
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f29146c7e80>

In [None]:
# 後處理和植物資訊提供
def predict_plant(image_path):
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = img_array / 255.0

    prediction = model.predict(img_array)
    predicted_label = labels[np.argmax(prediction)]

    # 使用 os.path.splitext 移除檔案名稱的擴展名
    file_name = os.path.splitext(os.path.basename(image_path))[0]

    # 輸出檔案名稱和預測結果
    print("檔案名稱：", file_name, "_預測結果：", predicted_label)

In [None]:
# 測試
test_images = os.listdir(test_path)
for image_file in test_images:
    image_path = os.path.join(test_path, image_file)
    predict_plant(image_path)

檔案名稱： tulip_tree_1 _預測結果： tulip_tree
檔案名稱： daisy_1 _預測結果： daisy
檔案名稱： elm_1 _預測結果： elm
檔案名稱： maple _預測結果： poison_ivy
檔案名稱： fig _預測結果： fig
檔案名稱： juniper _預測結果： juniper
檔案名稱： fern _預測結果： fir
檔案名稱： fir _預測結果： fir
檔案名稱： poison_ivy _預測結果： poison_ivy
檔案名稱： fern_1 _預測結果： elm
檔案名稱： maple_1 _預測結果： maple
檔案名稱： fir_1 _預測結果： fir
檔案名稱： dogwood_1 _預測結果： dogwood
檔案名稱： sycamore _預測結果： fig
檔案名稱： cyprus _預測結果： cyprus
檔案名稱： cattail_0 _預測結果： cattail
檔案名稱： dandelion _預測結果： dandelion
檔案名稱： fir_2 _預測結果： fir
檔案名稱： clover _預測結果： clover
檔案名稱： daisy _預測結果： daisy
檔案名稱： sycamore_1 _預測結果： sycamore
檔案名稱： cattail _預測結果： cattail
檔案名稱： elm _預測結果： elm
檔案名稱： dogwood _預測結果： dogwood
檔案名稱： sycamore_2 _預測結果： maple
檔案名稱： juniper_1 _預測結果： juniper
檔案名稱： trout_lily _預測結果： trout_lily
檔案名稱： tulip_tree _預測結果： tulip_tree
檔案名稱： cedar _預測結果： cedar
檔案名稱： poison_ivy_1 _預測結果： poison_ivy


In [None]:
# 創建一個字典，將標籤轉換為數字
label_to_index = {label: i for i, label in enumerate(labels)}

# 初始化計數器
correct_predictions = 0
total_predictions = 0

# 計算準確率
for image_file in test_images:
    image_path = os.path.join(test_path, image_file)
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = img_array / 255.0

    # 進行預測
    prediction = model.predict(img_array)
    predicted_label = labels[np.argmax(prediction)]

    # 獲取實際標籤
    actual_label = image_file.split('.')[0]  # 去掉檔案名稱的擴展名

    # 比較預測結果和實際標籤
    if actual_label == predicted_label or actual_label.startswith(predicted_label):
        correct_predictions += 1

    total_predictions += 1

# 計算並印出準確率
accuracy = correct_predictions / total_predictions
print("準確率：", accuracy)

準確率： 0.8333333333333334


## Train model


In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive
drive.mount('/content/drive')

# 設定檔案路徑
data_path = "/content/drive/My Drive/colab/PlantCLEF_Subset/PlantCLEF_Subset/"
train_path = data_path + "train/"
val_path = data_path + "val/"
test_path = data_path + "test/"
label_file = data_path + "labels.txt"

# 載入標籤
with open(label_file, 'r') as file:
    labels = [line.strip() for line in file]

# 資料收集
def load_data(directory, max_images=100):
    data = []
    labels = []
    for label in os.listdir(directory):
        label_path = os.path.join(directory, label)
        count = 0
        for filename in os.listdir(label_path):
            if count >= max_images:
                break
            img_path = os.path.join(label_path, filename)
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            data.append(img_array)
            labels.append(label)
            count += 1
    return np.array(data), np.array(labels)

# 資料預處理
def preprocess_data(data, labels):
    data = data.astype('float') / 255.0
    labels = LabelEncoder().fit_transform(labels)
    labels = to_categorical(labels)
    return data, labels

# 加載訓練和驗證數據
X_train, y_train = load_data(train_path)
X_val, y_val = load_data(val_path)

# 資料預處理
X_train, y_train = preprocess_data(X_train, y_train)
X_val, y_val = preprocess_data(X_val, y_val)

# 資料增強
datagen_train = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# 特徵提取 (使用MobileNetV2模型)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(len(labels), activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# 凍結預訓練層
for layer in base_model.layers:
    layer.trainable = False

# 模型訓練
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 設定回調函數
checkpoint = ModelCheckpoint('/content/drive/My Drive/colab/plant_model.h5',
                             monitor='val_loss',
                             save_best_only=True,
                             mode='min',
                             verbose=1)

reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2,
                              patience=3,
                              min_lr=0.0001,
                              mode='min',
                              verbose=1)

# 訓練模型
history = model.fit(datagen_train.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) / 32,
                    epochs=10,
                    validation_data=(X_val, y_val),
                    callbacks=[checkpoint, reduce_lr])

# 載入最佳模型
model.load_weights('/content/drive/My Drive/colab/plant_model.h5')

# 混淆矩陣和分類報告
y_pred = model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_val, axis=1)

conf_matrix = confusion_matrix(y_true, y_pred_classes)
class_report = classification_report(y_true, y_pred_classes, target_names=labels)

print("混淆矩陣：\n", conf_matrix)
print("\n分類報告：\n", class_report)


Mounted at /content/drive
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
Epoch 1/10
Epoch 1: val_loss improved from inf to 2.33750, saving model to /content/drive/My Drive/colab/plant_model.h5


  saving_api.save_model(


Epoch 2/10
Epoch 2: val_loss improved from 2.33750 to 2.26522, saving model to /content/drive/My Drive/colab/plant_model.h5
Epoch 3/10
Epoch 3: val_loss improved from 2.26522 to 2.09129, saving model to /content/drive/My Drive/colab/plant_model.h5
Epoch 4/10
Epoch 4: val_loss did not improve from 2.09129
Epoch 5/10
Epoch 5: val_loss improved from 2.09129 to 1.93509, saving model to /content/drive/My Drive/colab/plant_model.h5
Epoch 6/10
Epoch 6: val_loss did not improve from 1.93509
Epoch 7/10
Epoch 7: val_loss did not improve from 1.93509
Epoch 8/10
Epoch 8: val_loss did not improve from 1.93509

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/10
Epoch 9: val_loss did not improve from 1.93509
Epoch 10/10
Epoch 10: val_loss did not improve from 1.93509
混淆矩陣：
 [[18  6  0  0  2  2  0  0  1  3  0  0  1  0  0  4  1  5  0  1]
 [ 4 18  0  0  0  3  0  1  0  8  1  1  0  1  8 11  3 40  1  0]
 [ 0  0 26  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0]
 [ 2

In [2]:
# 計算測試集的準確率
# 初始化計數器
correct_predictions = 0
total_predictions = 0

# 計算準確率
for image_file in os.listdir(test_path):
    image_path = os.path.join(test_path, image_file)
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = img_array / 255.0

    # 進行預測
    prediction = model.predict(img_array)
    predicted_label = labels[np.argmax(prediction)]

    # 獲取實際標籤
    actual_label = image_file.split('.')[0]  # 去掉檔案名稱的擴展名

    # 比較預測結果和實際標籤
    if actual_label == predicted_label or actual_label.startswith(predicted_label):
        correct_predictions += 1

    total_predictions += 1

# 計算並印出準確率
accuracy = correct_predictions / total_predictions
print("測試集準確率：", accuracy)


測試集準確率： 0.8


## next