In [1]:
import os
import kagglehub

# Fetch the mushroom dataset through KaggleHub; `path` is what the download call returns
dataset_name = "huizecai/mushroom"
path = kagglehub.dataset_download(dataset_name)

print(f"Path to dataset files: {path}")

Path to dataset files: /kaggle/input/mushroom


In [2]:
import tensorflow as tf

# Locations of the image folder and the label file inside the downloaded dataset
dataset_path = f"{path}/archive/data"
label_path = f"{path}/archive/label.txt"

# Load the images and split them 90/10 into training and validation sets.
# subset='both' makes the loader return the two datasets together, and the
# fixed seed keeps the split the same from run to run.
train_dataset, validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory=dataset_path,
    label_mode='categorical',
    image_size=(224, 224),
    batch_size=32,
    seed=44,
    validation_split=0.1,
    subset='both',
)

Found 5698 files belonging to 36 classes.
Using 5129 files for training.
Using 569 files for validation.


In [3]:
def get_MobileNetV1(num_classes=36, input_shape=(224, 224, 3)):
  """Build a transfer-learning classifier on top of a frozen MobileNet backbone.

  The returned model applies MobileNet's ``preprocess_input`` to its own input,
  so callers should feed it raw pixel values and must not pre-scale images
  themselves.

  Args:
    num_classes: Number of output classes (size of the softmax layer).
      Defaults to 36, the number of classes in the mushroom dataset.
    input_shape: ``(height, width, channels)`` of the input images.

  Returns:
    An uncompiled ``tf.keras.Model`` mapping an image batch to class
    probabilities.
  """
  # ImageNet-pretrained feature extractor without its classification head
  pre_trained_model = tf.keras.applications.MobileNet(
      input_shape=input_shape,
      include_top=False,
      weights='imagenet'
  )

  # Freeze the backbone so only the new head is trained
  pre_trained_model.trainable = False

  # Use a dedicated input tensor rather than reusing the backbone's own input,
  # so the preprocessing step sits cleanly in front of the backbone.
  inputs = tf.keras.Input(shape=input_shape)
  x = tf.keras.applications.mobilenet.preprocess_input(inputs)

  # Custom classification head
  x = pre_trained_model(x)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  x = tf.keras.layers.Dropout(0.5)(x)
  x = tf.keras.layers.Dense(128, activation='relu')(x)
  x = tf.keras.layers.Dense(256, activation='relu')(x)
  predictions = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

  # Assemble the final model
  return tf.keras.models.Model(inputs=inputs, outputs=predictions)

In [5]:
from tqdm import tqdm
import os
import tensorflow as tf

def record_training_errors(model, dataset_path, output_file='error_images.txt'):
    """
    Run ``model`` over every image under ``dataset_path`` and log the misclassified ones.

    The directory is loaded with the same image size, batch size and label mode
    as the training pipeline, but without shuffling, so that the position of an
    image in the batch stream can be mapped back to ``dataset.file_paths``.

    Args:
        model: Model whose ``predict`` returns one score per class for each image.
        dataset_path: Root directory passed to ``image_dataset_from_directory``.
        output_file: Path of the UTF-8 text file that receives one line per
            misclassified image.

    Returns:
        list[str]: One tab-separated record (path, true class, predicted class)
        per misclassified image.
    """
    dataset = tf.keras.preprocessing.image_dataset_from_directory(
        directory=dataset_path,
        image_size=(224, 224),  # (height, width) only; a 3-tuple is rejected by the loader
        batch_size=32,
        label_mode='categorical',
        shuffle=False  # keep batch order aligned with dataset.file_paths
    )
    file_paths = dataset.file_paths
    class_names = dataset.class_names

    error_records = []
    total_images = 0

    # tqdm shows a per-batch progress bar
    for images, labels in tqdm(dataset, desc="Processing dataset", unit="batch"):
        predictions = model.predict(images, verbose=0)
        predicted_classes = tf.argmax(predictions, axis=1).numpy()
        true_classes = tf.argmax(labels, axis=1).numpy()

        # Positions (within this batch) where the prediction is wrong
        incorrect_indices = (predicted_classes != true_classes).nonzero()[0]

        for idx in incorrect_indices:
            # idx is relative to the current batch; total_images is the number
            # of images in all previous batches, so the sum is the global index.
            img_path = file_paths[total_images + int(idx)]
            true_class = class_names[int(true_classes[idx])]
            pred_class = class_names[int(predicted_classes[idx])]
            error_records.append(
                f"{img_path}\t真实类别: {true_class}\t预测类别: {pred_class}"
            )

        total_images += len(images)

    total_errors = len(error_records)

    # Write all error records to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(error_records))

    # Guard the ratio so an empty dataset cannot raise ZeroDivisionError
    error_rate = total_errors / total_images if total_images else 0.0
    print(f"总图片数：{total_images}，错误预测数：{total_errors}，错误率：{error_rate:.2%}")
    print(f"错误预测的图片路径已记录到：{output_file}")

    return error_records

In [None]:
# Build the classifier, configure optimizer/loss/metric, then train for 30 epochs
model = get_MobileNetV1()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=validation_dataset,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/30


In [None]:
# Record the misclassified images. The result is bound to a name so the notebook
# does not dump the whole list as cell output; the function already prints a
# summary and writes the records to the output file.
error_records = record_training_errors(model, dataset_path, output_file='error_images.txt')

In [None]:
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import requests
from PIL import Image

# Image loading / preparation helper
def preprocess_image(image_url, target_size=(224, 224)):
    """Download an image and turn it into a single-image batch for the model.

    The pixel values are left unscaled: the model built by ``get_MobileNetV1``
    applies MobileNet's ``preprocess_input`` to its own input, and the training
    datasets feed it raw pixels, so scaling here as well would scale twice.

    Args:
        image_url: HTTP(S) URL of the image to fetch.
        target_size: Size passed to ``PIL.Image.resize``.

    Returns:
        tuple: ``(image_array, image)`` where ``image_array`` is a float32 array
        of shape ``(1, *resized_image_shape, 3)`` and ``image`` is the resized
        RGB ``PIL.Image`` (useful for display).

    Raises:
        ValueError: If the server does not answer with HTTP 200.
    """
    # A timeout keeps the notebook from hanging forever on an unresponsive host
    response = requests.get(image_url, timeout=30)
    if response.status_code != 200:
        raise ValueError("无法从链接加载图像，请检查链接是否有效！")

    # convert("RGB") normalises grayscale, palette and RGBA images to exactly
    # three channels; stacking copies of the array breaks for 4-channel input.
    image = Image.open(BytesIO(response.content)).convert("RGB")
    image = image.resize(target_size)

    image_array = np.asarray(image, dtype=np.float32)
    image_array = np.expand_dims(image_array, axis=0)  # add the batch dimension
    return image_array, image

# Prediction helper: report the k most likely classes for an image URL
def predict_top_k_from_url(model, image_url, class_names, top_k=10):
    """Classify the image at ``image_url`` and show its ``top_k`` best guesses.

    The image is displayed with matplotlib and the ranked labels are printed
    together with their confidence in percent.

    Args:
        model: Model whose ``predict`` yields one score per class.
        image_url: URL handed to ``preprocess_image``.
        class_names: Sequence mapping a class index to its label.
        top_k: How many of the highest-scoring classes to report.

    Returns:
        tuple: ``(top_k_labels, top_k_confidences)``, both ordered from most to
        least likely; confidences are percentages.
    """
    batch, shown_image = preprocess_image(image_url)

    # Only one image was submitted, so take the first row of the prediction
    scores = model.predict(batch)[0]

    # Ascending argsort, reversed, lists class indices from best to worst
    ranked = np.argsort(scores)[::-1]
    best = ranked[:top_k]

    top_k_labels = []
    top_k_confidences = []
    for class_idx in best:
        top_k_labels.append(class_names[class_idx])
        top_k_confidences.append(scores[class_idx] * 100)  # as a percentage

    # Show the image that was classified
    plt.imshow(shown_image)
    plt.title("Top Predictions")
    plt.axis("off")
    plt.show()

    # Print the ranked predictions
    print("Top Predictions:")
    for rank in range(len(top_k_labels)):
        print(f"  {top_k_labels[rank]}: {top_k_confidences[rank]:.2f}%")

    return top_k_labels, top_k_confidences

# Class labels in the order of the model's output units; the datasets created by
# image_dataset_from_directory expose them as `class_names`.
class_names = train_dataset.class_names

# Test image URL
image_url = "https://th.bing.com/th/id/OIP.1Bq2Px2DKc6YXem5eABF7QHaFj?w=252&h=189&c=7&r=0&o=5&dpr=1.3&pid=1.7"

# Show the 10 most likely classes
top_k_labels, top_k_confidences = predict_top_k_from_url(model, image_url, class_names, top_k=10)