<a href="https://colab.research.google.com/github/Zhu-Pengming/Flora-Talks/blob/main/PlantDiseaseDetection_Image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub>=0.21.2 (from datasets)
  Downloading huggingface_hub-0.23.0-py3-none-a

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset

def preprocess(example):
    """Turn one dataset example into a normalised 224x224 image tensor.

    Args:
        example: Mapping with an ``'image'`` entry (a PIL image) and a
            ``'label'`` entry.

    Returns:
        dict: ``{'image': <transformed image>, 'label': <label unchanged>}``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Three mean/std values: one per RGB channel.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Force three channels first: the per-channel statistics above (and the
    # 3-input-channel network that consumes the result) cannot handle
    # grayscale, palette or RGBA images.
    image = transform(example['image'].convert('RGB'))
    return {'image': image, 'label': example['label']}

# Load the dataset
dataset = load_dataset('ayerr/plant-disease-classification')


def _preprocess_batch(batch):
    """Apply ``preprocess`` to every example of a batched dict-of-lists."""
    processed = [
        preprocess({'image': image, 'label': label})
        for image, label in zip(batch['image'], batch['label'])
    ]
    return {
        'image': [item['image'] for item in processed],
        'label': [item['label'] for item in processed],
    }


# `Dataset.map` stores its results in an Arrow table, so the tensors produced by
# `preprocess` would not come back as `torch.Tensor` objects when the DataLoader
# reads them. Applying the transform lazily on access keeps them as tensors that
# the default collate function can stack into a batch.
train_dataset = dataset['train'].with_transform(_preprocess_batch)
val_dataset = dataset['validation'].with_transform(_preprocess_batch)
test_dataset = dataset['test'].with_transform(_preprocess_batch)

# Create the data loaders; only the training split is reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32)
test_dataloader = DataLoader(test_dataset, batch_size=32)

# Define the model
# The classifier width follows the label names declared by the dataset instead
# of a hard-coded 2, so the network still matches if classes are added.
num_classes = len(dataset['train'].features['label'].names)
model = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    # Two stride-2 poolings shrink a 224x224 input to 56x56 maps with 64 channels.
    nn.Linear(64 * 56 * 56, 128),
    nn.ReLU(),
    nn.Linear(128, num_classes),
)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch in train_dataloader:
        inputs, labels = batch['image'], batch['label']
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean batch loss so progress is visible while training;
    # max(..., 1) guards the division if the loader yields no batches.
    print(f'Epoch {epoch}, Loss: {running_loss / max(len(train_dataloader), 1)}')

print('Finished Training')

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
from PIL import Image
import torch.nn.functional as F
# Load the dataset
dataset = load_dataset("ayerr/plant-disease-classification")
# Resize every image to 128x128 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

class PlantDataset(torch.utils.data.Dataset):
    """Map-style dataset wrapping one split and applying an optional transform.

    Args:
        dataset_split: Indexable collection whose items are mappings with
            ``'image'`` and ``'label'`` keys.
        transform: Optional callable applied to the image before it is returned.
    """

    def __init__(self, dataset_split, transform=None):
        self.dataset_split = dataset_split
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped split."""
        return len(self.dataset_split)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at ``idx``.

        Images given as a path string are opened from disk. Any image that
        reports a non-RGB ``mode`` is converted to RGB so that it has the three
        channels the network's first convolution expects.
        """
        sample = self.dataset_split[idx]
        image = sample['image']
        if isinstance(image, str):
            # Decode inside a context manager so the file handle is released;
            # convert() returns a fully loaded copy that outlives the handle.
            with Image.open(image) as opened:
                image = opened.convert('RGB')
        elif getattr(image, 'mode', 'RGB') != 'RGB':
            # Objects without a `mode` attribute (e.g. tensors) pass through untouched.
            image = image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, sample['label']

# Wrap each split in a PlantDataset that shares the same transform.
train_dataset, val_dataset, test_dataset = (
    PlantDataset(dataset[split_name], transform=transform)
    for split_name in ('train', 'validation', 'test')
)

# Batches of 32; only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=32, shuffle=False)
    for held_out in (val_dataset, test_dataset)
)
class CNN(nn.Module):
    """Three conv/pool stages followed by a two-layer classifier.

    Args:
        num_classes: Number of output logits.
        input_size: Height and width of the (square) input images. Each of the
            three 2x2 poolings halves it, so the flattened feature size is
            ``128 * (input_size // 8) ** 2``. Defaults to 128.
    """

    def __init__(self, num_classes, input_size=128):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        pooled_size = input_size // 8  # three stride-2 poolings
        self.fc1 = nn.Linear(128 * pooled_size * pooled_size, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this never folds spatial data into the batch axis: an input of the
        # wrong size fails in fc1 with a shape error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Size the classifier head from the label names declared on the training split.
label_names = dataset['train'].features['label'].names
num_classes = len(label_names)
model = CNN(num_classes)

# Cross-entropy objective, optimised with Adam at a 1e-3 learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

def train(model, train_loader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader`` and report the mean loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Epoch index, used only in the printed message.

    Returns:
        float: Mean of the per-batch losses, or ``nan`` when the loader
        yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    # Count batches while iterating so an empty loader cannot trigger a
    # division by zero.
    average_loss = running_loss / num_batches if num_batches else float('nan')
    print(f'Epoch {epoch}, Loss: {average_loss}')
    return average_loss

def validate(model, val_loader, criterion):
    """Measure and print classification accuracy on ``val_loader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Accepted for call compatibility; not used here.

    Returns:
        float: Accuracy in percent, or ``nan`` when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise divide by zero.
    accuracy = 100 * correct / total if total else float('nan')
    print(f'Validation Accuracy: {accuracy}%')
    return accuracy

# Alternate one training pass with one validation pass for every epoch.
num_epochs = 10
for epoch in range(num_epochs):
    train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        epoch=epoch,
    )
    validate(model=model, val_loader=val_loader, criterion=criterion)

# 测试模型
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Test Accuracy: {100 * correct / total}%')

# Final evaluation on the held-out test loader.
test(model=model, test_loader=test_loader)


Resolving data files:   0%|          | 0/194 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/198 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/152 [00:00<?, ?it/s]

Epoch 0, Loss: 0.701140684740884
Validation Accuracy: 63.63636363636363%
Epoch 1, Loss: 0.698278716632298
Validation Accuracy: 70.20202020202021%
Epoch 2, Loss: 0.5881521701812744
Validation Accuracy: 65.65656565656566%
Epoch 3, Loss: 0.5692638286522457
Validation Accuracy: 61.111111111111114%
Epoch 4, Loss: 0.417272795523916
Validation Accuracy: 67.17171717171718%
Epoch 5, Loss: 0.3334160934069327
Validation Accuracy: 66.16161616161617%
Epoch 6, Loss: 0.29504141211509705
Validation Accuracy: 64.14141414141415%
Epoch 7, Loss: 0.33355602834905895
Validation Accuracy: 68.68686868686869%
Epoch 8, Loss: 0.3245897452746119
Validation Accuracy: 56.06060606060606%
Epoch 9, Loss: 0.30364749474184854
Validation Accuracy: 64.14141414141415%
Test Accuracy: 65.78947368421052%


In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
from datasets import load_dataset
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Load the dataset
dataset = load_dataset('NouRed/plant-disease-recognition', split='train')

# Extract images and labels.
# Rows are decoded one at a time, forced to RGB and shrunk straight away:
# materialising every full-resolution image at once is expensive for a download
# of this size, and the images can only be stacked into a single numeric array
# when they all share one shape and channel count.
load_size = (150, 150)
images = np.stack([
    np.asarray(example['image'].convert('RGB').resize(load_size), dtype=np.uint8)
    for example in dataset
])
labels = np.array(dataset['labels'])

# One-hot encode the labels
num_classes = len(np.unique(labels))
labels = to_categorical(labels, num_classes=num_classes)

# Split the data into training and validation sets
train_images, val_images, train_labels, val_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)
import tensorflow as tf

def preprocess_image(image, size=(150, 150)):
    """Resize ``image`` and scale its pixel values by 1/255.

    Args:
        image: Image data accepted by ``tf.image.resize``.
        size: Target ``(height, width)``; defaults to ``(150, 150)``, the input
            size of the model built in this notebook.

    Returns:
        The resized image divided by 255.0.
    """
    resized = tf.image.resize(image, list(size))
    return resized / 255.0  # normalise

# Preprocess the training and validation images, one image at a time.
train_images, val_images = (
    np.array([preprocess_image(picture) for picture in image_split])
    for image_split in (train_images, val_images)
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Three Conv2D/MaxPooling2D stages, then a dense classifier with dropout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # One softmax output per class, matching the one-hot encoded labels.
    Dense(num_classes, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Fit for 25 epochs in mini-batches of 32, passing the held-out split as
# validation data.
fit_options = dict(
    epochs=25,
    batch_size=32,
    validation_data=(val_images, val_labels),
)
history = model.fit(train_images, train_labels, **fit_options)

# Plot training and validation accuracy and loss
import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# One explicit figure per metric, each with labelled axes so it can be read on
# its own.
_, acc_ax = plt.subplots()
acc_ax.plot(epochs, acc, 'r', label='Training accuracy')
acc_ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
acc_ax.set(title='Training and validation accuracy', xlabel='Epoch', ylabel='Accuracy')
acc_ax.legend()

_, loss_ax = plt.subplots()
loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set(title='Training and validation loss', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()

plt.show()

# Persist the trained model next to the notebook.
model.save('plant_disease_model.h5')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/324 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/398M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/390M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/381M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1322 [00:00<?, ? examples/s]

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
from PIL import Image

# Load the dataset
dataset = load_dataset("ayerr/plant-disease-classification")

# Inspect the dataset structure: the split summary first, then the first
# training example.
print(dataset, dataset['train'][0], sep='\n')


Resolving data files:   0%|          | 0/194 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/198 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/152 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 194
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 198
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 152
    })
})
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=224x224 at 0x7AF765B9D2A0>, 'label': 0}
