In [27]:
!unzip Plant-AI-Doctor.zip

unzip:  cannot find or open Plant-AI-Doctor.zip, Plant-AI-Doctor.zip.zip or Plant-AI-Doctor.zip.ZIP.


In [28]:
!ls

sample_data


In [29]:
!ls Plant-AI-Doctor



ls: cannot access 'Plant-AI-Doctor': No such file or directory


In [30]:
!ls data


ls: cannot access 'data': No such file or directory


In [31]:
!ls Plant-AI-Doctor/data

ls: cannot access 'Plant-AI-Doctor/data': No such file or directory


In [32]:
!ls Plant-AI-Doctor/data/images


ls: cannot access 'Plant-AI-Doctor/data/images': No such file or directory


In [33]:
!ls Plant-AI-Doctor/data/images/real


ls: cannot access 'Plant-AI-Doctor/data/images/real': No such file or directory


In [34]:
from pathlib import Path

BASE = Path("Plant-AI-Doctor/data/images/real")
chem_dir = BASE / "chemical"
dis_dir  = BASE / "disease"

print("BASE exists:", BASE.exists())
print("chemical exists:", chem_dir.exists())
print("disease exists:", dis_dir.exists())

print("chemical count:", len(list(chem_dir.iterdir())))
print("disease count:", len(list(dis_dir.iterdir())))


BASE exists: False
chemical exists: False
disease exists: False


FileNotFoundError: [Errno 2] No such file or directory: 'Plant-AI-Doctor/data/images/real/chemical'

In [None]:
import pandas as pd

def list_images(folder: Path):
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    return [p for p in folder.iterdir() if p.suffix.lower() in exts]

rows = []

for p in list_images(chem_dir):
    rows.append({"filepath": str(p), "label": "chemical", "source": "real"})

for p in list_images(dis_dir):
    rows.append({"filepath": str(p), "label": "biological", "source": "real"})

df = pd.DataFrame(rows)

print("Total:", len(df))
print(df["label"].value_counts())
df.head(5)


In [None]:
import numpy as np

def split_group(df_class, train=0.7, val=0.15):
    df_class = df_class.sample(frac=1, random_state=42)  # ערבוב
    n = len(df_class)

    train_end = int(n * train)
    val_end = int(n * (train + val))

    splits = (
        ["train"] * train_end +
        ["val"] * (val_end - train_end) +
        ["test"] * (n - val_end)
    )

    df_class = df_class.copy()
    df_class["split"] = splits
    return df_class

df = df.groupby("label", group_keys=False).apply(split_group)

df["split"].value_counts(), df.groupby(["label", "split"]).size()


In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import random

def show_samples(df, label, n=4):
    samples = df[df["label"] == label].sample(n)
    plt.figure(figsize=(10, 3))
    for i, row in enumerate(samples.itertuples()):
        img = Image.open(row.filepath)
        plt.subplot(1, n, i+1)
        plt.imshow(img)
        plt.axis("off")
    plt.suptitle(label)
    plt.show()

show_samples(df, "chemical")
show_samples(df, "biological")

In [None]:
from torchvision import transforms

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [None]:
from torch.utils.data import Dataset
from PIL import Image
import torch


In [None]:

class LeafDataset(Dataset):
    def __init__(self, df, split, transform=None):
        self.df = df[df["split"] == split].reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row.filepath).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = 1 if row.label == "chemical" else 0
        return image, label


In [None]:
from torch.utils.data import DataLoader

In [None]:
train_dataset = LeafDataset(df, split="train", transform=image_transform)
val_dataset   = LeafDataset(df, split="val", transform=image_transform)
test_dataset  = LeafDataset(df, split="test", transform=image_transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [None]:
import torch
import torch.nn as nn
from torchvision import models

model = models.resnet18(pretrained=True)

In [None]:
model.fc = nn.Linear(model.fc.in_features, 2)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print("Using device:", device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    train_loss = train_one_epoch(
        model,
        train_loader,
        criterion,
        optimizer,
        device
    )
    print(f"Epoch {epoch+1}/{num_epochs} | Train loss: {train_loss:.4f}")

In [None]:
def evaluate(model, loader, device):
    model.eval()  # מצב בדיקה
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = outputs.argmax(dim=1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total

In [None]:
val_acc = evaluate(model, val_loader, device)
print(f"Validation accuracy: {val_acc:.3f}")

In [None]:
len(val_dataset)

In [None]:
train_paths = set(train_dataset.df.filepath)
val_paths   = set(val_dataset.df.filepath)

len(train_paths.intersection(val_paths))

In [None]:
test_acc = evaluate(model, test_loader, device)
print(f"Test accuracy: {test_acc:.3f}")