In [None]:
import os
from pathlib import Path
import builtins
import json
import pandas as pd
import shutil

# 1. Mount Google Drive (if not already)
from google.colab import drive

# Test for the "MyDrive" folder rather than the bare mount point: an empty
# /content/drive directory can exist without Drive actually being mounted,
# which would make every later write land on the VM's local disk instead.
# NOTE(review): assumes Colab exposes the user's Drive as <mount>/MyDrive — confirm.
if not os.path.isdir('/content/drive/MyDrive'):
    print("Mounting Google Drive...")
    drive.mount('/content/drive')
    print("Google Drive mounted!")
else:
    print("Google Drive already mounted.")

# 2. Define base project directory on Drive (change if needed)
BASE_DIR = Path("/content/drive/MyDrive/intelligent_pesticide_system")

# 3. Change working directory to project root (optional)
# Create the project folder first: on a fresh Drive it does not exist yet and
# os.chdir() would raise FileNotFoundError before anything else can run.
BASE_DIR.mkdir(parents=True, exist_ok=True)
os.chdir(str(BASE_DIR))
print(f"Working directory set to project root: {os.getcwd()}")

# 4. Patch built-in open() to redirect file paths under BASE_DIR automatically,
#    unless absolute path already points to BASE_DIR or special paths.

# Capture the genuine built-in exactly once. If this cell is re-run,
# builtins.open is already our wrapper; taking it as "original" would make the
# wrapper call itself forever (RecursionError on every open()). The wrapper
# therefore carries a reference to the real function, which we recover here.
original_open = getattr(builtins.open, "_drive_redirect_original", builtins.open)


def patched_open(file, mode='r', buffering=-1, encoding=None,
                 errors=None, newline=None, closefd=True, opener=None):
    """Drop-in replacement for built-in ``open`` that anchors relative paths at BASE_DIR.

    Args:
        file: Same as built-in ``open``. Relative ``str`` / ``Path`` values are
            resolved against ``BASE_DIR``; absolute paths, file descriptors
            (ints), bytes paths and any other object are passed through as is.
        mode, buffering, encoding, errors, newline, closefd, opener:
            Forwarded unchanged to the real ``open``.

    Returns:
        The file object returned by the real ``open``.

    Raises:
        Whatever the real ``open`` (or ``os.makedirs``) raises.
    """
    fpath = file
    if isinstance(file, str):
        # BASE_DIR is itself absolute, so "already under BASE_DIR" is covered
        # by the absolute-path test.
        if not os.path.isabs(file):
            fpath = BASE_DIR / file
    elif isinstance(file, Path):
        if not file.is_absolute():
            fpath = BASE_DIR / file

    # Ensure parent directories exist for writing. Only real filesystem paths
    # have a parent directory: an integer file descriptor or a bytes path
    # would make Path(...) raise TypeError, so those are skipped.
    if isinstance(fpath, (str, os.PathLike)) and ('w' in mode or 'a' in mode or 'x' in mode):
        os.makedirs(Path(fpath).parent, exist_ok=True)

    return original_open(fpath, mode, buffering, encoding, errors, newline, closefd, opener)


# Marker used above to find the real open() when the cell is executed again.
patched_open._drive_redirect_original = original_open
builtins.open = patched_open

# 5. Patch pandas read_csv and to_csv similarly

# Recover the real pandas function even if this cell already ran once;
# otherwise the wrapper would wrap (and endlessly call) itself after a re-run.
original_read_csv = getattr(pd.read_csv, "_drive_redirect_original", pd.read_csv)


def patched_read_csv(filepath_or_buffer, *args, **kwargs):
    """Wrapper around ``pd.read_csv`` that resolves relative paths against BASE_DIR.

    Args:
        filepath_or_buffer: Same as ``pd.read_csv``. Relative ``str`` / ``Path``
            values are rewritten to live under ``BASE_DIR``. Absolute paths,
            URLs (anything containing ``://``) and file-like buffers are
            forwarded untouched.
        *args, **kwargs: Forwarded unchanged to the real ``pd.read_csv``.

    Returns:
        Whatever the real ``pd.read_csv`` returns.
    """
    if isinstance(filepath_or_buffer, str):
        # Joining a URL such as "https://host/a.csv" onto BASE_DIR would turn
        # it into a non-existent local path, so remote locations are left alone.
        is_remote = "://" in filepath_or_buffer
        if not is_remote and not os.path.isabs(filepath_or_buffer):
            filepath_or_buffer = str(BASE_DIR / filepath_or_buffer)
    elif isinstance(filepath_or_buffer, Path) and not filepath_or_buffer.is_absolute():
        filepath_or_buffer = BASE_DIR / filepath_or_buffer
    return original_read_csv(filepath_or_buffer, *args, **kwargs)


# Marker used above to find the real function when the cell is executed again.
patched_read_csv._drive_redirect_original = original_read_csv
pd.read_csv = patched_read_csv

# Recover the real method even if this cell already ran once; otherwise the
# wrapper would wrap (and endlessly call) itself after a re-run.
original_to_csv = getattr(pd.DataFrame.to_csv, "_drive_redirect_original", pd.DataFrame.to_csv)


def patched_to_csv(self, path_or_buf=None, *args, **kwargs):
    """Wrapper around ``DataFrame.to_csv`` that writes relative paths under BASE_DIR.

    Args:
        path_or_buf: Same as ``DataFrame.to_csv``. ``str`` / path-like targets
            are joined onto ``BASE_DIR`` (an absolute path survives the join
            unchanged) and their parent directory is created. ``None``
            (return the CSV as a string), file-like buffers and URLs are
            forwarded untouched.
        *args, **kwargs: Forwarded unchanged to the real ``to_csv``.

    Returns:
        Whatever the real ``to_csv`` returns (the CSV text when
        ``path_or_buf`` is ``None``, otherwise ``None``).
    """
    is_remote = isinstance(path_or_buf, str) and "://" in path_or_buf
    # Only genuine filesystem targets have a parent directory to create;
    # Path(None) or Path(<buffer>) would raise TypeError.
    if isinstance(path_or_buf, (str, os.PathLike)) and not is_remote:
        target = BASE_DIR / path_or_buf
        os.makedirs(target.parent, exist_ok=True)
        path_or_buf = str(target)
    return original_to_csv(self, path_or_buf, *args, **kwargs)


# Marker used above to find the real method when the cell is executed again.
patched_to_csv._drive_redirect_original = original_to_csv
pd.DataFrame.to_csv = patched_to_csv

# 6. Patch torch.save similarly if using PyTorch

try:
    import torch

    # Recover the real torch.save even if this cell already ran once;
    # otherwise the wrapper would wrap (and endlessly call) itself on re-run.
    original_torch_save = getattr(torch.save, "_drive_redirect_original", torch.save)

    def patched_torch_save(obj, f, *args, **kwargs):
        """Wrapper around ``torch.save`` that writes relative paths under BASE_DIR.

        Args:
            obj: Object to serialize; forwarded unchanged.
            f: ``str`` / path-like targets are joined onto ``BASE_DIR`` (an
                absolute path survives the join unchanged) and their parent
                directory is created. File-like objects are forwarded as is.
            *args, **kwargs: Forwarded unchanged to the real ``torch.save``.

        Returns:
            Whatever the real ``torch.save`` returns.
        """
        if isinstance(f, (str, os.PathLike)):
            target = BASE_DIR / f
            os.makedirs(target.parent, exist_ok=True)
            f = str(target)
        return original_torch_save(obj, f, *args, **kwargs)

    # Marker used above to find the real function when the cell is executed again.
    patched_torch_save._drive_redirect_original = original_torch_save
    torch.save = patched_torch_save
except ImportError:
    print("PyTorch not installed, skipping torch.save patch")

# 7. Patch matplotlib.pyplot.savefig to save inside the project folder automatically

import matplotlib.pyplot as plt
# Recover the real plt.savefig even if this cell already ran once; otherwise
# the wrapper would wrap (and endlessly call) itself after a re-run.
original_savefig = getattr(plt.savefig, "_drive_redirect_original", plt.savefig)


def patched_savefig(fname, *args, **kwargs):
    """Wrapper around ``plt.savefig`` that writes relative paths under BASE_DIR.

    Only the module-level ``plt.savefig`` is replaced here; this cell does not
    touch ``Figure.savefig``.

    Args:
        fname: ``str`` / path-like targets are joined onto ``BASE_DIR`` (an
            absolute path survives the join unchanged) and their parent
            directory is created. File-like objects are forwarded as is.
        *args, **kwargs: Forwarded unchanged to the real ``plt.savefig``.

    Returns:
        Whatever the real ``plt.savefig`` returns.
    """
    if isinstance(fname, (str, os.PathLike)):
        target = BASE_DIR / fname
        os.makedirs(target.parent, exist_ok=True)
        fname = str(target)
    return original_savefig(fname, *args, **kwargs)


# Marker used above to find the real function when the cell is executed again.
patched_savefig._drive_redirect_original = original_savefig
plt.savefig = patched_savefig

print("Universal drive path redirection is active. All file reads/writes go to your Drive folder!")


Mounting Google Drive...
Mounted at /content/drive
Google Drive mounted!
Working directory set to project root: /content/drive/MyDrive/intelligent_pesticide_system
Universal drive path redirection is active. All file reads/writes go to your Drive folder!


In [None]:
import sys
import torch
import platform
import subprocess
import pkg_resources
import warnings # Import warnings

# Interpreter and operating-system details, emitted as one three-line report.
print("🔍 SYSTEM INFORMATION:")
print("=" * 50)
print(
    f"Python Version: {sys.version}",
    f"Platform: {platform.platform()}",
    f"Architecture: {platform.architecture()}",
    sep="\n",
)

# Accelerator details: the CPU-only notice is handled first, the CUDA report second.
print(f"\n🖥️  GPU INFORMATION:")
print("=" * 50)
if not torch.cuda.is_available():
    print("❌ CUDA not available - using CPU")
    print("⚠️  Training will be slower on CPU")
else:
    print(f"✅ CUDA Available: {torch.cuda.is_available()}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    # Integer division: the figure is whole GiB, rounded down.
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory // 1024**3} GB")
    print(f"Current GPU: {torch.cuda.current_device()}")


# Silence every warning category from this point on.
warnings.filterwarnings('ignore')

🔍 SYSTEM INFORMATION:
Python Version: 3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]
Platform: Linux-6.6.97+-x86_64-with-glibc2.35
Architecture: ('64bit', 'ELF')

🖥️  GPU INFORMATION:
✅ CUDA Available: True
CUDA Version: 12.6
GPU Device: Tesla T4
GPU Memory: 14 GB
Current GPU: 0


In [None]:
# Install main packages
import subprocess
import sys

packages_to_install = [
    "torch>=2.0.0",
    "torchvision>=0.15.0",
    "albumentations>=1.3.0",
    "segmentation-models-pytorch>=0.3.0",
    "timm>=0.9.0",
    "opencv-python>=4.8.0",
    "scikit-learn>=1.3.0",
    "matplotlib>=3.7.0",
    "seaborn>=0.12.0",
    "tqdm>=4.65.0",
    "tensorboard>=2.13.0"
]

print("📦 INSTALLING DEPENDENCIES:")
print("=" * 50)

for package in packages_to_install:
    print(f"Installing {package}...")
    # Run pip through the kernel's own interpreter with an argument list, so no
    # shell is involved. With `!pip install torch>=2.0.0` the shell treats ">"
    # as an output redirect: the version constraint is dropped and a stray
    # file named "=2.0.0" is written. A shell escape also never raises, so a
    # failed install used to be reported as a success.
    pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "--quiet", package],
        capture_output=True,
        text=True,
    )
    if pip_result.returncode == 0:
        print(f"✅ {package} installed successfully")
    else:
        print(f"❌ Error installing {package}: {pip_result.stderr.strip()}")

print("\n🎉 Installation complete!")


📦 INSTALLING DEPENDENCIES:
Installing torch>=2.0.0...
✅ torch>=2.0.0 installed successfully
Installing torchvision>=0.15.0...
✅ torchvision>=0.15.0 installed successfully
Installing albumentations>=1.3.0...
✅ albumentations>=1.3.0 installed successfully
Installing segmentation-models-pytorch>=0.3.0...
✅ segmentation-models-pytorch>=0.3.0 installed successfully
Installing timm>=0.9.0...
✅ timm>=0.9.0 installed successfully
Installing opencv-python>=4.8.0...
✅ opencv-python>=4.8.0 installed successfully
Installing scikit-learn>=1.3.0...
✅ scikit-learn>=1.3.0 installed successfully
Installing matplotlib>=3.7.0...
✅ matplotlib>=3.7.0 installed successfully
Installing seaborn>=0.12.0...
✅ seaborn>=0.12.0 installed successfully
Installing tqdm>=4.65.0...
✅ tqdm>=4.65.0 installed successfully
Installing tensorboard>=2.13.0...
✅ tensorboard>=2.13.0 installed successfully

🎉 Installation complete!


In [None]:
# Core libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# Computer Vision & Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
import timm
import cv2

# Data Science
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Utilities
import os
import json
from pathlib import Path
from tqdm import tqdm
import warnings
import time
from collections import defaultdict

warnings.filterwarnings('ignore')

# The import block only binds `torchvision.transforms` (as `transforms`) and
# `matplotlib.pyplot` (as `plt`); the top-level packages are imported here so
# that each one reports its own version.
import matplotlib
import torchvision

print("📚 LIBRARY VERSIONS")
print("=" * 50)
libraries = {
    'torch': torch.__version__,
    # Previously read from torch.version, which printed torch's version
    # under the torchvision label.
    'torchvision': torchvision.__version__,
    'numpy': np.__version__,
    'pandas': pd.__version__,
    'opencv': cv2.__version__,
    'matplotlib': matplotlib.__version__,
}

for lib, version in libraries.items():
    print(f"{lib}: {version}")

print("\n✅ All libraries imported successfully!")


📚 LIBRARY VERSIONS
torch: 2.8.0+cu126
torchvision: 2.8.0+cu126
numpy: 2.0.2
pandas: 2.2.2
opencv: 4.12.0
matplotlib: 3.10.0

✅ All libraries imported successfully!


In [None]:
import os
import json
from pathlib import Path

class ProjectConfig:
    """Central holder for project paths, hyper-parameters and dataset settings.

    All directory attributes are derived from ``BASE_DIR``. The remaining
    attributes are plain constants that the helper methods print or persist.
    """

    def __init__(self, base_dir=None):
        """Populate paths and parameters.

        Args:
            base_dir: Optional project root (``str`` or path-like). When
                omitted, the root is inferred from the current working
                directory: if that directory is named ``notebooks`` its
                parent is the root, otherwise the working directory itself is.
        """
        if base_dir is not None:
            self.BASE_DIR = Path(base_dir)
            self.NOTEBOOKS_DIR = self.BASE_DIR / 'notebooks'
        else:
            current_dir = Path.cwd()
            if current_dir.name == 'notebooks':
                self.BASE_DIR = current_dir.parent
                self.NOTEBOOKS_DIR = current_dir
            else:
                self.BASE_DIR = current_dir
                self.NOTEBOOKS_DIR = self.BASE_DIR / 'notebooks'

        # All main directories should be at project root level
        self.DATA_DIR = self.BASE_DIR / "data"
        self.RAW_DATA_DIR = self.DATA_DIR / "raw"
        self.PROCESSED_DATA_DIR = self.DATA_DIR / "processed"
        self.MODELS_DIR = self.BASE_DIR / "models"
        self.RESULTS_DIR = self.BASE_DIR / "results"
        self.SRC_DIR = self.BASE_DIR / "src"
        self.CONFIGS_DIR = self.BASE_DIR / "configs"

        # Device setup: a torch.device when PyTorch is importable, otherwise
        # the plain string 'cpu'.
        try:
            import torch
            self.DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        except ImportError:
            self.DEVICE = 'cpu'

        # Model parameters
        self.IMAGE_SIZE = 512
        self.BATCH_SIZE = 8
        self.NUM_WORKERS = 4

        # Training parameters
        self.LEARNING_RATE = 1e-4
        self.NUM_EPOCHS = 50
        self.EARLY_STOPPING_PATIENCE = 10

        # Dataset parameters
        self.NUM_CLASSES = 4
        self.SEVERITY_LABELS = {0: 'healthy', 1: 'curl', 2: 'spot', 3: 'slug'}  # Updated based on DiaMOS

        # Spray decision thresholds (printed as percentages by print_config)
        self.LOW_SPRAY_THRESHOLD = 15.0
        self.HIGH_SPRAY_THRESHOLD = 30.0

        # Data split ratios
        self.TRAIN_RATIO = 0.7
        self.VAL_RATIO = 0.2
        self.TEST_RATIO = 0.1

    def create_directories(self):
        """Create the project directory tree and the ``src`` package markers.

        Every directory is created with ``parents=True, exist_ok=True`` and
        every ``__init__.py`` with ``touch(exist_ok=True)``, so calling this
        repeatedly is harmless.
        """
        directories = [
            # Core directories
            self.DATA_DIR,
            self.RAW_DATA_DIR,
            self.PROCESSED_DATA_DIR,
            self.MODELS_DIR,
            self.RESULTS_DIR,
            self.SRC_DIR,
            self.CONFIGS_DIR,

            # Raw dataset drop locations
            self.RAW_DATA_DIR / "plantseg",
            self.RAW_DATA_DIR / "diamos",

            # Processed data directories
            self.PROCESSED_DATA_DIR / "segmentation" / "train",
            self.PROCESSED_DATA_DIR / "segmentation" / "val",
            self.PROCESSED_DATA_DIR / "segmentation" / "test",
            self.PROCESSED_DATA_DIR / "classification" / "train",
            self.PROCESSED_DATA_DIR / "classification" / "val",
            self.PROCESSED_DATA_DIR / "classification" / "test",

            # Model directories
            self.MODELS_DIR / "checkpoints",
            self.MODELS_DIR / "architectures",
            self.MODELS_DIR / "pretrained",

            # Results directories
            self.RESULTS_DIR / "logs" / "tensorboard",
            self.RESULTS_DIR / "logs" / "training",
            self.RESULTS_DIR / "visualizations" / "training_curves",
            self.RESULTS_DIR / "visualizations" / "predictions",
            self.RESULTS_DIR / "metrics",
            self.RESULTS_DIR / "reports",

            # Source code directories
            self.SRC_DIR / "data",
            self.SRC_DIR / "models",
            self.SRC_DIR / "training",
            self.SRC_DIR / "inference",
            self.SRC_DIR / "utils",

            # Metadata directory
            self.DATA_DIR / "metadata",

            # Notebooks directory
            self.NOTEBOOKS_DIR,

            # Additional directories
            self.BASE_DIR / "docs",
            self.BASE_DIR / "scripts",
            self.BASE_DIR / "tests"
        ]

        print("📁 Creating project directory structure...")
        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)

        # Create __init__.py files for Python packages
        init_files = [
            self.SRC_DIR / "__init__.py",
            self.SRC_DIR / "data" / "__init__.py",
            self.SRC_DIR / "models" / "__init__.py",
            self.SRC_DIR / "training" / "__init__.py",
            self.SRC_DIR / "inference" / "__init__.py",
            self.SRC_DIR / "utils" / "__init__.py"
        ]

        for init_file in init_files:
            init_file.touch(exist_ok=True)

        print("✅ Directory structure created successfully!")

    def print_config(self):
        """Print the current configuration to stdout."""
        print("⚙️  PROJECT CONFIGURATION")
        print("=" * 60)
        print(f"Project Root: {self.BASE_DIR}")
        print(f"Notebooks Dir: {self.NOTEBOOKS_DIR}")
        print(f"Data Directory: {self.DATA_DIR}")
        print(f"Models Directory: {self.MODELS_DIR}")
        print(f"Results Directory: {self.RESULTS_DIR}")
        print(f"Device: {self.DEVICE}")
        print(f"Image Size: {self.IMAGE_SIZE}")
        print(f"Batch Size: {self.BATCH_SIZE}")
        print(f"Learning Rate: {self.LEARNING_RATE}")
        print(f"Num Epochs: {self.NUM_EPOCHS}")
        print(f"Num Classes: {self.NUM_CLASSES}")
        print(f"Severity Labels: {self.SEVERITY_LABELS}")
        print(f"Spray Thresholds: Low={self.LOW_SPRAY_THRESHOLD}%, High={self.HIGH_SPRAY_THRESHOLD}%")
        print(f"Data Splits: Train={self.TRAIN_RATIO}, Val={self.VAL_RATIO}, Test={self.TEST_RATIO}")

    def save_config(self):
        """Write the configuration to ``<CONFIGS_DIR>/runtime_config.json``.

        The configs directory is created if needed, so this works even when
        ``create_directories`` has not been called.

        Returns:
            pathlib.Path: Location of the written JSON file.
        """
        config_dict = {
            'paths': {
                'base_dir': str(self.BASE_DIR),
                'data_dir': str(self.DATA_DIR),
                'raw_data_dir': str(self.RAW_DATA_DIR),
                'processed_data_dir': str(self.PROCESSED_DATA_DIR),
                'models_dir': str(self.MODELS_DIR),
                'results_dir': str(self.RESULTS_DIR),
                'notebooks_dir': str(self.NOTEBOOKS_DIR),
                'metadata_dir': str(self.DATA_DIR / "metadata")
            },
            'device': str(self.DEVICE),
            'model_params': {
                'image_size': self.IMAGE_SIZE,
                'batch_size': self.BATCH_SIZE,
                'num_workers': self.NUM_WORKERS
            },
            'training_params': {
                'learning_rate': self.LEARNING_RATE,
                'num_epochs': self.NUM_EPOCHS,
                'early_stopping_patience': self.EARLY_STOPPING_PATIENCE
            },
            'dataset_params': {
                'num_classes': self.NUM_CLASSES,
                # JSON object keys are strings, so the integer class ids are
                # stored as "0", "1", ... in the file.
                'severity_labels': self.SEVERITY_LABELS,
                'spray_thresholds': {
                    'low': self.LOW_SPRAY_THRESHOLD,
                    'high': self.HIGH_SPRAY_THRESHOLD
                },
                'data_splits': {
                    'train': self.TRAIN_RATIO,
                    'val': self.VAL_RATIO,
                    'test': self.TEST_RATIO
                }
            },
            'dataset_info': {
                'plantseg': {
                    'type': 'segmentation',
                    'has_pretrained_splits': True,
                    'structure': 'images/{train,val,test}/, annotations/{train,val,test}/, json/'
                },
                'diamos': {
                    'type': 'classification',
                    'categories': ['healthy', 'curl', 'spot', 'slug'],
                    'structure': 'leaves/{healthy,curl,spot,slug}/, fruits/, annotation/csv/'
                }
            }
        }

        # Do not rely on create_directories() (or a patched open()) having
        # created the target folder already.
        self.CONFIGS_DIR.mkdir(parents=True, exist_ok=True)
        config_file = self.CONFIGS_DIR / 'runtime_config.json'
        with open(config_file, 'w') as f:
            json.dump(config_dict, f, indent=2)

        print(f"💾 Configuration saved to: {config_file}")
        return config_file

# Initialize and create configuration
# ProjectConfig() derives its root from Path.cwd(), so the working directory
# in effect when this cell runs decides where everything below is created.
config = ProjectConfig()
config.create_directories()
config.print_config()
# Trailing expression: the returned config-file path is echoed as the cell's output.
config.save_config()


📁 Creating project directory structure...
✅ Directory structure created successfully!
⚙️  PROJECT CONFIGURATION
Project Root: /content/drive/MyDrive/intelligent_pesticide_system
Notebooks Dir: /content/drive/MyDrive/intelligent_pesticide_system/notebooks
Data Directory: /content/drive/MyDrive/intelligent_pesticide_system/data
Models Directory: /content/drive/MyDrive/intelligent_pesticide_system/models
Results Directory: /content/drive/MyDrive/intelligent_pesticide_system/results
Device: cuda
Image Size: 512
Batch Size: 8
Learning Rate: 0.0001
Num Epochs: 50
Num Classes: 4
Severity Labels: {0: 'healthy', 1: 'curl', 2: 'spot', 3: 'slug'}
Spray Thresholds: Low=15.0%, High=30.0%
Data Splits: Train=0.7, Val=0.2, Test=0.1
💾 Configuration saved to: /content/drive/MyDrive/intelligent_pesticide_system/configs/runtime_config.json


PosixPath('/content/drive/MyDrive/intelligent_pesticide_system/configs/runtime_config.json')

In [None]:
print("🧪 TESTING PYTORCH FUNCTIONALITY")
print("=" * 50)

# Step 1 - plain tensor creation on the default device.
print("Testing tensor operations...")
x = torch.randn(3, 224, 224)
print(f"✅ Created tensor with shape: {x.shape}")

# Step 2 - GPU round trip; skipped with a notice when CUDA is absent.
if not torch.cuda.is_available():
    print("⚠️  Running on CPU - GPU tests skipped")
else:
    print("\nTesting GPU operations...")
    x_gpu = x.to(config.DEVICE)
    y_gpu = torch.randn(3, 224, 224).to(config.DEVICE)
    z_gpu = x_gpu + y_gpu
    print(f"✅ GPU tensor operations successful")
    print(f"Result tensor device: {z_gpu.device}")

    # Allocator statistics, converted from bytes to MiB.
    print(f"\nGPU Memory Usage:")
    print(f"Allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    print(f"Reserved: {torch.cuda.memory_reserved() / 1024**2:.1f} MB")

# Step 3 - build a tiny conv -> pool -> linear classifier on the configured device.
print("\nTesting neural network creation...")
test_model = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(16, 4),
)
test_model = test_model.to(config.DEVICE)

# Step 4 - one gradient-free forward pass on a random batch of two 64x64 RGB inputs.
test_input = torch.randn(2, 3, 64, 64)
test_input = test_input.to(config.DEVICE)
with torch.no_grad():
    test_output = test_model(test_input)

print(f"✅ Test model forward pass successful")
print(f"Input shape: {test_input.shape}")
print(f"Output shape: {test_output.shape}")

# Step 5 - drop the throw-away model objects and release cached GPU blocks.
del test_model, test_input, test_output
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("\n🎉 All PyTorch functionality tests passed!")


🧪 TESTING PYTORCH FUNCTIONALITY
Testing tensor operations...
✅ Created tensor with shape: torch.Size([3, 224, 224])

Testing GPU operations...
✅ GPU tensor operations successful
Result tensor device: cuda:0

GPU Memory Usage:
Allocated: 1.7 MB
Reserved: 2.0 MB

Testing neural network creation...
✅ Test model forward pass successful
Input shape: torch.Size([2, 3, 64, 64])
Output shape: torch.Size([2, 4])

🎉 All PyTorch functionality tests passed!


In [None]:
from pathlib import Path

def check_project_structure():
    """Report on the project layout and create whatever required folder is absent.

    The root is the current working directory, or its parent when the working
    directory is named ``notebooks``. The function prints a checklist of the
    required folders, inspects the PlantSeg and DiaMOS dataset folders, warns
    about project folders found inside ``notebooks/`` and finally creates any
    missing required folder.

    Returns:
        tuple: ``(all_exist, project_root)`` where ``all_exist`` tells whether
        every required folder was present *before* any was created, and
        ``project_root`` is the resolved root ``Path``.
    """
    print("📁 CHECKING PROJECT STRUCTURE (UPDATED FOR REAL DATASETS)")
    print("=" * 70)

    # Resolve the root relative to where the notebook is running.
    cwd = Path.cwd()
    if cwd.name != 'notebooks':
        project_root = cwd
        print(f"🔍 Running from project root")
    else:
        project_root = cwd.parent
        print(f"🔍 Running from notebooks folder")

    print(f"📂 Project root: {project_root}")
    print("-" * 70)

    # Top-level folder -> sub-folders that must exist beneath it.
    required_structure = {
        "data": [
            "raw/plantseg",         # PlantSeg keeps its own internal layout
            "raw/diamos",           # DiaMOS keeps its own internal layout
            "processed/segmentation",
            "processed/classification",
            "metadata"
        ],
        "models": ["checkpoints", "architectures", "pretrained"],
        "results": ["logs", "visualizations", "metrics", "reports"],
        "src": ["data", "models", "training", "inference", "utils"],
        "configs": [],
        "notebooks": [],
        "docs": [],
        "scripts": [],
        "tests": []
    }

    # --- Required folders -------------------------------------------------
    missing_dirs = []
    for top_name, children in required_structure.items():
        top_path = project_root / top_name

        # A missing top-level folder is reported once; its children are not listed.
        if not top_path.exists():
            print(f"❌ {top_name}/ - MISSING")
            missing_dirs.append(top_name)
            continue

        print(f"✅ {top_name}/")
        for child in children:
            if (top_path / child).exists():
                print(f"   ✅ {child}/")
            else:
                print(f"   ❌ {child}/ - MISSING")
                missing_dirs.append(f"{top_name}/{child}")

    # Every miss was recorded above, so an empty list means a complete layout.
    all_exist = not missing_dirs

    # --- Dataset folders --------------------------------------------------
    print(f"\n📊 CHECKING ACTUAL DATASETS:")
    raw_root = project_root / "data" / "raw"

    plantseg_path = raw_root / "plantseg"
    if not plantseg_path.exists():
        print(f"   ❌ PlantSeg dataset not found at {plantseg_path}")
    else:
        plantseg_structure_ok = True
        for expected_dir in ("images", "annotations", "json"):
            dir_path = plantseg_path / expected_dir
            if not dir_path.exists():
                print(f"   ❌ plantseg/{expected_dir}/ - MISSING")
                plantseg_structure_ok = False
                continue
            # Count immediate sub-folders (train/val/test)
            subdir_total = sum(1 for entry in dir_path.iterdir() if entry.is_dir())
            print(f"   ✅ plantseg/{expected_dir}/ ({subdir_total} subdirs)")

        if not plantseg_structure_ok:
            print(f"   ⚠️  PlantSeg structure incomplete")
        else:
            print(f"   🎉 PlantSeg structure looks good!")

    diamos_path = raw_root / "diamos"
    if not diamos_path.exists():
        print(f"   ❌ DiaMOS dataset not found at {diamos_path}")
    else:
        diamos_structure_ok = True
        for expected_dir in ("leaves", "annotation"):
            dir_path = diamos_path / expected_dir
            if not dir_path.exists():
                print(f"   ❌ diamos/{expected_dir}/ - MISSING")
                diamos_structure_ok = False
                continue

            if expected_dir == "leaves":
                # One sub-folder per category
                category_total = sum(1 for entry in dir_path.iterdir() if entry.is_dir())
                print(f"   ✅ diamos/leaves/ ({category_total} categories)")
            elif expected_dir == "annotation":
                # A missing CSV only produces a warning; it does not mark the
                # dataset as incomplete.
                if (dir_path / "csv" / "diaMOSPlant.csv").exists():
                    print(f"   ✅ diamos/annotation/csv/ (CSV found)")
                else:
                    print(f"   ⚠️  diamos/annotation/csv/ (no CSV)")

        if not diamos_structure_ok:
            print(f"   ⚠️  DiaMOS structure incomplete")
        else:
            print(f"   🎉 DiaMOS structure looks good!")

    # --- Project folders that ended up inside notebooks/ -------------------
    notebooks_dir = project_root / 'notebooks'
    if notebooks_dir.exists():
        wrong_locations = [
            f"notebooks/{dir_name}"
            for dir_name in ('data', 'models', 'results', 'src')
            if (notebooks_dir / dir_name).exists()
        ]
        if wrong_locations:
            print(f"\n⚠️  DIRECTORIES IN WRONG LOCATIONS:")
            for wrong_dir in wrong_locations:
                print(f"   📁 {wrong_dir} (should be at project root)")

    # --- Verdict, plus auto-repair when something was missing ---------------
    if not all_exist:
        print(f"\n⚠️  Missing directories: {len(missing_dirs)}")

        print("Creating missing directories...")
        for top_name, children in required_structure.items():
            top_path = project_root / top_name
            top_path.mkdir(exist_ok=True)
            for child in children:
                (top_path / child).mkdir(parents=True, exist_ok=True)

        print("✅ Missing directories created!")
    else:
        print(f"\n🎉 Project structure is complete!")

    return all_exist, project_root

# Run the updated check
# structure_ok is False when any required folder was missing at check time,
# even though the function creates the missing folders before returning.
structure_ok, project_root = check_project_structure()

# Display the updated project tree
def display_project_tree(directory, max_depth=3):
    """Print a directory-only tree of ``directory`` down to ``max_depth`` levels.

    At most 8 folders are listed at the top level and 5 at every deeper level;
    when more exist, a "... and N more directories" line follows. Folders on
    the deepest printed level are annotated with the number of files and
    sub-folders they contain (omitted when both are zero).

    Args:
        directory (pathlib.Path): Root folder to display.
        max_depth (int): Number of levels to print below the root.
    """
    def _display_tree(path, prefix="", current_depth=0):
        """Print the sub-folders of ``path`` and recurse until ``max_depth``."""
        if current_depth >= max_depth:
            return

        items = sorted([item for item in path.iterdir() if item.is_dir()])

        # Limit items shown to avoid clutter
        display_items = items[:8] if current_depth == 0 else items[:5]

        for i, item in enumerate(display_items):
            is_last = i == len(display_items) - 1
            current_prefix = "└── " if is_last else "├── "

            # Show file count for leaf directories
            if current_depth == max_depth - 1:
                try:
                    # One pass over the folder instead of listing it twice.
                    file_count = 0
                    dir_count = 0
                    for child in item.iterdir():
                        if child.is_file():
                            file_count += 1
                        elif child.is_dir():
                            dir_count += 1
                    info = f" ({file_count} files, {dir_count} dirs)" if file_count > 0 or dir_count > 0 else ""
                except OSError:
                    # Unreadable folder: show the name without counts. Only
                    # filesystem errors are swallowed, so Ctrl-C / kernel
                    # interrupts still propagate.
                    info = ""
                print(f"{prefix}{current_prefix}{item.name}/{info}")
            else:
                print(f"{prefix}{current_prefix}{item.name}/")

            if current_depth < max_depth - 1:
                extension = "    " if is_last else "│   "
                _display_tree(item, prefix + extension, current_depth + 1)

        if len(items) > len(display_items):
            remaining = len(items) - len(display_items)
            print(f"{prefix}    ... and {remaining} more directories")

    print(f"\n📂 PROJECT TREE STRUCTURE:")
    print("=" * 50)
    print(f"{directory.name}/")
    _display_tree(directory)

# The tree is only printed when the structure check found every required
# folder already in place (structure_ok is True).
if structure_ok:
    display_project_tree(project_root)


📁 CHECKING PROJECT STRUCTURE (UPDATED FOR REAL DATASETS)
🔍 Running from project root
📂 Project root: /content/drive/MyDrive/intelligent_pesticide_system
----------------------------------------------------------------------
✅ data/
   ✅ raw/plantseg/
   ✅ raw/diamos/
   ✅ processed/segmentation/
   ✅ processed/classification/
   ✅ metadata/
✅ models/
   ✅ checkpoints/
   ✅ architectures/
   ✅ pretrained/
✅ results/
   ✅ logs/
   ✅ visualizations/
   ✅ metrics/
   ✅ reports/
✅ src/
   ✅ data/
   ✅ models/
   ✅ training/
   ✅ inference/
   ✅ utils/
✅ configs/
✅ notebooks/
✅ docs/
✅ scripts/
✅ tests/

📊 CHECKING ACTUAL DATASETS:
   ✅ plantseg/images/ (3 subdirs)
   ✅ plantseg/annotations/ (3 subdirs)
   ✅ plantseg/json/ (1 subdirs)
   🎉 PlantSeg structure looks good!
   ✅ diamos/leaves/ (4 categories)
   ✅ diamos/annotation/csv/ (CSV found)
   🎉 DiaMOS structure looks good!

🎉 Project structure is complete!

📂 PROJECT TREE STRUCTURE:
intelligent_pesticide_system/
├── .ipynb_checkpoints/
├

In [None]:
print("📋 ENVIRONMENT SETUP SUMMARY (UPDATED)")
print("=" * 70)

# System check
try:
    import torch
    system_status = "✅ Ready" if torch.cuda.is_available() else "⚠️  CPU Only"
except ImportError:
    system_status = "❌ PyTorch not found"

print(f"System Status: {system_status}")

# Dependencies check
try:
    import segmentation_models_pytorch as smp
    import timm
    import albumentations as A
    deps_status = "✅ Complete"
except ImportError as e:
    deps_status = f"❌ Missing: {e}"

print(f"Dependencies: {deps_status}")

# Project structure check
structure_status = "✅ Complete" if structure_ok else "❌ Incomplete"
print(f"Project Structure: {structure_status}")

# Dataset status check
plantseg_status = "✅ Found" if (project_root / "data" / "raw" / "plantseg" / "images").exists() else "❌ Missing"
diamos_status = "✅ Found" if (project_root / "data" / "raw" / "diamos" / "leaves").exists() else "❌ Missing"

print(f"PlantSeg Dataset: {plantseg_status}")
print(f"DiaMOS Dataset: {diamos_status}")

print(f"\n🎯 DATASET INFORMATION:")
print(f"📊 PlantSeg: Segmentation dataset with train/val/test splits")
print(f"   - Structure: images/{{train,val,test}}/ + annotations/{{train,val,test}}/")  # doubled braces print literal { }
print(f"   - Format: JPG images + PNG masks + JSON annotations")

print(f"📊 DiaMOS: Classification dataset with severity levels")
print(f"   - Structure: leaves/{{healthy,curl,spot,slug}}/ + annotation/csv/")  # doubled braces print literal { }
print(f"   - Format: JPG images + CSV metadata")

print(f"\n🎯 DATASET CHECK:")
if plantseg_status == "❌ Missing":
    print("❗ Download PlantSeg dataset to data/raw/plantseg/")
if diamos_status == "❌ Missing":
    print("❗ Download DiaMOS dataset to data/raw/diamos/")

if plantseg_status == "✅ Found" and diamos_status == "✅ Found":
    print("✅ All datasets ready!")
else:
    print("1. Download missing datasets")
    print("2. Re-run this notebook to verify setup")

# Boolean view of the checks above. A CPU-only runtime is reported through
# 'system_ready' but does not block the setup from counting as complete.
system_ready = system_status == "✅ Ready"
dependencies_ready = deps_status == "✅ Complete"
structure_ready = structure_status == "✅ Complete"
plantseg_found = plantseg_status == "✅ Found"
diamos_found = diamos_status == "✅ Found"
datasets_ready = plantseg_found and diamos_found

# Save final configuration with dataset status
final_config = {
    # Derived from the actual checks; this flag used to be hard-coded to True,
    # so the status file claimed success even when the message below said
    # "Setup incomplete".
    'setup_complete': dependencies_ready and structure_ready and datasets_ready,
    'system_ready': system_ready,
    'dependencies_ready': dependencies_ready,
    'structure_ready': structure_ready,
    'plantseg_found': plantseg_found,
    'diamos_found': diamos_found,
    'ready_for_data_prep': datasets_ready
}

# Save setup status
setup_file = project_root / "configs" / "setup_status.json"
# Do not depend on an earlier cell (or a patched open()) having created configs/.
setup_file.parent.mkdir(parents=True, exist_ok=True)
with open(setup_file, 'w') as f:
    json.dump(final_config, f, indent=2)

print(f"\n💾 Setup status saved to: {setup_file}")

# Announce success only when every blocking check passed, not just the dataset check.
if final_config['setup_complete']:
    print(f"\n🚀 Environment setup complete!")
else:
    print(f"\n⚠️  Setup incomplete. Please resolve issues above before proceeding.")


📋 ENVIRONMENT SETUP SUMMARY (UPDATED)
System Status: ✅ Ready
Dependencies: ✅ Complete
Project Structure: ✅ Complete
PlantSeg Dataset: ✅ Found
DiaMOS Dataset: ✅ Found

🎯 DATASET INFORMATION:
📊 PlantSeg: Segmentation dataset with train/val/test splits
   - Structure: images/{train,val,test}/ + annotations/{train,val,test}/
   - Format: JPG images + PNG masks + JSON annotations
📊 DiaMOS: Classification dataset with severity levels
   - Structure: leaves/{healthy,curl,spot,slug}/ + annotation/csv/
   - Format: JPG images + CSV metadata

🎯 DATASET CHECK:
✅ All datasets ready!

💾 Setup status saved to: /content/drive/MyDrive/intelligent_pesticide_system/configs/setup_status.json

🚀 Environment setup complete!
