In [7]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.23.0-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting torch==2.8.0 (from torchvision)
  Downloading torch-2.8.0-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch==2.8.0->torchvision)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torchvision-0.23.0-cp312-cp312-win_amd64.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.6 MB 991.0 kB/s eta 0:00:02
   -- ------------------------------------- 0.1/1.6 MB 980.4 kB/s eta 0:00:02
   ---- ----------------------------------- 0.2/1.6 MB 1.4 MB/s eta 0:00:02
   ------- -------------------------------- 0.3/1.6 MB 1.6 MB/s eta 0:00:01
   -------- ------------------------------- 0.3/1.6 MB 1.7 MB/s eta 0:00:01
   ------------ --------------------------- 0.5/1.6 MB 1.7 MB/s eta 0:00:01
   -------------- ------------------------- 0.6/1.6 MB 1.9 MB/s eta 0:00:

In [11]:
import os
import sys

# Locate the project root from inside a Jupyter notebook, regardless of which
# sub-directory the notebook server was launched from, then make the modules
# in '<project_root>/src' importable.


def find_project_root(start_dir, markers=('src', 'data')):
    """Return the nearest directory at or above ``start_dir`` that holds every marker.

    Args:
        start_dir: Directory where the upward search begins. Relative paths
            are resolved against the current working directory.
        markers: Names of sub-directories that must ALL be present for a
            directory to count as the project root.

    Returns:
        The path of the first matching directory, or ``None`` when the file
        system root is reached without finding one.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        # Require real directories: a stray *file* called 'src' or 'data'
        # must not be mistaken for the project layout.
        if all(os.path.isdir(os.path.join(candidate, name)) for name in markers):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # Reached file system root
            return None
        candidate = parent


current_path = os.getcwd()
project_root = find_project_root(current_path)

if project_root is None:
    print("Error: Could not find project root. Make sure 'src' and 'data' directories exist.")
    # Best-effort fallback: carry on with the working directory. The import
    # below will most likely fail (and report why) if this guess is wrong.
    project_root = current_path

print(f"Detected project root: {project_root}")

# Add the 'src' directory to the Python path to import modules from it.
# The membership test keeps re-runs of this cell from adding duplicates.
src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.append(src_path)
    print(f"Added '{src_path}' to sys.path.")

try:
    from data_preprocessing import preprocess_and_split_data
    print("Successfully imported preprocess_and_split_data from src/data_preprocessing.py")
except ImportError as e:
    # Report enough context to diagnose a wrong root or a missing module.
    print(f"Error importing data_preprocessing: {e}")
    print("Please ensure 'src/data_preprocessing.py' exists and is correctly defined.")
    print(f"Current sys.path: {sys.path}")

Detected project root: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification
Successfully imported preprocess_and_split_data from src/data_preprocessing.py


In [12]:
# Input and output dataset folders, both located under <project_root>/data.
RAW_DATA_DIR, PROCESSED_DATA_DIR = (
    os.path.join(project_root, 'data', stage) for stage in ('raw', 'processed')
)

# Echo the resolved locations so a wrong project root is obvious at a glance.
print(
    f"Raw data directory: {RAW_DATA_DIR}",
    f"Processed data directory: {PROCESSED_DATA_DIR}",
    sep="\n",
)

Raw data directory: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\raw
Processed data directory: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\processed


In [13]:
# Run the preprocessing pipeline on the raw images.
# NOTE: the recorded run log shows the pipeline clearing the processed-data
# directory before writing, so re-running this cell rebuilds it from scratch.
if not os.path.isdir(RAW_DATA_DIR):
    # Nothing to process: report the problem and do not claim completion.
    print(f"Error: Raw data directory not found at '{RAW_DATA_DIR}'.")
    print("Please ensure your original images are placed here, organized into class subfolders.")
else:
    try:
        # The call returns only after all splits have been written, so any
        # message printed after it describes a finished run.
        preprocess_and_split_data(RAW_DATA_DIR, PROCESSED_DATA_DIR)
    except Exception as e:
        print(f"An error occurred during data preprocessing: {e}")
        # Re-raise so the full traceback is shown and "Run All" stops here
        # instead of continuing with missing or partial processed data.
        raise
    else:
        print("\nData preprocessing and splitting finished.")
        print("\n--- Data Preprocessing Notebook Execution Complete ---")


--- Starting Data Preprocessing and Splitting ---
Raw data directory: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\raw
Processed data directory: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\processed
Target image size: (224, 224)
Train/Val/Test split ratios: 0.7/0.15/0.15000000000000005
Clearing existing processed data directory: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\processed
Collecting image paths and labels...
Found 16011 images across 10 classes.
Dataset split sizes: Train=11207, Val=2402, Test=2402
Directory structure created at: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\processed
Image transforms defined (with augmentation for training, without for validation/test).
Processing and saving train set (11207 images)...


Saving train images: 100%|███████████████████████████████████████████████████████| 11207/11207 [04:31<00:00, 41.28it/s]


Processing and saving val set (2402 images)...


Saving val images: 100%|███████████████████████████████████████████████████████████| 2402/2402 [00:43<00:00, 55.69it/s]


Processing and saving test set (2402 images)...


Saving test images: 100%|██████████████████████████████████████████████████████████| 2402/2402 [00:42<00:00, 56.13it/s]

--- Data Preprocessing and Splitting Complete! ---
Processed data available at: C:\Users\alimu\OneDrive\Desktop\tomato-disease-classification\data\processed

Data preprocessing and splitting initiated. Check console for progress.

--- Data Preprocessing Notebook Execution Complete ---



