In [None]:
!pip install torchsummary

import os                       # for working with files
import numpy as np              # for numerical computationss
import pandas as pd             # for working with dataframes
import torch                    # Pytorch module
import matplotlib.pyplot as plt # for plotting informations on graph and images using tensors
import torch.nn as nn           # for creating  neural networks
from torch.utils.data import DataLoader # for dataloaders
from PIL import Image           # for checking images
import torch.nn.functional as F # for functions for calculating loss
import torchvision.transforms as transforms   # for transforming images into tensors
from torchvision.utils import make_grid       # for data checking
from torchvision.datasets import ImageFolder  # for working with classes and images
from torchsummary import summary              # for getting the summary of our model

%matplotlib inline


In [None]:
## Unpack the uploaded test and train zip files into the current working directory.

from zipfile import ZipFile

# The handle is deliberately NOT named `zip`: that would shadow the builtin
# `zip()` for every later cell that runs in this kernel.
for file_name, done_message in (
    ("/content/test.zip", "Done."),
    ("/content/train.zip", "Done again."),
):
    with ZipFile(file_name, 'r') as archive:
        archive.extractall()
        print(done_message)

# Setting up our data directories.

# data_dir = "/content/PROJ3"
# train_dir = data_dir + "/Train"
test_dir = "/content/test"
train_dir = "/content/train"
#test_dir = data_dir + "/Test"

# Every entry found in the training directory (files as well as folders) lands in this list.
diseases = os.listdir(train_dir)

print(diseases)


# Positions (into `diseases`) of the entries to keep.
# NOTE(review): os.listdir() returns entries in arbitrary order, so the names these
# positions pick depend on the directory listing; confirm they select the intended classes.
indices = [1, 2, 3, 4, 7, 9, 10, 11, 14, 15, 16, 19, 20, 21, 24]

# Assign the list comprehension result to new_diseases
new_diseases = [diseases[i] for i in indices]


# Print the new_diseases list
print(new_diseases)
# NOTE: this count is taken from the full listing `diseases`, not from the `new_diseases` subset.
print("Total disease classes are: {}".format(len(diseases)))

# Walk the selected class names (expected form "<plant>___<condition>") and collect
# the distinct plant names plus a count of the non-healthy conditions.
plants = []
NumberOfDiseases = 0
for plant in new_diseases:
    parts = plant.split('___')
    crop = parts[0]          # str.split always yields at least one element
    condition = parts[1:2]   # empty list when the name has no '___' separator

    if crop not in plants:
        plants.append(crop)
    if condition and condition[0] != 'healthy':
        NumberOfDiseases += 1


# Summary of what was found
print(f"Unique Plants are: \n{plants}")

print("Number of plants: {}".format(len(plants)))

print("Number of diseases: {}".format(NumberOfDiseases))

# Number of images for each disease: maps each class folder name to the number of
# entries inside that folder.
# Note: `disease` and `disease_path` remain defined at notebook scope after this loop.
nums = {}
for disease in diseases:
    disease_path = os.path.join(train_dir, disease)
    if os.path.isdir(disease_path):  # Checking if it's actually a directory (had some issues with DStore metadata folder being added)
        nums[disease] = len(os.listdir(disease_path))

# converting the nums dictionary to pandas dataframe (one row per class folder)
img_per_class = pd.DataFrame(nums.values(), index=nums.keys(), columns=["no. of images"])
# A bare expression is only rendered when it is the last statement of its cell.
img_per_class


# Bar chart: one bar per class folder, bar height = number of images in that folder.
plt.figure(figsize=(20, 5))
plt.bar(nums.keys(), nums.values(), width=0.3)
plt.xticks(range(len(nums)), nums.keys(), fontsize=5, rotation=90)  # one tick per class
plt.xlabel('Plants/Diseases', fontsize=10)
plt.ylabel('No of images available', fontsize=10)
plt.title('Images per each class of plant disease')


# Total number of training images across all class folders.
n_train = sum(nums.values())
print(f"There are {n_train} images for training.")


import os
import shutil

from torchvision import transforms

# Jupyter/Colab may leave a hidden `.ipynb_checkpoints` folder inside the data
# directories. ImageFolder treats every sub-directory as a class when it is
# constructed, so these folders have to be removed BEFORE the datasets are built
# (previously the cleanup ran after construction, which is too late).
for data_root in (test_dir, train_dir):
    checkpoint_dir = os.path.join(data_root, '.ipynb_checkpoints')
    if os.path.exists(checkpoint_dir):
        shutil.rmtree(checkpoint_dir)

# Resize every image to the same dimensions and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Choose dimensions that work for your model
    transforms.ToTensor(),
    # Add any other transformations you need
])

# One class per sub-directory of each root.
test = ImageFolder(test_dir, transform=transform)
train = ImageFolder(train_dir, transform=transform)


import cv2

# Parallel lists: img_train[i] is the grayscale image read from path_train[i],
# and label_train[i] is its label.
img_train = []
label_train = []
path_train = []   # file paths, kept separate so img_train holds images only

directory = "/content/train"

for crop_folder in os.listdir(directory):
    sub_direct = os.path.join(directory, crop_folder)
    # Only class folders hold images; this skips .DS_Store and any other stray file.
    if not os.path.isdir(sub_direct):
        continue

    # NOTE(review): split() splits on whitespace, whereas the class names are split
    # on '___' elsewhere in this notebook; confirm which label is intended here.
    crop_type = crop_folder.split()[0]

    for filename in os.listdir(sub_direct):
        file = os.path.join(sub_direct, filename)
        if not os.path.isfile(file):
            continue

        # cv2.imread does not raise on an unreadable file; it returns None.
        img = cv2.imread(file)
        if img is None:
            print(file, " - failed")
            continue

        try:
            # Resize to width=1400, height=1000.
            # NOTE(review): an earlier comment said 128x128; confirm the intended
            # size, since images this large are memory-hungry.
            img_resized = cv2.resize(img, (1400, 1000), interpolation=cv2.INTER_AREA)
            # Convert image to grayscale
            img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
        except cv2.error:
            # Skip this file entirely instead of re-using the previous image.
            print(file, " - failed")
            continue

        img_train.append(img_gray)
        path_train.append(file)
        label_train.append(crop_type)


# Peek at the first training sample: print the image tensor's shape and its class index.
img, label = train[0]
print(img.shape, label)

# for checking some images from training dataset
def show_image(image, label):
    """Print the class name (and index) for `label` and draw `image`.

    The image is permuted with (1, 2, 0) before being handed to plt.imshow;
    the class name is looked up in the notebook-level `train.classes`.
    """
    class_name = train.classes[label]
    print(f"Label :{class_name}({label})")
    channels_last = image.permute(1, 2, 0)
    plt.imshow(channels_last)



show_image(*train[130]) # displaying a sample leaf (presumably a healthy one; verify the class of index 130).




# Actual model building - I won't give you all the code, but I will set you up with some preliminary code to give you some ideas on how to proceed...

In [None]:
# Seed torch's random number generator.
random_seed = 7
torch.manual_seed(random_seed)

# Number of samples per mini-batch.
batch_size = 32

# DataLoaders for the training set (shuffled) and the test set (default order).
train_dl = DataLoader(
    train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
test_dl = DataLoader(
    test,
    batch_size=batch_size,
    num_workers=2,
    pin_memory=True,
)


# helper function to show a batch of training instances
def show_batch(data):
    """Draw the first batch yielded by `data` as an image grid, 8 images per row.

    `data` is iterated and each item is unpacked as (images, labels). Only the
    first item is used; nothing is drawn when `data` yields no items.
    """
    for images, _ in data:
        # Bring every image to a common 224x224 size before tiling.
        to_common_size = transforms.Resize((224, 224))
        resized_images = [to_common_size(img) for img in images]

        fig, ax = plt.subplots(figsize=(30, 30))
        ax.set_xticks([])
        ax.set_yticks([])
        grid = make_grid(resized_images, nrow=8)
        ax.imshow(grid.permute(1, 2, 0))
        return  # only the first batch is displayed

## The `num_workers=2` passed to the DataLoaders above can be changed to match the number of CPU cores that we're using.


# Display the first batch of training images.
show_batch(train_dl)

import os
import subprocess
import multiprocessing
import numpy as np
from joblib import Parallel, delayed
import time

def get_available_cores():
    """Return the number of CPU cores to use.

    Lookup order:
      1. the SLURM_CPUS_PER_TASK environment variable,
      2. the SLURM_NTASKS environment variable,
      3. multiprocessing.cpu_count() when neither variable is set.

    The SLURM variables are present when the code runs inside a SLURM job
    (the original note mentions the Rivanna cluster).
    """
    for variable in ('SLURM_CPUS_PER_TASK', 'SLURM_NTASKS'):
        if variable in os.environ:
            return int(os.environ[variable])

    # Not running under SLURM: fall back to the machine-wide core count.
    return multiprocessing.cpu_count()

def parallel_process(function, items_list, n_jobs=None, verbose=1):
    """Apply `function` to every item of `items_list` in parallel with joblib.

    Args:
        function: callable invoked once per item, as function(item).
        items_list: iterable of items. Any iterable is accepted (including
            generators); it is materialised into a list first so it can be counted.
        n_jobs: number of workers handed to joblib.Parallel. When None, the value
            returned by get_available_cores() is used.
        verbose: verbosity level handed to joblib.Parallel.

    Returns:
        The list of results produced by joblib.Parallel.
    """
    if n_jobs is None:
        n_jobs = get_available_cores()

    # Materialise once: len() below needs a sized container, and a generator
    # could not be counted and then iterated again.
    items = list(items_list)

    print(f"Running {len(items)} tasks on {n_jobs} cores...")
    # perf_counter() is a monotonic clock intended for measuring intervals;
    # time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    results = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(function)(item) for item in items
    )

    elapsed = time.perf_counter() - start_time
    print(f"Completed in {elapsed:.2f} seconds")

    return results

def submit_slurm_job(script_path, job_name, n_cores=4, mem_per_cpu="4G",
                     time_limit="02:00:00", partition="standard",
                     output_file="slurm_%j.out"):
    """Submit `script_path` with `sbatch` and return the job ID.

    Args:
        script_path: path of the batch script, passed as the last sbatch argument.
        job_name: value for --job-name.
        n_cores: value for --cpus-per-task.
        mem_per_cpu: value for --mem-per-cpu.
        time_limit: value for --time.
        partition: value for --partition.
        output_file: value for --output.

    Returns:
        The job ID as a string (the last whitespace-separated token of sbatch's
        stdout), or None when the submission fails. Failures are reported with
        print(): sbatch cannot be launched, sbatch exits non-zero, or sbatch
        prints nothing.
    """
    cmd = [
        "sbatch",
        f"--job-name={job_name}",
        f"--cpus-per-task={n_cores}",
        f"--mem-per-cpu={mem_per_cpu}",
        f"--time={time_limit}",
        f"--partition={partition}",
        f"--output={output_file}",
        script_path
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # Raised e.g. as FileNotFoundError when sbatch is not installed or not on
        # PATH; report it the same way as any other submission failure.
        print(f"Error submitting job: {exc}")
        return None

    if result.returncode != 0:
        print(f"Error submitting job: {result.stderr}")
        return None

    # Extract job ID from output (typically "Submitted batch job 123456")
    tokens = result.stdout.split()
    if not tokens:
        print("Error submitting job: sbatch produced no output")
        return None

    job_id = tokens[-1]
    print(f"Job submitted with ID: {job_id}")
    return job_id

def batch_process_with_numpy(data, processing_func, batch_size=1000):
    """Run `processing_func` over consecutive slices of `data`.

    Args:
        data: sliceable container supporting len().
        processing_func: callable invoked once per slice.
        batch_size: maximum number of items per slice.

    Returns:
        A list holding one `processing_func` result per slice, in order.
        A progress line is printed after each slice.
    """
    total_items = len(data)
    batch_starts = range(0, total_items, batch_size)

    results = []
    for batch_number, start in enumerate(batch_starts, start=1):
        stop = min(start + batch_size, total_items)
        results.append(processing_func(data[start:stop]))

        print(f"Processed batch {batch_number}/{(total_items-1)//batch_size + 1}")

    return results

def create_slurm_script(script_file, commands, modules=None, conda_env=None):
    """Write a bash script to `script_file` and mark it executable (mode 0o755).

    Layout of the generated file:
      - a `#!/bin/bash` line followed by a blank line,
      - one `module load <name>` line per entry of `modules`, then a blank line
        (only when `modules` is given),
      - `source activate <conda_env>` followed by a blank line (only when
        `conda_env` is given),
      - each entry of `commands` on its own line.

    Args:
        script_file: path of the file to (over)write.
        commands: iterable of shell command strings.
        modules: optional iterable of module names.
        conda_env: optional conda environment name.
    """
    with open(script_file, 'w') as script:
        script.write("#!/bin/bash\n\n")

        if modules:
            script.writelines(f"module load {module}\n" for module in modules)
            script.write("\n")

        if conda_env:
            script.write(f"source activate {conda_env}\n\n")

        script.writelines(f"{cmd}\n" for cmd in commands)

    # rwxr-xr-x so the script can be executed directly
    os.chmod(script_file, 0o755)
    print(f"Created SLURM script: {script_file}")


import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import models, transforms
import time
import copy
import matplotlib.pyplot as plt
import numpy as np
import os
from torchvision.datasets import ImageFolder
from PIL import Image, UnidentifiedImageError
import torch


class RobustImageFolder(ImageFolder):
    """ImageFolder that skips over samples whose image cannot be loaded.

    When loading index `idx` raises UnidentifiedImageError or OSError, the
    following indices are tried in turn (wrapping around at the end of the
    dataset). Each index is attempted at most once, so a dataset in which
    every image is unreadable ends with a blank placeholder rather than
    recursing without bound.
    """

    def __getitem__(self, idx):
        """Return the sample at `idx`, or the next loadable one after it.

        Returns:
            Whatever ImageFolder.__getitem__ returns for the first index that
            loads, or (zeros tensor of shape (3, 224, 224), 0) when none does.
        """
        total = len(self)
        candidate = idx  # first attempt uses `idx` exactly as given

        for attempt in range(max(total, 1)):
            try:
                return super().__getitem__(candidate)
            except (UnidentifiedImageError, OSError) as e:
                print(f"Error loading image at index {candidate}: {e}")

            # Announce a retry only when another attempt will actually happen.
            if attempt + 1 < total:
                candidate = (candidate + 1) % total
                print(f"Trying alternate image at index {candidate}")

        print("Creating blank placeholder image")
        placeholder = torch.zeros((3, 224, 224))
        return placeholder, 0

class TransformedDataset(torch.utils.data.Dataset):
    """Dataset wrapper that opens image paths and applies a transform.

    The wrapped `dataset` may be:
      - an object exposing `.samples` as a sequence of (path, label) pairs,
      - an indexable whose items are (path, label) tuples, or
      - an indexable of paths, with labels taken from `self.labels`.

    `self.labels` is read from `dataset.targets`, then `dataset.labels`, then
    the labels inside `dataset.samples`; when none exist it is all zeros.
    """

    def __init__(self, dataset, transform=None):
        """Store the wrapped dataset and transform, and derive `self.labels`."""
        self.dataset = dataset
        self.transform = transform
        if hasattr(dataset, 'targets'):
            self.labels = dataset.targets
        elif hasattr(dataset, 'labels'):
            self.labels = dataset.labels
        else:
            self.labels = [s[1] for s in dataset.samples] if hasattr(dataset, 'samples') else [0] * len(dataset)

    def __getitem__(self, idx):
        """Return (image, label) for `idx`.

        The image is opened with PIL, converted to RGB and passed through
        `self.transform` when one is set. If anything fails while resolving or
        loading the sample, the error is printed and a zero tensor of shape
        (3, 224, 224) is returned in place of the image.

        Raises:
            IndexError: when `idx` is past the end of the dataset. Without this,
                plain iteration (`for item in ds`) would never stop, because the
                error would be turned into a placeholder sample.
        """
        if idx >= len(self):
            raise IndexError(f"index {idx} is out of range for dataset of size {len(self)}")

        # Pre-bind so the error handler below can always refer to them.
        img_path = None
        label = None
        label_resolved = False
        try:
            # Handle different dataset formats
            if hasattr(self.dataset, 'samples'):
                img_path, label = self.dataset.samples[idx]
            else:
                item = self.dataset[idx]  # fetched once, then inspected
                if isinstance(item, tuple):
                    img_path, label = item
                else:
                    img_path = item
                    label = self.labels[idx]
            label_resolved = True

            # Load the image
            img = Image.open(img_path).convert('RGB')

            # Apply transforms
            if self.transform:
                img = self.transform(img)  # This should include ToTensor()

            return img, label
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # Return a zero tensor with the correct dimensions
            img = torch.zeros((3, 224, 224))
            if not label_resolved:
                # The sample's own label was never obtained; use the stored one.
                label = self.labels[idx] if idx < len(self.labels) else 0
            return img, label

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)