In [1]:
#installing Kaggle
!pip install kaggle


Collecting kaggle
  Downloading kaggle-1.6.12.tar.gz (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.7/79.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting tqdm (from kaggle)
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-slugify (from kaggle)
  Downloading python_slugify-8.0.4-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting text-unidecode>=1.3 (from python-slugify->kaggle)
  Downloading text_unidecode-1.3-py2.py3-none-any.whl.metadata (2.4 kB)
Downloading python_slugify-8.0.4-py2.py3-none-any.whl (10 kB)
Downloading tqdm-4.66.2-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)
[2K  

In [2]:
# Download the cherry-leaves dataset archive from Kaggle with the Kaggle CLI.
# Per the cell output, the archive is written as cherry-leaves.zip into the
# notebook's current working directory.
# NOTE(review): the CLI presumably needs Kaggle API credentials configured
# beforehand (e.g. a kaggle.json token) — confirm for this environment.
!kaggle datasets download -d codeinstitute/cherry-leaves


Dataset URL: https://www.kaggle.com/datasets/codeinstitute/cherry-leaves
License(s): unknown
Downloading cherry-leaves.zip to /workspace/milestone-project5-mildew-detection-in-cherry-leaves/jupyter_notebooks
 91%|██████████████████████████████████▌   | 50.0M/55.0M [00:01<00:00, 46.3MB/s]
100%|██████████████████████████████████████| 55.0M/55.0M [00:01<00:00, 39.1MB/s]


In [4]:
# Unpack the downloaded archive into a local "cherry-leaves" folder
import zipfile

with zipfile.ZipFile("cherry-leaves.zip", mode="r") as archive:
    archive.extractall(path="cherry-leaves")


In [18]:
import os

# Show the directory the kernel runs in; the relative paths used in the
# other cells are resolved against it
cwd = os.getcwd()
print("Current working directory:", cwd, sep=" ")


Current working directory: /workspace/milestone-project5-mildew-detection-in-cherry-leaves/jupyter_notebooks


In [4]:
from PIL import Image
import os

# Source folders (one per class label) and the root folder for resized copies
dataset_dirs = [
    "./cherry-leaves/cherry-leaves-folder/healthy",
    "./cherry-leaves/cherry-leaves-folder/powdery_mildew",
]
output_dir = "./clean_data"

# Ensure the destination root is present; does nothing when it already exists
os.makedirs(output_dir, exist_ok=True)

# Function to resize images and save them
def resize_and_save_image(image_path, output_path, new_size=(50, 50)):
    """Resize one image file and write the result to a new path.

    Args:
        image_path: Path of the image file to read.
        output_path: Path the resized image is written to. No explicit format
            is passed to ``save``, so Pillow picks it from the file extension.
        new_size: Target size passed straight to ``Image.resize``; Pillow
            interprets it as ``(width, height)`` in pixels. The image is
            stretched to exactly this size.
    """
    # Image.open keeps the underlying file open until the image is closed;
    # the context manager releases the handle even if resize/save raises,
    # which matters when this runs over thousands of files.
    with Image.open(image_path) as image:
        resized_image = image.resize(new_size)
        resized_image.save(output_path)

# Walk every class folder and write a resized copy of each JPEG it contains
for dataset_dir in dataset_dirs:
    # The folder name doubles as the category name
    category = os.path.basename(dataset_dir)

    # Mirror the category folder under the output root
    output_category_dir = os.path.join(output_dir, category)
    os.makedirs(output_category_dir, exist_ok=True)

    # Keep only JPEG files (extension check is case-insensitive)
    jpeg_names = [
        name
        for name in os.listdir(dataset_dir)
        if name.lower().endswith((".jpeg", ".jpg"))
    ]

    # The resized copy keeps the original file name
    for file in jpeg_names:
        resize_and_save_image(
            os.path.join(dataset_dir, file),
            os.path.join(output_category_dir, file),
        )


In [7]:
from PIL import Image
import os
import numpy as np

# Source folders to scan, one per class label
dataset_dirs = [
    "./cherry-leaves/cherry-leaves-folder/healthy",
    "./cherry-leaves/cherry-leaves-folder/powdery_mildew",
]

# Function to compute color histogram features
def compute_color_histogram(image):
    """Build a normalized per-channel HSV histogram feature vector.

    Args:
        image: A Pillow image (anything offering ``convert`` and ``histogram``).

    Returns:
        A 1-D float ``numpy`` array of length 768: 256 hue bins, followed by
        256 saturation bins, followed by 256 value bins. Each 256-bin segment
        is normalized independently so that it sums to 1.
    """
    # Convert image to HSV color space
    hsv = image.convert("HSV")

    # Image.histogram() returns ONE flat list with the bands concatenated
    # (256 counts per band), so it has to be split to get per-channel
    # histograms; calling it three times just yields the same list thrice.
    counts = np.asarray(hsv.histogram(), dtype=float)
    per_channel = counts.reshape(3, 256)  # rows: hue, saturation, value

    # Normalize each channel by its own total (the pixel count)
    totals = per_channel.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0  # zero-pixel image: keep zeros, avoid 0/0
    normalized = per_channel / totals

    # Concatenate histograms into a single feature vector (H, then S, then V)
    hist_features = normalized.ravel()

    return hist_features


# Iterate through dataset directories and print a threshold-based guess per leaf
for dataset_dir in dataset_dirs:
    # The folder name is the known category of every file inside it
    category = os.path.basename(dataset_dir)

    # Iterate through files in the dataset directory
    for file in os.listdir(dataset_dir):
        if file.lower().endswith((".jpeg", ".jpg")):
            # Open inside a context manager so the file handle is released
            # right after the features are computed (one handle per image
            # would otherwise stay open for the whole run)
            with Image.open(os.path.join(dataset_dir, file)) as image:
                color_hist_features = compute_color_histogram(image)

            # Example only: a simple threshold on the first element of the
            # feature vector.
            # NOTE(review): the 0.5 cut-off is an illustrative placeholder,
            # not a validated rule — the printed verdict can disagree with
            # the folder's category.
            if color_hist_features[0] > 0.5:
                print(f"Leaf {file} in category {category} is healthy")
            else:
                print(f"Leaf {file} in category {category} has powdery mildew")
# Output locations for the saved feature vectors: one sub-folder per class
processed_data_dir = "./processed_data"
healthy_data_dir, mildew_data_dir = (
    os.path.join(processed_data_dir, label)
    for label in ("healthy", "powdery_mildew")
)

# Make sure both class folders are present (parents are created as needed)
for class_dir in (healthy_data_dir, mildew_data_dir):
    os.makedirs(class_dir, exist_ok=True)

# Iterate through dataset directories and save one feature file per image
for dataset_dir in dataset_dirs:
    category = os.path.basename(dataset_dir)

    # Determine output directory based on category; the folder a file lands
    # in is what records its label (the .npy file holds the features only)
    output_category_dir = healthy_data_dir if category == "healthy" else mildew_data_dir

    # Iterate through files in the dataset directory
    for file in os.listdir(dataset_dir):
        if file.lower().endswith((".jpeg", ".jpg")):
            # Context manager closes the image file once features are computed
            with Image.open(os.path.join(dataset_dir, file)) as image:
                color_hist_features = compute_color_histogram(image)

            # Strip only the real extension, whatever its letter case, so
            # "leaf.JPG" becomes "leaf.npy" (not "leaf.JPG.npy") and a ".jpg"
            # in the middle of a name is left alone
            stem = os.path.splitext(file)[0]

            # Save the feature vector as a numpy array
            np.save(os.path.join(output_category_dir, stem + ".npy"), color_hist_features)

TypeError: histogram() got an unexpected keyword argument 'source'