# **Project:** Pathology Detection in Crop Plants

Members:
* Domenico Azzarito​
* Guillermo Bajo Laborda​
* Laura Alejandra Moreno​
* Arian Gharehmohammadzadehghashghaei​
* Michele Pezza


*Fundamentals of Data Science | Sapienza University of Rome*

## 1. EAD

## 2. CNN

In this step, the image data has been loaded, and also a normaliation, resizing and augmentation process has been implemented.

The key libraries used were TensorFlow for image processing, Pandas for handling the CSV files, and also os library. 



In [1]:
#imports
import tensorflow as tf
import pandas as pd
import os

In [None]:
#data
data_dir = 'images/'  
sample_submission_csv = 'sample_submission.csv'
test_csv = 'test.csv'
train_csv = 'train.csv' 
train_df = pd.read_csv(train_csv)
print(train_df.columns)
train_df.columns = train_df.columns.str.strip()
print(train_df.head())

Index(['image_id', 'healthy', 'multiple_diseases', 'rust', 'scab'], dtype='object')
  image_id  healthy  multiple_diseases  rust  scab
0  Train_0        0                  0     0     1
1  Train_1        0                  1     0     0
2  Train_2        1                  0     0     0
3  Train_3        0                  0     1     0
4  Train_4        1                  0     0     0


In [68]:
# Convert the encoded labels into arrays
def con_process_labels(df):
    labels = df[['healthy', 'multiple_diseases', 'rust', 'scab']].values
    return labels
    

In [63]:
#Decode function to load and preprocess the image file

def decode_image(filename, label=None, image_size=(500, 500)):
    filepath = tf.strings.join([data_dir, filename])  
    bits = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32) 
    
    if label is None:
        return image
    else:
        return image, label


In [64]:
#Data augmentation to the images

def data_augmentation(image, label=None):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    
    if label is None:
        return image
    else:
        return image, label

In [65]:
#Create a Tensorflow dataset for testing

def prepare_dataset(df, image_size=(500, 500), batch_size=32, augment=False, is_train=True):
    file_paths = df['image_id'] + '.jpg'  
    
    if is_train:
        labels = con_process_labels(df) 
        dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
        dataset = dataset.map(lambda x, y: decode_image(x, y, image_size))  
    else:
        dataset = tf.data.Dataset.from_tensor_slices(file_paths)
        dataset = dataset.map(lambda x: decode_image(x, label=None, image_size=image_size))
    
    if augment and is_train:
        dataset = dataset.map(data_augmentation)  
    
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)  
    return dataset

In [66]:
# Create train dataset
train_dataset = prepare_dataset(train_df, image_size=image_size, augment=True, is_train=True)

[[0 0 0 1]
 [0 1 0 0]
 [1 0 0 0]
 [0 0 1 0]
 [1 0 0 0]]


In [69]:
# Load test data
test_df = pd.read_csv(test_csv)
test_df.columns = test_df.columns.str.strip()  # Clean the column names for test set
test_dataset = prepare_dataset(test_df, image_size=(500, 500), augment=False, is_train=False)

In [None]:
#Verification 
for image_batch, label_batch in train_dataset.take(1):
    print(f"Image batch shape: {image_batch.shape}")
    print(f"Label batch shape: {label_batch.shape}")


Image batch shape: (32, 1365, 2048, 3)
Label batch shape: (32, 4)
