In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Input Paths 

* /kaggle/input/cassava-leaf-disease-classification/sample_submission.csv
* /kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json
* /kaggle/input/cassava-leaf-disease-classification/train.csv
* /kaggle/input/cassava-leaf-disease-classification/train_images
* /kaggle/input/cassava-leaf-disease-classification/test_images

In [3]:
import os 
import numpy as np
import pandas as pd
from PIL import Image
import cv2 as cv 
 
data_dir = r"/kaggle/input/cassava-leaf-disease-classification/train_images"

new_dir  = r"/kaggle/working/train_resized"


def pre_process(data_dir,target_size = (224, 224)):
    images = os.listdir(data_dir)
    os.mkdir(new_dir)
    for image in images:
        k =  os.path.join(data_dir, image)
        img = cv.imread(k)
        img = cv.resize(img, target_size)
        cv.imwrite(os.path.join(new_dir, image), img)
    
pre_process(data_dir)

In [4]:
len(os.listdir(new_dir))

21397

In [None]:
# os.remove(test_csv_loc)
# shutil.rmtree(test_dir)

In [5]:
import os
import numpy as np
import pandas as pd
import cv2 as cv
import shutil

annotations = r"/kaggle/input/cassava-leaf-disease-classification/train.csv"

new_data_dir = r"/kaggle/working/train_resized"

test_csv_loc = r"/kaggle/working/test_final.csv"

test_dir = r"/kaggle/working/test_dir"

new_train_csv = r"/kaggle/working/new_train.csv"

df = pd.read_csv(annotations)

# Creating test data from the total data available 

def split_data(new_data_dir,df,test_dir,test_csv_loc):
    os.mkdir(test_dir)
    num_of_test_images = 1000
    # Randomly sample 1000 images into testset
    test = df.sample(num_of_test_images)
    
    # Move images to test directory
    for image in test["image_id"]:
        shutil.move(os.path.join(new_data_dir, image ), os.path.join(test_dir, image ))
    
    print(len(os.listdir(test_dir)), 'test images saved.')
    # Create a csv file for test data
    test.to_csv(test_csv_loc,index = False)
    # Remove the moves data from original csv file 
    df = df.drop(test.index)
    print("train shape",df.shape)
    df.to_csv(new_train_csv,index = False)
    

split_data(new_data_dir,df,test_dir,test_csv_loc)



1000 test images saved.
train shape (20397, 2)


In [6]:
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)



[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [7]:
if physical_devices:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(physical_devices), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

2 Physical GPUs, 2 Logical GPUs


In [8]:
data = pd.read_csv(new_train_csv)
image_paths = [f"{new_data_dir}/{name}" for name in data["image_id"]]
labels = data["label"].tolist()

In [9]:
image_paths[:4], labels[:4]

(['/kaggle/working/train_resized/1000015157.jpg',
  '/kaggle/working/train_resized/1000201771.jpg',
  '/kaggle/working/train_resized/100042118.jpg',
  '/kaggle/working/train_resized/1000723321.jpg'],
 [0, 3, 1, 1])

In [10]:
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

In [11]:
# Preprocess images on CPU
def preprocess_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)  # Assuming JPEG images
    image = tf.image.resize(image, (256, 256))  # Adjust resizing as needed
    image = tf.cast(image,tf.bfloat16)/255.0
#     image = tf.keras.applications.vgg16.preprocess_input(image)  # Example preprocessing
    return image, label

In [12]:
# batch_size = 32
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
# dataset = dataset.batch(batch_size)

In [13]:
train_size=0.8
batch_size=32

# Split the dataset into training and testing sets
train_size = int(train_size * len(dataset))
train_dataset = dataset.take(train_size)
test_dataset = dataset.skip(train_size)

# Batch and prefetch
train_dataset = train_dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [14]:
train_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.bfloat16, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

### EffecientNetB0

In [16]:
with tf.device('/GPU:0'):
    Effnet_model = tf.keras.applications.EfficientNetB0(weights='imagenet', input_shape=(256,256,3), include_top=False)
    
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(256,256,3)))
    model.add(Effnet_model)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.25))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(5, activation='softmax')) # num labels = 5

model.summary()
  

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb0 (Functional  (None, 8, 8, 1280)        4049571   
 )                                                               
                                                                 
 flatten (Flatten)           (None, 81920)             0         
                                                                 
 dropout (Dropout)           (None, 81920)             0         
                                                                 
 dense (Dense)               (None, 128)               10485888  
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 5)                 645       
                                                      

In [None]:
# Compile the model
model.compile(optimizer= tf.keras.optimizers.Adam(learning_rate = 0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Train the model in batches on GPU
history = model.fit(train_dataset,epochs=25, batch_size=32) 

Epoch 1/25


2024-01-21 16:44:41.639021: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_1/efficientnetb0/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25

In [None]:
model.evaluate(test_dataset)