## Rice Classification

Given a Kaggle dataset of rice images, we are going to build a Convolutional Neural Network to classify different rice types.

In [11]:
import os
import zipfile
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

# Data folder, resolved against the working directory the kernel was started in.
DATA_PATH = os.path.join(os.getcwd(), 'data/')
# Sub-folder of DATA_PATH that holds the rice dataset.
RICE_PATH = os.path.join(DATA_PATH, 'rice_dataset/')


### Data preparation

Preprocess the data and create a train/validation split.

In [12]:
zip_path = os.path.join(RICE_PATH, 'archive.zip')

# The archive is deleted once it has been extracted, so a later re-run of this
# cell (e.g. Restart & Run All) finds no zip file and must simply do nothing.
if os.path.exists(zip_path):
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zip_path, 'r') as zip_file:
        zip_file.extractall(RICE_PATH)
    # Only reached after a successful extraction.
    os.remove(zip_path)

In [50]:
# Folder inside RICE_PATH whose sub-directories are passed to the helpers below.
dataset_path = os.path.join(RICE_PATH, 'Rice_Image_Dataset/')

def data_counter(path):
    """
    Counts images in each directory

    Zero-byte files are deleted from disk while counting, so they never
    contribute to the totals. Entries of `path` that are not directories
    are ignored, and inside each directory only regular files are
    considered (nested folders are neither counted nor removed).

    Args:
      -path (string): directory path containing images

    Returns:
      Dictionary containing directory names as keys and number of
      files in directory as values.
    """
    values = {}
    for folder in os.listdir(path):
        folder_path = os.path.join(path, folder)
        # A stray file next to the image folders cannot be listed.
        if not os.path.isdir(folder_path):
            continue
        values[folder] = 0
        for img in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img)
            # Nested folders are not images and os.remove() cannot delete them.
            if not os.path.isfile(img_path):
                continue
            if os.path.getsize(img_path) == 0:
                os.remove(img_path)
            else:
                values[folder] += 1

    return values

# Show the per-directory file counts (data_counter also deletes zero-byte files).
print(data_counter(dataset_path))

{'Karacadag': 15000, 'Arborio': 15000, 'Jasmine': 15000, 'Ipsala': 15000, 'Basmati': 15000}


In [39]:
from PIL import Image
from random import sample

def print_size(dir):
    """
    Print size of random image from each directory

    For every sub-directory of `dir` one file is picked at random, opened
    with PIL and its (width, height) size and colour mode are printed.
    Entries of `dir` that are not directories, and directories without
    any regular file in them, are skipped without output.

    Args:
      -dir (string): directory path containing images

    Returns:
      None
    """
    for directory in os.listdir(dir):
        directory_path = os.path.join(dir, directory)
        # Stray files next to the image folders cannot be listed.
        if not os.path.isdir(directory_path):
            continue
        # Only regular files can be opened as images.
        candidates = [
            name for name in os.listdir(directory_path)
            if os.path.isfile(os.path.join(directory_path, name))
        ]
        # sample() raises ValueError when asked to draw from an empty list.
        if not candidates:
            continue
        image_p = sample(candidates, 1)[0]
        # Image.open keeps the file handle open until the image is closed.
        with Image.open(os.path.join(directory_path, image_p)) as image:
            print(f"Random image size from {directory} directory is: {image.size, image.mode}")


# Print the size and mode of one randomly chosen image per directory.
print_size(dataset_path)

Random image size from Karacadag directory is: ((250, 250), 'RGB')
Random image size from Arborio directory is: ((250, 250), 'RGB')
Random image size from Jasmine directory is: ((250, 250), 'RGB')
Random image size from Ipsala directory is: ((250, 250), 'RGB')
Random image size from Basmati directory is: ((250, 250), 'RGB')


In [66]:
from shutil import copyfile

def create_train_validation(data_path, training_path, validation_path, split_s, seed=None):
    """
    Split the data into train and validation datasets

    Every sub-directory of `data_path` is treated as one class. Its files
    are shuffled and copied (the originals are left in place) into a folder
    of the same name under `training_path` and `validation_path`.

    The output directories themselves are never treated as classes, so they
    may live inside `data_path`. A class whose output folders already
    contain files is skipped, which keeps a re-run from copying a second,
    differently shuffled split on top of the first one.

    Args:
      -data_path (string): directory path containing images
      -training_path (string): directory path to be used for training
      -validation_path (string): directory path to be used for validation
      -split_s (float): portion of the dataset to be used for training,
        between 0 and 1
      -seed (int, optional): seed for the shuffle; with the default None
        every call produces a different split

    Returns:
      Tuple (training_path, validation_path), exactly as passed in.

    Raises:
      ValueError: if split_s is outside the range [0, 1]
    """
    # Local import: the shuffle needs its own seedable generator.
    import random

    if not 0 <= split_s <= 1:
        raise ValueError(f'split_s must be between 0 and 1, got {split_s}')

    # Absolute paths (abspath also drops a trailing slash) so the output
    # directories can be recognised when they sit inside data_path.
    output_dirs = {os.path.abspath(training_path), os.path.abspath(validation_path)}

    dirs = []
    for entry in os.listdir(data_path):
        entry_path = os.path.join(data_path, entry)
        if os.path.isdir(entry_path) and os.path.abspath(entry_path) not in output_dirs:
            dirs.append(entry)

    print("Data directories:")
    for class_name in dirs:
        print(f'-{class_name}')

    try:
        os.makedirs(training_path)
    except FileExistsError:
        print('\nTraining directory exists in this path')

    try:
        os.makedirs(validation_path)
    except FileExistsError:
        print('Validation directory exists in this path')

    # Called for its side effect: data_counter deletes zero-byte files, so
    # they are gone before the directories are listed for the split.
    data_counter(data_path)

    rng = random.Random(seed)

    for class_name in dirs:
        train_name = os.path.join(training_path, class_name)
        validation_name = os.path.join(validation_path, class_name)

        already_split = any(
            os.path.isdir(target) and os.listdir(target)
            for target in (train_name, validation_name)
        )
        if already_split:
            print(f'Split for {class_name} already exists, skipping')
            continue

        os.makedirs(train_name, exist_ok=True)
        os.makedirs(validation_name, exist_ok=True)

        curr_dir = os.path.join(data_path, class_name)
        # os.listdir order is arbitrary; sorting first makes a seeded shuffle
        # reproducible. Only regular files can be copied with copyfile.
        images = sorted(
            name for name in os.listdir(curr_dir)
            if os.path.isfile(os.path.join(curr_dir, name))
        )
        split = int(len(images) * split_s)
        dir_shuffled = rng.sample(images, len(images))

        for img in dir_shuffled[:split]:
            copyfile(os.path.join(curr_dir, img), os.path.join(train_name, img))

        for img in dir_shuffled[split:]:
            copyfile(os.path.join(curr_dir, img), os.path.join(validation_name, img))

    print(f'\nTraining sizes: {data_counter(training_path)}')
    print(f'\nValidation sizes: {data_counter(validation_path)}')

    return training_path, validation_path

    
# 90% of each directory's images go to training, the rest to validation.
training_path, validation_path = create_train_validation(
    dataset_path,
    os.path.join(dataset_path, 'training/'),
    os.path.join(dataset_path, 'validation/'),
    .9,
)

Data directories:
-Karacadag
-Arborio
-Jasmine
-Ipsala
-Basmati

Training sizes: {'Karacadag': 13500, 'Arborio': 13500, 'Jasmine': 13500, 'Ipsala': 13500, 'Basmati': 13500}

Validation sizes: {'Karacadag': 1500, 'Arborio': 1500, 'Jasmine': 1500, 'Ipsala': 1500, 'Basmati': 1500}


In [65]:
from keras.preprocessing.image import ImageDataGenerator

def cra

ModuleNotFoundError: No module named 'keras.preprocesssing'