# Importing Dependencies
We will be using TensorFlow for the network, and scikit-image (`skimage.feature`) for the GLCM computation; scikit-learn is used only for splitting the data and for evaluation metrics.

In [None]:
import os
import glob
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
import time
import seaborn as sns
import tensorflow as tf
import tensorflow.keras as keras 
import random

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Input, Dropout, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn import metrics


from skimage.feature import greycomatrix, greycoprops
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# Data Loading

If you look at the [Chest X-ray (Covid-19 & Pneumonia)](https://www.kaggle.com/prashant268/chest-xray-covid19-pneumonia) dataset, there are only xxx Covid19 training images, compared to xxx pneumonia training images. Thus, we should (although it is not mandatory) balance the number of images in each class. The [COVID-19 Radiography Database](https://www.kaggle.com/tawsifurrahman/covid19-radiography-database) will give us some more Covid19 x-ray data for training.

**Note:** the cells below actually load the coffee bean roast-level dataset (classes Dark, Green, Light and Medium), so the paragraph above appears to be left over from an earlier chest X-ray version of this notebook.

In [None]:
# Glob patterns that match one sub-folder per class inside the train / test splits.
train_dir = '/kaggle/input/coffee-bean-dataset-resized-224-x-224/train/*'
test_dir = '/kaggle/input/coffee-bean-dataset-resized-224-x-224/test/*'

In [None]:
# Integer class id -> class folder name; the ids follow the listed order.
categories_dict = dict(enumerate(["Dark", "Green", "Light", "Medium"]))

In [None]:
SIZE = 224
TARGET_SIZE = (SIZE,SIZE)
MAX_TRAIN_PER_CLASS = 1500  # upper bound on images read from each class folder
train_images = []
train_labels = []

# Importing the first training dataset.
# glob returns entries in arbitrary order, so the folders (and files) are sorted:
# this keeps the folder order aligned with categories_dict and makes the subset
# selected by the per-class cap reproducible.
for label, directory_path in enumerate(sorted(glob.glob(train_dir))):
    # Guard against a folder layout that does not match the label mapping.
    assert categories_dict[label] == os.path.normpath(directory_path).split(os.path.sep)[-1]
    print(categories_dict[label])
    counter = 0  # number of images actually loaded for this class
    for img_path in sorted(glob.glob(os.path.join(directory_path, "*.png"))):
        if counter >= MAX_TRAIN_PER_CLASS:
            break
        img = cv2.imread(img_path, 0)  # flag 0 -> single-channel grayscale
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            print("skipping unreadable image:", img_path)
            continue
        img = cv2.resize(img, TARGET_SIZE)
        train_images.append(img)
        train_labels.append(label)
        counter += 1
        if counter % 200 == 0:
            print(counter, "images loaded")

    print(counter, "images loaded")

x_train = np.array(train_images)
y_train = to_categorical(train_labels, len(categories_dict))

In [None]:
test_images = []
test_labels = []

# Importing the testing dataset.
# Folders and files are sorted because glob gives no ordering guarantee and the
# label ids must line up with categories_dict.
for label, directory_path in enumerate(sorted(glob.glob(test_dir))):
    # Guard against a folder layout that does not match the label mapping.
    assert categories_dict[label] == os.path.normpath(directory_path).split(os.path.sep)[-1]
    print(categories_dict[label])
    counter = 0  # number of images actually loaded for this class
    for img_path in sorted(glob.glob(os.path.join(directory_path, "*.png"))):
        img = cv2.imread(img_path, 0)  # flag 0 -> single-channel grayscale
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            print("skipping unreadable image:", img_path)
            continue
        img = cv2.resize(img, TARGET_SIZE)
        test_images.append(img)
        test_labels.append(label)
        counter += 1
        if counter % 100 == 0:
            print(counter, "images loaded")

    print(counter, "images loaded")

test_images = np.array(test_images)
test_labels = to_categorical(test_labels, len(categories_dict))

**Splitting data into train and test dataset**

In [None]:
# Hold out 20% of the loaded training images; the fixed random_state keeps the
# split reproducible.
# NOTE: this rebinds test_images / test_labels, replacing the arrays that were
# loaded from test_dir in the previous cell.
train_images, test_images, train_labels, test_labels = train_test_split(x_train, y_train, test_size=0.2,random_state=69)

In [None]:
# Report how many images ended up in each split.
print("train:",train_images.shape[0],", test:",test_images.shape[0])

# "Traditional" Feature Extraction with GLCM

In [None]:
def feature_extractor(images):
    """Compute GLCM texture features for every image.

    For each image, five grey-level co-occurrence matrices are built (one per
    distance/angle configuration below) and five properties are read from each
    matrix, giving 25 features per image.

    Parameters
    ----------
    images : iterable of 2-D arrays
        Grayscale images, passed unchanged to ``greycomatrix``.

    Returns
    -------
    pandas.DataFrame
        One row per image (index 0..n-1) with columns ``Energy{n}``,
        ``Corr{n}``, ``Diss_sim{n}``, ``Homogen{n}`` and ``Contrast{n}`` for
        configuration ``n`` in 0..4. An empty DataFrame is returned when
        ``images`` is empty.
    """
    # greycomatrix(image, distances, angles, levels=256, symmetric=False, normed=False)
    # distances - List of pixel pair distance offsets.
    # angles - List of pixel pair angles in radians.

    # 5 configurations for the grey-level co-occurrence matrix calculation.
    # NOTE(review): angles are interpreted as radians, so 45 and 90 are NOT
    # 45 and 90 degrees. They are left unchanged so the features stay
    # comparable with already trained weights - confirm the intent.
    dists = [[1], [3], [5], [3], [3]]
    angles = [[0], [45], [90], [np.pi/4], [np.pi/2]]

    # (column prefix, greycoprops property name), in output column order.
    properties = (
        ('Energy', 'energy'),
        ('Corr', 'correlation'),
        ('Diss_sim', 'dissimilarity'),
        ('Homogen', 'homogeneity'),
        ('Contrast', 'contrast'),
    )

    # Collect one record per image and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole frame on every call.
    rows = []
    for image in images:
        row = {}
        for n, (dist, angle) in enumerate(zip(dists, angles)):
            GLCM = greycomatrix(image, dist, angle)
            for prefix, prop in properties:
                # A single distance and angle yields a 1x1 result.
                row[prefix + str(n)] = greycoprops(GLCM, prop)[0, 0]
        rows.append(row)

    return pd.DataFrame(rows)

In [None]:
# GLCM texture features for the training split (input of the MLP branch).
train_extr_features = feature_extractor(train_images)

In [None]:
# GLCM texture features for the held-out split.
test_extr_features = feature_extractor(test_images)

In [None]:
# The images were read as 8-bit grayscale, so intensities span 0-255.
# Dividing by 255 (not by the 224-pixel image size) maps them into 0-1.
PIXEL_SCALE = 255.0

# Convert from integers to floats and normalize to the range 0-1.
train_images_norm = train_images.astype('float32') / PIXEL_SCALE
test_images_norm = test_images.astype('float32') / PIXEL_SCALE


In [None]:
# Show every feature column instead of letting pandas truncate the table.
pd.set_option("display.max_columns", None)
# Display the extracted training features as the cell output.
train_extr_features

# Building the Network

**Convolutional Neural Network to extract the high-level features**

In [None]:
def build_cnn():
    """Build the convolutional branch that consumes the grayscale images.

    The network takes 224x224x1 inputs, applies six convolutions with two
    max-pooling stages in between, flattens the result and projects it to an
    8-unit ReLU layer. The model summary is printed as a side effect.

    Returns
    -------
    keras.Sequential
        The (uncompiled) CNN branch.
    """
    # The input shape is declared once through keras.Input. The per-layer
    # `input_shape` arguments were dropped: they described a 140x140 input that
    # does not match this model and are not used for layers that follow an
    # explicit Input.
    model = keras.Sequential([
        keras.Input(shape=(224,224,1), name='Original_Images'),
        keras.layers.Conv2D(filters=32, kernel_size=11, strides=1, activation='relu', name='Conv1'),
        keras.layers.Conv2D(filters=32, kernel_size=11, strides=1, activation='relu', name='Conv2'),
        keras.layers.MaxPool2D(pool_size=(5, 5), strides=2),
        keras.layers.Conv2D(filters=64, kernel_size=9, strides=1, activation='relu', name='Conv3'),
        keras.layers.MaxPool2D(pool_size=(5, 5), strides=2),
        keras.layers.Conv2D(filters=128, kernel_size=8, strides=1, activation='relu', name='Conv4'),
        keras.layers.Conv2D(filters=256, kernel_size=9, strides=1, activation='relu', name='Conv5'),
        keras.layers.Conv2D(filters=256, kernel_size=8, strides=1, activation='relu', name='Conv6'),

        keras.layers.Flatten(),
        keras.layers.Dense(8, activation=tf.keras.activations.relu, name='Dense')
    ])
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would add a stray "None" line).
    model.summary()
    return model

**Multilayer Perceptron to learn the numerical feature data extracted from the GLCM**

In [None]:
def build_mlp():
    """Build the dense branch that consumes the 25 GLCM features.

    Two ReLU layers reduce the 25 inputs to 8 and then 4 units. The model
    summary is printed as a side effect.

    Returns
    -------
    keras.Sequential
        The (uncompiled) MLP branch.
    """
    model = keras.Sequential([
        # Shape is given as a tuple; a bare integer is rejected by Keras 3.
        keras.Input(shape=(25,)),
        keras.layers.Dense(8, activation=tf.keras.activations.relu, name='Dense1'),
        keras.layers.Dense(4, activation=tf.keras.activations.relu, name='Dense2')
    ])
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would add a stray "None" line).
    model.summary()
    return model

**Combining the two feature extractors**

In [None]:
# Instantiate both branches: the MLP for GLCM features, the CNN for images.
mlp = build_mlp()
cnn = build_cnn()

# Join the two branch outputs into a single feature vector.
combinedInput = concatenate([mlp.output, cnn.output])

# Classification head: a small hidden layer, then a 4-way softmax
# (the model therefore outputs class probabilities, not logits).
x = Dense(8, activation="relu")(combinedInput)
x = Dense(4, activation="softmax")(x)

# Input order matters for fit/predict: GLCM features first, images second.
model = Model(inputs=[mlp.input, cnn.input], outputs=x)


In this version of the notebook, we are using Adam as the optimizer. Please check the older version of the notebook to see how other optimizers, such as SGD and RMSprop, perform on the network.

**Reduce Learning Rate on Plateau**, to reduce the learning rate gradually if there is no significant improvement on the performance of the network. 

# Learning

In [None]:
 opt = Adam(learning_rate=0.001)  # initial step size for the Adam optimizer

In [None]:
# The model's last layer already applies softmax, so its outputs are
# probabilities; the loss must therefore NOT treat them as logits
# (from_logits=True would apply softmax a second time).
model.compile(optimizer=opt , loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=[keras.metrics.CategoricalAccuracy()])

# Write a diagram of the two-branch architecture to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
#Adaptive Learning Rate(ALR)

from tensorflow.keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by `factor` once val_loss has failed to improve by
# at least `min_delta` for `patience` epochs; then wait `cooldown` epochs before
# monitoring again.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.1,
    patience=10,
    cooldown=5,
    min_delta=0.0002,
    min_lr=10e-8,  # note: 10e-8 is 1e-7
    verbose=1,
)

# Callback list handed to model.fit.
cb = [lr_scheduler]

In [None]:
#parameter
BATCH_SIZE = 64
EPOCH_NUM = 150

In [None]:
# Pair every GLCM feature row with its image; the images get a trailing
# channel axis so they match the CNN input.
train_image_tensor = tf.expand_dims(train_images_norm, axis=-1)
dataset_inputs = tf.data.Dataset.from_tensor_slices((train_extr_features, train_image_tensor))
dataset_label = tf.data.Dataset.from_tensor_slices(train_labels)

# ((features, image), label) samples, batched and repeated indefinitely;
# steps_per_epoch below decides where an epoch ends.
dataset = tf.data.Dataset.zip((dataset_inputs, dataset_label))
dataset = dataset.batch(BATCH_SIZE).repeat()
STEP_SIZE_TRAIN = train_images_norm.shape[0] // BATCH_SIZE

start_time = time.time()

# Fit the model; the held-out split doubles as validation data, which is what
# the ReduceLROnPlateau callback monitors through val_loss.
val_inputs = [test_extr_features, tf.expand_dims(test_images_norm, axis=-1)]
history = model.fit(
    dataset,
    validation_data=(val_inputs, test_labels),
    epochs=EPOCH_NUM,
    steps_per_epoch=STEP_SIZE_TRAIN,
    callbacks=cb,
)

# Calculate training time
training_time = time.time() - start_time
print("\nTraining time: {:.2f} seconds".format(training_time))

In [None]:
# Persist only the trained weights (not the architecture or optimizer state).
# NOTE(review): the leading dot makes this a hidden file on Unix-like systems,
# and newer Keras releases appear to require a ".weights.h5" suffix - confirm
# against the installed Keras version.
model.save_weights('.Tingkat_RoastingKopi')

# Testing

In [None]:
# Build ((features, image), label) samples for the held-out split, mirroring
# the structure of the training dataset.
test_inputs = tf.data.Dataset.from_tensor_slices((test_extr_features, tf.expand_dims(test_images_norm, axis=-1)))
test_labelz = tf.data.Dataset.from_tensor_slices(test_labels)

# The dataset is batched but NOT repeated, so evaluate() makes exactly one pass
# over every sample, including the last partial batch (floor-dividing the step
# count on a repeated dataset silently dropped those samples).
test_dataset = tf.data.Dataset.zip((test_inputs, test_labelz)).batch(BATCH_SIZE)
STEP_SIZE_TEST = int(np.ceil(test_images_norm.shape[0] / BATCH_SIZE))  # number of batches in one pass

# batch_size is not passed: the dataset is already batched.
score = model.evaluate(test_dataset)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')

# Plotting the metrics

In [None]:
import matplotlib.pyplot as plt
# List the metric names recorded during training (used as plot keys below).
history.history.keys()

In [None]:
# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots()
for metric_key in ('categorical_accuracy', 'val_categorical_accuracy'):
    ax.plot(history.history[metric_key])
ax.set_title('Accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
import skimage
print(skimage.__version__)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from skimage.feature import greycomatrix, greycoprops
from skimage.color import rgb2gray
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import cv2  # or you can use skimage.color.rgb2gray

# Pick the image to classify (uncomment exactly one line).
# image_name = "Dark/dark (1).png"
# image_name = "Light/light (1).png"
image_name = "Green/green (1).png"
# image_name = "Medium/medium (1).png"
test_dir = "/kaggle/input/coffee-bean-dataset-resized-224-x-224/test/"
image_path = os.path.join(test_dir, image_name)  # New test image

class_names = ["Dark", "Green", "Light", "Medium"]


# GLCM feature extraction helper
def extract_glcm_features(image, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4]):
    """Return a flat array of GLCM properties for one grayscale image.

    Six properties are computed for every distance/angle pair (24 values with
    the defaults). These are NOT the 25 features produced by
    ``feature_extractor``, so this helper is not used to build the model input
    below.
    """
    features = []
    glcm = greycomatrix(image, distances=distances, angles=angles, symmetric=True, normed=True)
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']
    for prop in props:
        vals = greycoprops(glcm, prop).flatten()
        features.extend(vals)
    return np.array(features)


# --- Preprocess exactly like the training pipeline ---------------------------
# Read as 8-bit grayscale and resize, as the data-loading cells do.
img_gray = cv2.imread(image_path, 0)
if img_gray is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {image_path}")
img_gray = cv2.resize(img_gray, TARGET_SIZE)

# 2-D uint8 image for the GLCM computation (no batch / channel axes).
img_gray_for_glcm = img_gray

# Scale pixels with the same divisor the training images were normalised with:
# PIXEL_SCALE when the notebook defines it, otherwise 224.0.
try:
    pixel_scale = PIXEL_SCALE
except NameError:
    pixel_scale = 224.0

# CNN input: float32, shape (1, height, width, 1).
img_gray_input = img_gray.astype('float32') / pixel_scale
img_gray_input = img_gray_input[np.newaxis, ..., np.newaxis]

# MLP input: the same 25 GLCM features the model was trained on, shape (1, 25).
glcm_features = feature_extractor([img_gray_for_glcm]).to_numpy(dtype='float32')[0]
feature_array = glcm_features.reshape(1, -1)

# --- Prediction ---------------------------------------------------------------
# Input order follows the model definition: GLCM features first, image second.
inputs = [feature_array, img_gray_input]
predictions = model.predict(inputs)

# The model already ends in a softmax layer, so the outputs are probabilities
# and must not be passed through softmax again.
score = predictions[0]
predicted_class = class_names[int(np.argmax(score))]
confidence = 100 * np.max(score)

print(f"Predicted class: {predicted_class}, Confidence: {confidence:.2f}%")

# --- Show the original colour image -------------------------------------------
img = load_img(image_path, target_size=TARGET_SIZE)
img_array = img_to_array(img)

plt.imshow(img_array.astype('uint8'))
plt.axis('off')
plt.title("Original RGB Image")
plt.show()
