## Prepare the dataset


In [None]:
import os
import zipfile
import shutil
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow import keras
from tensorflow.keras import Model
from google.colab import files
from tensorflow.keras.preprocessing import image

In [None]:
# Extract the archive.
# The context manager closes the zip file even if extraction fails, and the
# absolute target keeps the output under /content/tmp (the location every
# later cell reads from) regardless of the current working directory.
with zipfile.ZipFile("/content/archive.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/tmp/")

In [None]:
# Maps every extracted folder to the lowercase, space-free name used by the
# rest of the notebook. Order matters: parent folders are renamed first, so
# the child entries already refer to the parents' new names.
folder_mappings = {
    '/content/tmp/Dataset': '/content/tmp/soil_type',
    '/content/tmp/soil_type/Train': '/content/tmp/soil_type/train',
    '/content/tmp/soil_type/test': '/content/tmp/soil_type/validation',
    '/content/tmp/soil_type/train/Alluvial soil': '/content/tmp/soil_type/train/alluvial',
    '/content/tmp/soil_type/train/Black Soil' : '/content/tmp/soil_type/train/black',
    '/content/tmp/soil_type/train/Clay soil' : '/content/tmp/soil_type/train/clay',
    '/content/tmp/soil_type/train/Red soil' : '/content/tmp/soil_type/train/red',
    '/content/tmp/soil_type/validation/Alluvial soil': '/content/tmp/soil_type/validation/alluvial',
    '/content/tmp/soil_type/validation/Black Soil' : '/content/tmp/soil_type/validation/black',
    '/content/tmp/soil_type/validation/Clay soil' : '/content/tmp/soil_type/validation/clay',
    '/content/tmp/soil_type/validation/Red soil' : '/content/tmp/soil_type/validation/red',
}

# Changing the folder name
for old_folder_path, new_folder_path in folder_mappings.items():
    if not os.path.exists(old_folder_path):
        # Nothing to rename (e.g. the cell was already run). Skipping here is
        # essential: deleting the destination first would destroy the only
        # remaining copy of the data.
        print(f"Skipping rename, source not found: {old_folder_path}")
        continue
    if os.path.exists(new_folder_path):
        shutil.rmtree(new_folder_path)  # Replace a stale destination with the fresh source
    os.rename(old_folder_path, new_folder_path)

In [None]:
# List of directories and prefixes to be used
directories = ['/content/tmp/soil_type/train/alluvial',
               '/content/tmp/soil_type/train/black',
               '/content/tmp/soil_type/train/clay',
               '/content/tmp/soil_type/train/red',
               '/content/tmp/soil_type/validation/alluvial',
               '/content/tmp/soil_type/validation/black',
               '/content/tmp/soil_type/validation/clay',
               '/content/tmp/soil_type/validation/red']

prefixes = ['alluvial_', 'black_', 'clay_', 'red_', 'alluvial_', 'black_', 'clay_', 'red_']

# Rename every file in each directory to "<prefix><n>.jpg".
# Full paths are used instead of os.chdir so the notebook's working directory
# is left untouched, and the listing is not stored in a variable called
# `files`, which would hide the google.colab `files` helper.
for directory, prefix in zip(directories, prefixes):
    # Sorted so the numbering is reproducible between runs
    original_names = sorted(os.listdir(directory))

    # Pass 1: park every file under a temporary name. Renaming straight to the
    # final name could overwrite a not-yet-processed file that already carries
    # that name (os.rename replaces existing files silently on POSIX).
    temp_paths = []
    for i, original_name in enumerate(original_names):
        temp_path = os.path.join(directory, f'__renaming_{i}__')
        os.rename(os.path.join(directory, original_name), temp_path)
        temp_paths.append(temp_path)

    # Pass 2: assign the final sequential names
    for i, temp_path in enumerate(temp_paths):
        new_name = prefix + str(i+1) + '.jpg'
        os.rename(temp_path, os.path.join(directory, new_name))

In [None]:
# Define our example directories and files
base_dir = '/content/tmp/soil_type'

train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# Directory with training pictures
train_alluvial_dir = os.path.join(train_dir, 'alluvial')
train_black_dir = os.path.join(train_dir, 'black')
train_clay_dir = os.path.join(train_dir, 'clay')
train_red_dir = os.path.join(train_dir, 'red')

# Directory with validation pictures.
# All four must hang off validation_dir; pointing them at train_dir makes the
# "validation" image counts silently report the training folders instead.
validation_alluvial_dir = os.path.join(validation_dir, 'alluvial')
validation_black_dir = os.path.join(validation_dir, 'black')
validation_clay_dir = os.path.join(validation_dir, 'clay')
validation_red_dir = os.path.join(validation_dir, 'red')

In [None]:
# Load the first alluvial training image once, so the picture that is shown
# and the shape that is reported belong to the same file
sample_path = os.path.join(train_alluvial_dir, os.listdir(train_alluvial_dir)[0])
sample_image = load_img(sample_path)

print("Sample alluvial soil image:")
plt.imshow(sample_image)
plt.show()

# Convert the image into its numpy array representation
sample_array = img_to_array(sample_image)
print(f"Each image has shape: {sample_array.shape}")

# Number of entries in each class folder of the training split
print('total training alluvial images :', len(os.listdir(train_alluvial_dir)))
print('total training black images :', len(os.listdir(train_black_dir)))
print('total training clay images :', len(os.listdir(train_clay_dir)))
print('total training red images :', len(os.listdir(train_red_dir)))

# Number of entries in each class folder of the validation split
print('total validation alluvial images :', len(os.listdir(validation_alluvial_dir)))
print('total validation black images :', len(os.listdir(validation_black_dir)))
print('total validation clay images :', len(os.listdir(validation_clay_dir)))
print('total validation red images :', len(os.listdir(validation_red_dir)))

## Training, Validation, and Test Generator

In [None]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR, batch_size=20, target_size=(260, 260)):
  """Create the training and validation image generators.

  Args:
    TRAINING_DIR: directory with one sub-folder per class, used for training.
    VALIDATION_DIR: directory with one sub-folder per class, used for validation.
    batch_size: number of images per batch for both generators.
    target_size: (height, width) every image is resized to.

  Returns:
    A tuple (train_generator, validation_generator).
  """
  # Normalize pixel values and set arguments to augment the training images
  train_datagen = ImageDataGenerator(rescale=1./255,
                                     rotation_range=45,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     shear_range=0.2,
                                     zoom_range=0.2,
                                     horizontal_flip=True,
                                     fill_mode='nearest')

  # Read from the directory passed in by the caller rather than from a
  # notebook global, so the function works for any dataset location
  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      batch_size=batch_size,
                                                      class_mode='categorical',
                                                      target_size=target_size)

  # Validation data is only rescaled; it should not be augmented
  validation_datagen = ImageDataGenerator(rescale=1/255)

  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                batch_size=batch_size,
                                                                class_mode='categorical',
                                                                target_size=target_size)
  return train_generator, validation_generator

In [None]:
# Build both generators from the train / validation directories
train_generator, validation_generator = train_val_generators(train_dir, validation_dir)

# Class names discovered by each generator (iterating the mapping yields its keys)
class_names_train = list(train_generator.class_indices)
class_names_validation = list(validation_generator.class_indices)

print("Class names train:", class_names_train)
print("Class names validation:", class_names_validation)


## Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Baseline CNN: three conv/max-pool stages followed by a small dense classifier
model = Sequential([
    # Convolutional feature extractor (32 -> 64 -> 128 filters)
    Conv2D(32, (3, 3), activation='relu', input_shape=(260, 260, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier: flatten, one hidden layer with dropout, 4-way softmax output
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(4, activation='softmax'),
])

# Compile with categorical cross-entropy to match the one-hot labels
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview
model.summary()

## Train the Model

In [None]:
# Define a Callback class that stops training once accuracy reaches 95%
class myCallback(tf.keras.callbacks.Callback):
  """Keras callback that halts training once training accuracy exceeds 95%."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the epoch's accuracy and request a stop when it is above 0.95.

    Args:
      epoch: index of the epoch that just finished (unused).
      logs: metric dictionary for the epoch; may be None or lack 'accuracy'.
    """
    # `logs=None` avoids a shared mutable default; a missing metric must not
    # be compared with a float (None > 0.95 raises TypeError)
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy > 0.95:
      print("\nReached 95% accuracy so cancelling training!")
      self.model.stop_training = True

In [None]:
# Re-compile with an explicit Adam optimizer so a small learning rate can be set
learning_rate = 1e-4
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
)

In [None]:
# Initializing custom callback
callbacks = myCallback()

# Derive the number of batches per epoch from the generators instead of
# hard-coding the image counts, so the values stay correct if the dataset or
# the batch size changes. max(1, ...) guards against a split smaller than one batch.
steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

# Training the model (Keras documents `callbacks` as a list of Callback instances)
history = model.fit(train_generator,
                    epochs=100,
                    verbose=2,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validation_generator,
                    validation_steps=validation_steps,
                    callbacks=[callbacks])

## Evaluate the results

You will use the same code to plot the results. As you can see, the validation accuracy is also trending upwards as your training accuracy improves. This is a good sign that your model is no longer overfitting!

In [None]:
import matplotlib.pyplot as plt

# Plot the model results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: accuracy curves
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
# The legend must be drawn before a new figure is opened, otherwise the
# labels given above are never shown on the accuracy plot
plt.legend()

plt.figure()

# Figure 2: loss curves
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Fetch one batch of augmented training images (index 1 holds the labels).
# The built-in next() is used because the iterator's own .next() method is
# not available on newer Keras versions.
train_data = next(train_generator)[0]

# Batch layout is (samples, height, width, channels): the last axis holds the
# colour channels, not the soil classes
num_samples = train_data.shape[0]
num_channels = train_data.shape[3]

# One row per colour channel, one column per image in the batch.
# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(num_channels, num_samples,
                         figsize=(num_samples, num_channels), squeeze=False)

for i in range(num_channels):
    for j in range(num_samples):
        axes[i][j].imshow(train_data[j, :, :, i])
        axes[i][j].axis('off')

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the validation dataset without shuffling, so the order of the
# predictions matches test_generator.classes
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    validation_dir,  # Path to test directory
    target_size=(260, 260),
    batch_size=20,
    class_mode='categorical',
    shuffle=False
)

# Class names as a real list, taken from the same generator that supplies
# y_true so names and indices cannot drift apart
labels = list(test_generator.class_indices)
class_ids = np.arange(len(labels))

# Make predictions
y_true = test_generator.classes
y_pred = model.predict(test_generator)
y_pred = np.argmax(y_pred, axis=1)

# Compute confusion matrix. Passing every class id keeps the matrix aligned
# with the tick labels even if a class never occurs in y_true or y_pred.
confusion_mtx = confusion_matrix(y_true, y_pred, labels=class_ids)

# Plot the confusion matrix as an image
plt.imshow(confusion_mtx, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = class_ids
plt.xticks(tick_marks, labels, rotation=45)
plt.yticks(tick_marks, labels)

# Write each count into its cell; white text on dark cells, black on light ones
thresh = confusion_mtx.max() / 2.
for i in range(confusion_mtx.shape[0]):
    for j in range(confusion_mtx.shape[1]):
        plt.text(j, i, format(confusion_mtx[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if confusion_mtx[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()

# Create classification report
classification_rep = classification_report(y_true, y_pred,
                                           labels=class_ids, target_names=labels)
print('Classification Report:\n', classification_rep)

## Testing 1

In [None]:
from google.colab import files

# Class names in class-index order (a list, so they can be indexed below)
labels = list(train_generator.class_indices)
uploaded = files.upload()

for fn in uploaded.keys():
    # Load the image at the resolution the model was trained on
    img = image.load_img(fn, target_size=(260, 260))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)

    # Normalization (same 1/255 rescaling as the training generators)
    x = x / 255.0

    # Predicting the image
    proba = model.predict(x)[0]

    # Displaying the image
    plt.imshow(img)
    plt.show()

    # Displaying the prediction results
    for label, p in zip(labels, proba):
        print("{}: {:.2f}%".format(label, p * 100))

    # Displaying the best prediction result.
    # argmax picks exactly one winner, and every image gets a verdict: either
    # the winning class (confidence >= 50%) or the "not detected" message.
    # The previous float-equality scan printed nothing at all when the top
    # probability was 0.4 or lower.
    best_index = int(np.argmax(proba))
    best_p = proba[best_index]
    if best_p >= 0.5:
        print('\nResult: ')
        print("{}: {:.2f}%".format(labels[best_index], best_p * 100))
    else:
        print('\nTanah ini tidak terdeteksi!')

# Xception

In [None]:
# Download the Xception "no top" weights. TLS certificate verification is left
# enabled so the weights file cannot be swapped by a man-in-the-middle.
!wget \
    https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /content/Xception_weights_tf_dim_ordering_tf_kernels_notop.h5

In [None]:
from tensorflow.keras.applications.xception import Xception

# Path to the locally downloaded pre-trained Xception weights ("notop" file);
# it is passed to create_pre_trained_model to initialise the base model
local_weights_file = '/content/Xception_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [None]:
def create_pre_trained_model(local_weights_file):
  """Build an Xception base (no top) from a weights file and freeze it.

  Args:
    local_weights_file: path to the weights file loaded into the model.

  Returns:
    The Xception model with every layer marked non-trainable.
  """
  xception_cls = tf.keras.applications.xception.Xception

  # Start from an uninitialised network, then load the weights from disk
  base_model = xception_cls(weights=None,
                            include_top=False,
                            input_shape=(260, 260, 3))
  base_model.load_weights(local_weights_file)

  # Freeze every layer of the base model
  for base_layer in base_model.layers:
    base_layer.trainable = False

  return base_model

In [None]:
# Build the frozen Xception base and show its architecture
pre_trained_model = create_pre_trained_model(local_weights_file=local_weights_file)
pre_trained_model.summary()

In [None]:
def output_of_last_layer(pre_trained_model, layer_name='add_7'):
  """Return the output tensor of one layer of the pre-trained model.

  Args:
    pre_trained_model: model providing `get_layer`.
    layer_name: name of the layer whose output is wanted. Keras auto-generated
      names such as 'add_7' shift when models are rebuilt in the same session,
      so the name can be overridden.

  Returns:
    The symbolic output of the selected layer.
  """
  last_desired_layer = pre_trained_model.get_layer(layer_name)
  last_output = last_desired_layer.output
  # Read the shape from the output tensor: the `output_shape` layer attribute
  # is not available on Keras 3 layers
  print('last layer output shape: ', last_output.shape)
  print('last layer output: ', last_output)

  return last_output

In [None]:
# Output tensor of the selected base-model layer; the dense head is attached to it
last_output = output_of_last_layer(pre_trained_model=pre_trained_model)

In [None]:
# Classifier head applied on top of the selected base-model output, in order:
#   flatten to 1 dimension -> 256-unit ReLU layer -> dropout of 0.2
#   -> 4-unit softmax layer for classification
x = last_output
for head_layer in (
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(4, activation='softmax'),
):
    x = head_layer(x)

# Join the base model's input with the new head's output
model = Model(inputs=pre_trained_model.input, outputs=x)

# Print the model summary; the dense head appears at the end
model.summary()

# Compile the model with a low learning rate
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
)

In [None]:
# Stop early once the training accuracy passes the callback's threshold
callbacks = myCallback()

# Derive the number of batches per epoch from the generators instead of
# hard-coding the image counts; max(1, ...) guards against a split smaller
# than one batch
steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

# Train the transfer-learning model (Keras documents `callbacks` as a list)
history = model.fit(train_generator,
                    epochs=100,
                    verbose=2,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validation_generator,
                    validation_steps=validation_steps,
                    callbacks=[callbacks])

In [None]:
# Plot the model results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: accuracy curves
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
# The legend must be drawn before a new figure is opened, otherwise the
# labels given above are never shown on the accuracy plot
plt.legend()

plt.figure()

# Figure 2: loss curves
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the validation dataset without shuffling, so the order of the
# predictions matches test_generator.classes
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    validation_dir,  # Path to test directory
    target_size=(260, 260),
    batch_size=20,
    class_mode='categorical',
    shuffle=False
)

# Class names as a real list, taken from the same generator that supplies
# y_true so names and indices cannot drift apart
labels = list(test_generator.class_indices)
class_ids = np.arange(len(labels))

# Make predictions
y_true = test_generator.classes
y_pred = model.predict(test_generator)
y_pred = np.argmax(y_pred, axis=1)

# Compute confusion matrix. Passing every class id keeps the matrix aligned
# with the tick labels even if a class never occurs in y_true or y_pred.
confusion_mtx = confusion_matrix(y_true, y_pred, labels=class_ids)

# Plot the confusion matrix as an image
plt.imshow(confusion_mtx, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = class_ids
plt.xticks(tick_marks, labels, rotation=45)
plt.yticks(tick_marks, labels)

# Write each count into its cell; white text on dark cells, black on light ones
thresh = confusion_mtx.max() / 2.
for i in range(confusion_mtx.shape[0]):
    for j in range(confusion_mtx.shape[1]):
        plt.text(j, i, format(confusion_mtx[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if confusion_mtx[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()

# Create classification report
classification_rep = classification_report(y_true, y_pred,
                                           labels=class_ids, target_names=labels)
print('Classification Report:\n', classification_rep)

In [None]:
def download_history():
  """Pickle the global `history.history` to history.pkl and download it via Colab."""
  from google.colab import files
  import pickle

  history_file = 'history.pkl'

  # Serialise the metrics recorded by the last model.fit call
  with open(history_file, 'wb') as out:
    out.write(pickle.dumps(history.history))

  # Hand the file to the Colab download helper
  files.download(history_file)

download_history()

## Saving Model

In [None]:
# Write the current model to a .h5 file in the working directory
saved_model_path = "./model_1.h5"
model.save(filepath=saved_model_path)

In [None]:
# Import here so the download does not depend on the global name `files`
# still being bound to the Colab helper when this cell runs
from google.colab import files

# Download exactly the file that was saved, instead of repeating its name
files.download(saved_model_path)