# **TUBES PCD**

**1. Load Dataset**

In [None]:
import pandas as pd
from google.colab import drive
import os

# Grant Google Colab access to Google Drive.
drive.mount('/content/drive')

dataset_path = '/content/drive/My Drive/Tubes_PCD/dataset_fix'

# Each sub-directory of the dataset root is treated as one class; every entry
# inside it is recorded as a (file path, class label) pair.
classlist = os.listdir(dataset_path)
filepaths = []
labels = []
for klass in classlist:
    classpath = os.path.join(dataset_path, klass)
    if not os.path.isdir(classpath):
        continue
    for entry in os.listdir(classpath):
        filepaths.append(os.path.join(classpath, entry))
        labels.append(klass)

# Combine paths and labels into one two-column DataFrame.
Fseries = pd.Series(filepaths, name='filepaths')
Lseries = pd.Series(labels, name='labels')
df = pd.concat([Fseries, Lseries], axis=1)

# Preview the first rows and the number of files per class.
print(df.head())
print(df['labels'].value_counts())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
                                           filepaths  labels
0  /content/drive/My Drive/Tubes_PCD/dataset_fix/...  Carrot
1  /content/drive/My Drive/Tubes_PCD/dataset_fix/...  Carrot
2  /content/drive/My Drive/Tubes_PCD/dataset_fix/...  Carrot
3  /content/drive/My Drive/Tubes_PCD/dataset_fix/...  Carrot
4  /content/drive/My Drive/Tubes_PCD/dataset_fix/...  Carrot
labels
Carrot      100
Broccoli    100
Capsicum    100
Potato      100
Name: count, dtype: int64


**3. Pemrosesan Citra Digital**

In [None]:
import cv2
import numpy as np
import os

def augment_image(image_path, output_path, augmentations):
    """Write augmented copies of a single image into ``output_path``.

    Args:
        image_path: Path of the source image to read.
        output_path: Existing directory that receives the augmented files.
        augmentations: Iterable of augmentation names. Supported values are
            'rotate', 'horizontal_flip' and 'vertical_flip'.

    Each result is saved as ``<source stem>_<suffix>_<index>.jpg`` where
    ``index`` is the position of the augmentation in ``augmentations``.
    Unreadable sources, unknown augmentation names and failed writes are
    reported on stdout; nothing is raised for them and nothing is returned.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not read image at {image_path}")
        return

    height, width = img.shape[:2]
    # The stem is the same for every augmentation, so compute it once.
    stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, aug_type in enumerate(augmentations):
        if aug_type == 'rotate':
            # randint's upper bound is exclusive: 31 gives angles in [-30, 30].
            angle = np.random.randint(-30, 31)
            rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
            augmented = cv2.warpAffine(img, rotation_matrix, (width, height))
            suffix = 'rotated'
        elif aug_type == 'horizontal_flip':
            augmented = cv2.flip(img, 1)
            suffix = 'hflip'
        elif aug_type == 'vertical_flip':
            augmented = cv2.flip(img, 0)
            suffix = 'vflip'
        else:
            # Previously a typo in the augmentation name was skipped silently.
            print(f"Warning: Unknown augmentation '{aug_type}' skipped for {image_path}")
            continue

        output_file = os.path.join(output_path, f"{stem}_{suffix}_{i}.jpg")
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(output_file, augmented):
            print(f"Error: Could not write image to {output_file}")

def augment_dataset(dataset_path, output_base_folder, augmentations):
    """Augment every image found under ``dataset_path``.

    Args:
        dataset_path: Root directory that is walked recursively.
        output_base_folder: Root directory for the augmented files. The
            sub-directory layout of ``dataset_path`` is mirrored below it.
        augmentations: Augmentation names forwarded to ``augment_image``.

    Files are selected by extension (.png, .jpg, .jpeg), compared without
    regard to case. Output directories are created only when a directory
    actually contains at least one matching image.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    for subdir, _, files in os.walk(dataset_path):
        # Mirror the source sub-directory below the output folder.
        relative_path = os.path.relpath(subdir, dataset_path)
        output_path = os.path.join(output_base_folder, relative_path)
        for file in files:
            # Lower-case the name so '.JPG' / '.PNG' files are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            os.makedirs(output_path, exist_ok=True)
            augment_image(os.path.join(subdir, file), output_path, augmentations)

# Example usage: augment every image under the original dataset and write the
# results to a separate folder that mirrors the source sub-directories.
dataset_path = '/content/drive/My Drive/Tubes_PCD/dataset_fix'
output_base_folder = '/content/drive/My Drive/Tubes_PCD/Augmented_Dataset' # Replace with the output folder path
# One augmented copy is written per augmentation listed here.
augmentations = ['rotate', 'horizontal_flip', 'vertical_flip']

augment_dataset(dataset_path, output_base_folder, augmentations)

In [None]:
# prompt: melakukan resize untuk menyeragamkan ukuran gambar agar sesuai dengan input layer pertama model CNN.

import cv2

def resize_image(image_path, target_size=(224, 224)):
    """Load an image and return it resized to ``target_size``.

    Args:
        image_path: Path of the image to read.
        target_size: Size tuple passed straight to ``cv2.resize``.

    Returns:
        The resized image, or ``None`` (after printing an error) when the
        file cannot be read.
    """
    source = cv2.imread(image_path)
    if source is not None:
        return cv2.resize(source, target_size)

    print(f"Error: Could not read image at {image_path}")
    return None

def resize_dataset(dataset_path, target_size=(224, 224)):
    """Resize every image under ``dataset_path`` in place.

    Args:
        dataset_path: Root directory that is walked recursively.
        target_size: Size tuple forwarded to ``resize_image``.

    Files are selected by extension (.png, .jpg, .jpeg), compared without
    regard to case. Each image is overwritten at its original path; images
    that cannot be read are left untouched.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    for subdir, _, files in os.walk(dataset_path):
        for file in files:
            # Lower-case the name so '.JPG' / '.PNG' files are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(subdir, file)
            resized_img = resize_image(image_path, target_size)
            if resized_img is None:
                continue
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(image_path, resized_img):
                print(f"Error: Could not write image to {image_path}")

# Example usage
# Resizes every image in the augmented dataset in place to the default 224x224.
# NOTE(review): the training cell loads images with target_size=(128, 128), so
# they are resized again there — confirm that 224x224 is the intended size here.
dataset_path = '/content/drive/My Drive/Tubes_PCD/Augmented_Dataset' # Replace with your dataset path
resize_dataset(dataset_path)

In [None]:
# prompt: lakukan penghapusan noise dengan median filtering dan gaussian blurring

def apply_median_filter(image_path, kernel_size=3):
    """Load an image as grayscale and return its median-filtered version.

    Args:
        image_path: Path of the image to read (loaded with
            ``cv2.IMREAD_GRAYSCALE``, so colour information is dropped).
        kernel_size: Aperture size passed to ``cv2.medianBlur``.

    Returns:
        The filtered image, or ``None`` (after printing an error) when the
        file cannot be read.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        return cv2.medianBlur(gray, kernel_size)

    print(f"Error: Could not read image at {image_path}")
    return None

def apply_gaussian_blur(image_path, kernel_size=(5, 5), sigmaX=0):
    """Load an image as grayscale and return its Gaussian-blurred version.

    Args:
        image_path: Path of the image to read (loaded with
            ``cv2.IMREAD_GRAYSCALE``, so colour information is dropped).
        kernel_size: Kernel size tuple passed to ``cv2.GaussianBlur``.
        sigmaX: Sigma value passed to ``cv2.GaussianBlur``.

    Returns:
        The blurred image, or ``None`` (after printing an error) when the
        file cannot be read.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        return cv2.GaussianBlur(gray, kernel_size, sigmaX)

    print(f"Error: Could not read image at {image_path}")
    return None

def process_dataset(dataset_path, filter_type='median', kernel_size=3, save=True):
    """Apply a noise-reduction filter to every image under ``dataset_path``.

    Args:
        dataset_path: Root directory that is walked recursively.
        filter_type: 'median' (uses ``apply_median_filter``) or 'gaussian'
            (uses ``apply_gaussian_blur`` with a square kernel).
        kernel_size: Kernel size; for 'gaussian' it is expanded to
            ``(kernel_size, kernel_size)``.
        save: When true, each filtered image overwrites its source file.

    Files are selected by extension (.png, .jpg, .jpeg), compared without
    regard to case. An unsupported ``filter_type`` is reported once on stdout
    and no file is touched.
    """
    if filter_type not in ('median', 'gaussian'):
        # Validate once up front instead of repeating the message per image.
        print("Invalid filter type. Choose 'median' or 'gaussian'.")
        return

    image_extensions = ('.png', '.jpg', '.jpeg')
    for subdir, _, files in os.walk(dataset_path):
        for file in files:
            # Lower-case the name so '.JPG' / '.PNG' files are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(subdir, file)
            if filter_type == 'median':
                processed_img = apply_median_filter(image_path, kernel_size)
            else:
                processed_img = apply_gaussian_blur(image_path, (kernel_size, kernel_size))

            if processed_img is not None and save:
                cv2.imwrite(image_path, processed_img)


# Example usage: Median Filtering
# Overwrites every image in the augmented dataset with its median-filtered
# version; the filter helpers load images as grayscale, so the saved files are
# single-channel.
dataset_path = '/content/drive/My Drive/Tubes_PCD/Augmented_Dataset'
process_dataset(dataset_path, filter_type='median', kernel_size=3)

# Example usage: Gaussian Blurring
# process_dataset(dataset_path, filter_type='gaussian', kernel_size=5)

In [None]:
# prompt: konversi ke grayscale

def convert_to_grayscale(image_path):
    """Load an image and return its grayscale conversion.

    Args:
        image_path: Path of the image to read.

    Returns:
        The result of ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``, or ``None``
        (after printing an error) when the file cannot be read.
    """
    colour = cv2.imread(image_path)
    if colour is not None:
        return cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY)

    print(f"Error: Could not read image at {image_path}")
    return None

def process_dataset_grayscale(dataset_path, save=True):
    """Convert every image under ``dataset_path`` to grayscale.

    Args:
        dataset_path: Root directory that is walked recursively.
        save: When true, each converted image overwrites its source file.

    Files are selected by extension (.png, .jpg, .jpeg), compared without
    regard to case. Images that cannot be read are left untouched.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    for subdir, _, files in os.walk(dataset_path):
        for file in files:
            # Lower-case the name so '.JPG' / '.PNG' files are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(subdir, file)
            gray_img = convert_to_grayscale(image_path)
            if gray_img is not None and save:
                cv2.imwrite(image_path, gray_img)

# Example usage
# Converts every image in the augmented dataset to grayscale, overwriting the
# files in place.
dataset_path = '/content/drive/My Drive/Tubes_PCD/Augmented_Dataset'
process_dataset_grayscale(dataset_path)

In [None]:
# prompt: bagi data augmented_dataset diatas menjadi 80% training, 10% test, 10% validation

import os
import shutil
from sklearn.model_selection import train_test_split

def split_dataset(dataset_path, output_path, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1):
    """Splits a dataset into training, validation, and testing sets.

    Every sub-directory of ``dataset_path`` is treated as one class (named
    after the directory's base name). Its images are copied into
    ``output_path/<train|val|test>/<class_name>``.

    Args:
        dataset_path: Path to the dataset directory.
        output_path: Path to the output directory for the split dataset.
        train_ratio: Ratio of the dataset to be used for training.
        val_ratio: Ratio of the dataset to be used for validation.
        test_ratio: Ratio of the dataset to be used for testing.

    Notes:
        * Files are selected by extension (.png, .jpg, .jpeg), compared
          without regard to case; directories without images are skipped.
        * The file list is sorted before splitting so that the fixed
          ``random_state`` yields the same split on every run.
        * Existing files in ``output_path`` are not removed, so leftovers
          from an earlier run with different contents remain in place.
        * NOTE(review): images are split per file, so augmented variants of
          one source image can land in different splits — confirm whether a
          split grouped by source image is required.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    os.makedirs(output_path, exist_ok=True)
    root = os.path.normpath(dataset_path)

    for subdir, _, files in os.walk(dataset_path):
        # Skip the dataset root by path, so a nested directory that merely
        # shares the root's name is still processed.
        if os.path.normpath(subdir) == root:
            continue
        class_name = os.path.basename(subdir)

        # Directory listing order is not guaranteed; sort for reproducibility.
        image_paths = sorted(
            os.path.join(subdir, f) for f in files if f.lower().endswith(image_extensions)
        )
        if not image_paths:
            # Nothing to split here; avoids handing an empty list to train_test_split.
            continue

        train_images, temp_images = train_test_split(image_paths, train_size=train_ratio, random_state=42)
        # Divide the remainder between validation and test in proportion to their ratios.
        val_images, test_images = train_test_split(temp_images, test_size=test_ratio / (val_ratio + test_ratio), random_state=42)

        for split, images in zip(['train', 'val', 'test'], [train_images, val_images, test_images]):
            split_dir = os.path.join(output_path, split, class_name)
            os.makedirs(split_dir, exist_ok=True)
            for img_path in images:
                shutil.copy(img_path, split_dir)

# Example usage:
# Copies the augmented images into train/val/test sub-folders using the
# default 80/10/10 ratios of split_dataset.
dataset_path = '/content/drive/My Drive/Tubes_PCD/Augmented_Dataset' # Path to your augmented dataset
output_path = '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset'  # Output directory

split_dataset(dataset_path, output_path)

**4. Model CNN**

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

In [None]:
# Split directories as written by split_dataset: lower-case 'train', 'val' and
# 'test' folders under Splitted_Augmented_Dataset. All three now point at the
# same split root, matching the values used by the training cell.
train_dir = '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/train'
val_dir = '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/val'
test_dir = '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/test'

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization, Flatten,Input
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model, Sequential
from keras.callbacks import ModelCheckpoint
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.metrics import confusion_matrix, classification_report
import cv2
from google.colab import drive
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16

In [None]:
# Load the VGG16 convolutional base (include_top=False, so no classifier head)
# with ImageNet weights for 128x128x3 inputs.
# NOTE(review): base_model is not referenced by the Sequential model built in
# the training cell visible in this notebook — confirm whether it is still needed.
base_model=tf.keras.applications.VGG16(include_top=False, weights="imagenet",input_tensor=Input(shape=(128,128,3)))

In [None]:
# Print the layer-by-layer summary of the VGG16 base.
base_model.summary()

In [None]:
# Mark the VGG16 base as non-trainable (frozen weights).
base_model.trainable = False

In [None]:
# Training parameters
img_shape = (128, 128, 3)
batch_size = 32
class_count = 4  # Adjust the number of classes to match your dataset

# Dataset directories (train / validation / test)
train_dir, val_dir, test_dir = (
    '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/train',
    '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/val',
    '/content/drive/My Drive/Tubes_PCD/Splitted_Augmented_Dataset/test',
)

# Training data augmentation: rescale to [0, 1] plus random geometric transforms.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation and test data are only rescaled.
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Derive the spatial size from img_shape so the generators and the model input
# cannot drift apart.
target_size = img_shape[:2]

# Data pipelines
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical'
)

# shuffle=False keeps validation/test batches in directory order, so predictions
# stay aligned with `.classes` when building a confusion matrix or
# classification report. Aggregate loss/accuracy are unaffected by the order.
val_data = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

test_data = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Model architecture: four Conv2D + MaxPooling2D stages followed by a softmax
# classifier over class_count classes.
# NOTE(review): the training log captured for this cell stays at chance level
# for the epochs shown (accuracy around 0.25-0.31, loss around 1.386, i.e.
# ln 4). The 64 -> 8 -> 4 -> 96 filter progression squeezes features through
# very narrow layers; revisit the filter counts before relying on this model.
model = tf.keras.Sequential([
    # Declare the input explicitly instead of passing input_shape to the first
    # layer (the form recommended for Sequential models in current Keras).
    tf.keras.Input(shape=img_shape),

    # First convolution block
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Second convolution block
    tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Third convolution block
    tf.keras.layers.Conv2D(filters=4, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Fourth convolution block
    tf.keras.layers.Conv2D(filters=96, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(class_count, activation='softmax')
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model, validating against val_data after every epoch.
history = model.fit(train_data, validation_data=val_data, epochs=25)  # Adjust the number of epochs as needed

# Evaluate the trained model on the test generator and report both metrics.
test_metrics = model.evaluate(test_data)
loss, accuracy = test_metrics
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Found 979 images belonging to 4 classes.
Found 131 images belonging to 4 classes.
Found 131 images belonging to 4 classes.
Epoch 1/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.2466 - loss: 1.3888 - val_accuracy: 0.3130 - val_loss: 1.3852
Epoch 2/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.2593 - loss: 1.3867 - val_accuracy: 0.3130 - val_loss: 1.3849
Epoch 3/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.2621 - loss: 1.3862 - val_accuracy: 0.3130 - val_loss: 1.3832
Epoch 4/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - accuracy: 0.2724 - loss: 1.3856 - val_accuracy: 0.3130 - val_loss: 1.3831
Epoch 5/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - accuracy: 0.2742 - loss: 1.3852 - val_accuracy: 0.2290 - val_loss: 1.3763
Epoch 6/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1