In [1]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image

In [11]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

# Define constants
image_folder = r"C:\Users\farel\Documents\Dataslayer\datagen_baru"
image_size = (224, 224)  # Adjust this based on your CNN model's input size
label_position = 0  # Label is assumed to be before the first underscore

# Initialize lists to store data and labels
X = []
y = []

# Load and process images
for filename in os.listdir(image_folder):
    if filename.endswith(('.png', '.jpg', '.jpeg')):  # Filter for image files
        # Extract label
        label = filename.split('_')[label_position]
        y.append(label)
        
        # Load and preprocess image
        img_path = os.path.join(image_folder, filename)
        image = load_img(img_path, target_size=image_size)  # Resize image
        image_array = img_to_array(image) / 255.0  # Normalize pixel values
        X.append(image_array)



In [12]:
# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)
y = y.astype(int)

# Split data into train and test sets (and optionally validation set)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Output the shapes to verify
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (6152, 224, 224, 3), y_train shape: (6152,)
X_test shape: (1538, 224, 224, 3), y_test shape: (1538,)


In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, GlobalAveragePooling2D, GlobalMaxPooling2D, BatchNormalization, Input
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def build_cnn_model(input_shape=(224, 224, 3)):
    model = Sequential()
    model.add(Input(shape=input_shape))
    # First convolutional layer
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    #model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    
    # Second convolutional layer
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    #model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    
    # # Third convolutional layer
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    #model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    

    model.add(Flatten())
    
    # Fully connected layer
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))  # Dropout for regularization
    # model.add(Dense(32, activation='relu'))
    # model.add(Dropout(0.5))  # Dropout for regularization
    
    # Output layer (sigmoid for binary classification)
    model.add(Dense(1, activation='sigmoid'))
    
    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Example usage:
model = build_cnn_model()
model.summary()


In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=0.00001)
# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)


# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy}")


Epoch 1/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 683ms/step - accuracy: 0.6745 - loss: 0.5794 - val_accuracy: 0.7263 - val_loss: 0.5112 - learning_rate: 1.0000e-04
Epoch 2/15
[1m 97/193[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1:01[0m 640ms/step - accuracy: 0.7319 - loss: 0.5085

In [5]:
from sklearn.model_selection import KFold
import numpy as np

# Number of folds
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize lists to store results
fold_accuracies = []
fold_losses = []

# Convert data to numpy arrays if they aren't already
X_train = np.array(X)
y_train = np.array(y)

# Loop through each fold
fold = 1
for train_index, val_index in kf.split(X_train):
    print(f"Training Fold {fold}...")
    
    # Split the data into training and validation sets for this fold
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
    
    # Build a new model instance
    model = build_cnn_model(input_shape=(224, 224, 3))
    
    # Callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=0.00001)
    
    # Train the model
    history = model.fit(
        X_train_fold, y_train_fold,
        validation_data=(X_val_fold, y_val_fold),
        epochs=15,
        batch_size=32,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    
    # Evaluate the model on the validation set for this fold
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
    print(f"Fold {fold} - Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")
    
    # Store the results
    fold_losses.append(val_loss)
    fold_accuracies.append(val_accuracy)
    
    fold += 1

# Print overall results
print(f"Mean Validation Loss: {np.mean(fold_losses)}")
print(f"Mean Validation Accuracy: {np.mean(fold_accuracies)}")


Training Fold 1...
Epoch 1/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 620ms/step - accuracy: 0.6521 - loss: 0.6161 - val_accuracy: 0.6905 - val_loss: 0.5454 - learning_rate: 1.0000e-04
Epoch 2/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 598ms/step - accuracy: 0.6945 - loss: 0.5602 - val_accuracy: 0.7243 - val_loss: 0.5195 - learning_rate: 1.0000e-04
Epoch 3/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 601ms/step - accuracy: 0.7323 - loss: 0.5193 - val_accuracy: 0.7581 - val_loss: 0.4837 - learning_rate: 1.0000e-04
Epoch 4/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 611ms/step - accuracy: 0.7562 - loss: 0.4927 - val_accuracy: 0.7698 - val_loss: 0.4621 - learning_rate: 1.0000e-04
Epoch 5/15
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 656ms/step - accuracy: 0.7834 - loss: 0.4418 - val_accuracy: 0.7952 - val_loss: 0.4480 - learning_rate: 1.0000e-04
Epoch 6/15


KeyboardInterrupt: 

In [7]:
import pandas as pd
submission1 = pd.read_csv(r"C:\Users\farel\Documents\Dataslayer\submission_1.csv")
submission1['label'] = 1
submission1.to_csv(r"C:\Users\farel\Documents\Dataslayer\submission1.csv", index=False)