In [None]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical


# Path to the dataset root directory
dataset_path = "./Pistachio_Image_Dataset/Pistachio_Image_Dataset"

# Names of the per-class sub-folders
class_folders = ["Kirmizi_Pistachio", "Siirt_Pistachio"]

import pandas as pd
# Load the table whose 'Class' and 'ImageFileName' columns are used in the next cell
df=pd.read_csv('data.csv')

In [None]:
from PIL import Image
import cv2
import numpy as np

# Base directory that holds one sub-folder of image files per class
image_base_path = "Pistachio_Image_Dataset/Pistachio_Image_Dataset"

# Keep only the class label and the image file name from the dataframe.
# .copy() makes X an independent frame, so columns can be added to it later
# without pandas raising SettingWithCopyWarning and without touching df.
X = df[['Class', 'ImageFileName']].copy()

# Target (width, height) that every image is resized to
new_size = (120, 120)

# Load one image from disk and turn it into a model-ready array
def load_and_preprocess_image(row):
    """Load, resize and normalize the image described by one dataframe row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide 'Class' (the sub-folder name under ``image_base_path``)
        and 'ImageFileName' (the file name inside that sub-folder).

    Returns
    -------
    numpy.ndarray
        Float array of shape (new_size[1], new_size[0], 3) scaled to [0, 1].
    """
    image_path = os.path.join(image_base_path, row['Class'], row['ImageFileName'])

    # The context manager releases the file handle as soon as the pixels are
    # read; Image.open alone leaves the file open until garbage collection.
    with Image.open(image_path) as source_image:
        # Force three channels so grayscale / RGBA / palette files produce the
        # same array shape as RGB ones (needed for np.stack and the CNN input).
        resized = source_image.convert('RGB').resize(new_size)
        image_array = np.array(resized)

    # Scale 8-bit pixel values into the 0..1 range
    return image_array / 255.0

# Load the image for every row of the dataframe.
# assign() returns a new frame instead of writing a column into X in place,
# which avoids SettingWithCopyWarning (X was selected from df) and keeps this
# cell safe to re-run.
X = X.assign(ImageData=X.apply(load_and_preprocess_image, axis=1))

# Stack the per-row image arrays into a single NumPy array
X_data = np.stack(X['ImageData'].to_numpy())

# Class labels as a NumPy array (still the string class names at this point)
y_data = X['Class'].to_numpy()

# Sanity-check the resulting shapes
print("Image Data Shape:", X_data.shape)
print("Class Labels Shape:", y_data.shape)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images as a test set, stratified on the class label
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    stratify=y_data,
    random_state=123,
)

# Report the shape of each resulting split
for caption, split_array in (
    ("Train Image Data Shape:", X_train),
    ("Test Image Data Shape:", X_test),
    ("Train Class Labels Shape:", y_train),
    ("Test Class Labels Shape:", y_test),
):
    print(caption, split_array.shape)

In [None]:
# Integer id assigned to each class-folder name
class_mapping = dict(Kirmizi_Pistachio=0, Siirt_Pistachio=1)

# Translate the string labels of each split into their integer ids
# (an unknown label raises KeyError)
y_train_numerical = np.array(list(map(class_mapping.__getitem__, y_train)))
y_test_numerical = np.array(list(map(class_mapping.__getitem__, y_test)))

In [None]:
# One-hot encode the integer labels.
# num_classes is passed explicitly so both matrices always have one column per
# known class, rather than a width inferred from the largest label that
# happens to be present in a given split.
# NOTE: this rebinds y_train / y_test from string labels to one-hot matrices,
# so the label-mapping cell must not be re-run after this one without
# re-running the train/test split first.
y_train = to_categorical(y_train_numerical, num_classes=len(class_mapping))
y_test = to_categorical(y_test_numerical, num_classes=len(class_mapping))


In [None]:
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Silence DeprecationWarning / FutureWarning messages emitted through the
# warnings module
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Fix random seeds for reproducibility.
# NumPy's seed covers NumPy-based shuffling/augmentation only; TensorFlow has
# its own generator (weight initialisation, dropout masks), so seed it too.
seed = 100
np.random.seed(seed)
tf.random.set_seed(seed)

# Number of output classes of the classifier
num_classes = 2

def cnn_model():
    """Build and compile the convolutional classifier.

    Reads the module-level ``new_size`` (the (width, height) images were
    resized to) and ``num_classes`` (width of the softmax output).

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    model = Sequential()

    # Only the first layer declares the input shape; later layers infer theirs
    # from the previous layer's output. Image arrays are (height, width,
    # channels) while new_size is (width, height), hence the swapped indices.
    model.add(Conv2D(100, kernel_size=(3, 3), padding='same', strides=(1, 1),
                     input_shape=(new_size[1], new_size[0], 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    model.add(Conv2D(64, kernel_size=(3, 3), padding='same', strides=(1, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Conv2D(32, kernel_size=(3, 3), padding='same', strides=(1, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    # Classification head
    model.add(Flatten())
    model.add(Dense(127, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # Labels are one-hot encoded, hence categorical cross-entropy
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build a fresh network for the run without data augmentation
model = cnn_model()

# Train on the raw arrays; Keras holds out 20% of them for validation
disp = model.fit(
    X_train,
    y_train,
    batch_size=60,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)

# evaluate() returns [loss, accuracy] for the held-out test set
scores = model.evaluate(X_test, y_test, verbose=0)

print(
    "Test Set - Loss: {:.4f}, Accuracy: {:.2f}%".format(
        scores[0],
        scores[1] * 100,
    )
)


In [None]:


# Data augmentation: random zoom / shear / rotation for the training batches
datagen = ImageDataGenerator(
    zoom_range=0.2,
    shear_range=0.2,
    rotation_range=10,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images must not be randomly distorted, so they come from a plain
# generator. Using the same validation_split fraction keeps the partition
# aligned with the training generator: the split is taken by position in the
# array, not at random.
validation_datagen = ImageDataGenerator(validation_split=0.2)

# datagen.fit(X_train) is intentionally not called: it is only required when
# feature-wise centering / std-normalisation / ZCA whitening is enabled, and
# none of those options is used here.

train_generator = datagen.flow(X_train, y_train, batch_size=60, subset='training')
validation_generator = validation_datagen.flow(X_train, y_train, batch_size=60, subset='validation')

# Fix random seeds for reproducibility.
# NumPy's seed drives the generators' shuffling and random transforms;
# TensorFlow needs its own seed for weight initialisation and dropout.
seed = 100
np.random.seed(seed)
tf.random.set_seed(seed)

# Number of output classes of the classifier
num_classes = 2

def cnn_model():
    """Build and compile the convolutional classifier.

    Note: this notebook also defines ``cnn_model`` in an earlier cell; this
    later definition shadows it from here on.

    Reads the module-level ``new_size`` (the (width, height) images were
    resized to) and ``num_classes`` (width of the softmax output).

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    model = Sequential()

    # Only the first layer declares the input shape; later layers infer theirs
    # from the previous layer's output. Image arrays are (height, width,
    # channels) while new_size is (width, height), hence the swapped indices.
    model.add(Conv2D(100, kernel_size=(3, 3), padding='same', strides=(1, 1),
                     input_shape=(new_size[1], new_size[0], 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    model.add(Conv2D(64, kernel_size=(3, 3), padding='same', strides=(1, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Conv2D(32, kernel_size=(3, 3), padding='same', strides=(1, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    # Classification head
    model.add(Flatten())
    model.add(Dense(127, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # Labels are one-hot encoded, hence categorical cross-entropy
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build a fresh network for the run with data augmentation
model = cnn_model()

# Train from the generators.
# Model.fit accepts generators directly; fit_generator is deprecated and is
# not available in newer Keras releases.
# The former workers=-1 / use_multiprocessing=True arguments are dropped:
# `workers` is a worker count, not an "all cores" switch, and Keras documents
# workers=0 as "run the generator on the main thread" -- a negative value is
# presumably treated the same way, so those arguments did not parallelise
# anything (TODO confirm against the installed Keras version).
disp = model.fit(
    train_generator,
    validation_data=validation_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    epochs=100,
    verbose=1
)


# evaluate() returns [loss, accuracy] for the held-out test set
scores = model.evaluate(X_test, y_test, verbose=0)

print("Test Set - Loss: {:.4f}, Accuracy: {:.2f}%".format(scores[0], scores[1] * 100))

In [None]:
# Plot training vs. validation accuracy per epoch from the last fit history
for history_key in ('accuracy', 'val_accuracy'):
    plt.plot(disp.history[history_key])

plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
