In [None]:
import os 
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np 
from tensorflow import keras
from tensorflow.python.keras.callbacks import EarlyStopping
from IPython.display import Image, display
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.layers import Dense, Input, Embedding, Dropout, concatenate,Conv1D, GlobalMaxPooling1D,Concatenate
import PIL
from PIL import ImageFile
from PIL import ImageOps
# Let Pillow decode image files whose data is cut short instead of raising while loading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# --- Dataset locations -------------------------------------------------------
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
input_dir = 'C:/Users/oceanlightai/Desktop/datasets/pet_skin/train/train_image3'
target_dir = 'C:/Users/oceanlightai/Desktop/datasets/pet_skin/train/train_mask3'

# --- Image / training configuration -----------------------------------------
img_size = (256,256)
num_classes = 6
batch_size = 32

# --- Text / label configuration ---------------------------------------------
MAX_WORDS = 10000        # vocabulary cap handed to the Tokenizer in process_texts
SEQUENCE_LENGTH = 1      # padded length of every tokenized text sequence
BREED_CLASSES = 50
LESIONS_CLASSES = 6      # width of the softmax 'lesions' output layer
LOCATION_CLASSES = 4

# Both listings use the SAME filter (JPEG files, no dot-files). Previously only
# the mask listing skipped hidden files, so a stray '._xxx.jpg' in the image
# folder would shift every later image/mask pair out of alignment.
# Sorting keeps images and masks paired by filename order.
input_img_paths = sorted([os.path.join(input_dir,fname)
                         for fname in os.listdir(input_dir)
                         if fname.endswith('.jpg') and not fname.startswith('.')])

target_img_paths = sorted([os.path.join(target_dir,fname)
                         for fname in os.listdir(target_dir)
                         if fname.endswith('.jpg') and not fname.startswith('.')])

In [None]:
class SkinDiseaseDataset(keras.utils.Sequence):
    """Batch generator yielding ``([images, text], masks)`` for Keras training.

    Args:
        batch_size: Number of samples per batch.
        img_size: ``(height, width)`` every image and mask is resized to.
        input_img_paths: Paths of the input images.
        target_img_paths: Paths of the mask images, in the same order as
            ``input_img_paths``.
        text_data: Per-sample text features; must support slicing and be
            aligned index-by-index with the image paths.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths, text_data):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths
        self.text_data = text_data  # text features added alongside the images

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Load batch ``idx``.

        Returns:
            ``([x, batch_text_data], y)`` where ``x`` is a float32 array of RGB
            images scaled to ``[0, 1]``, ``batch_text_data`` is the matching
            slice of ``text_data``, and ``y`` is a uint8 array of masks with one
            channel holding integer class ids.
        """
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i:i+self.batch_size]
        batch_target_img_paths = self.target_img_paths[i:i+self.batch_size]
        batch_text_data = self.text_data[i:i+self.batch_size]  # matching text slice

        size = tuple(self.img_size)

        # Fix: the image buffer was never allocated, so `x[j] = ...` raised NameError.
        x = np.zeros((len(batch_input_img_paths),) + size + (3,), dtype='float32')
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)
            x[j] = img_to_array(img) / 255.  # input normalization

        y = np.zeros((len(batch_target_img_paths),) + size + (1,), dtype='uint8')
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode='grayscale')
            # Integer-dividing by 51 (= 255 / 5) maps grayscale 0..255 onto class ids 0..5.
            y[j] = img_to_array(img) // 51

        return [x, batch_text_data], y  # two model inputs, one target

In [None]:
def process_texts(texts):
    """Convert raw texts into fixed-length integer sequences.

    A new ``Tokenizer`` (vocabulary capped at ``MAX_WORDS``) is fit on ``texts``
    on every call, then the same texts are encoded and padded/truncated to
    ``SEQUENCE_LENGTH`` tokens.

    Args:
        texts: Iterable of strings to tokenize.

    Returns:
        The array produced by ``pad_sequences`` for the encoded texts.
    """
    vocab = Tokenizer(num_words=MAX_WORDS)
    vocab.fit_on_texts(texts)
    return pad_sequences(vocab.texts_to_sequences(texts), maxlen=SEQUENCE_LENGTH)

In [None]:
# Imported in this cell because, in run order, this is the first use of
# LabelEncoder; without it a fresh kernel raises NameError here.
from sklearn.preprocessing import LabelEncoder

# Shared encoder used by process_data to turn categorical columns into integer ids.
label_encoder = LabelEncoder()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Quick probe: fit a decision tree on the tabular metadata to see which
# columns carry signal for predicting 'lesions'.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
your_dataframe = pd.read_csv('C:/Users/oceanlightai/Desktop/datasets/pet_skin/train/train_skin.csv')

# Remove 'Raw data ID' and 'polygon_location' columns
your_dataframe = your_dataframe.drop(columns=['Raw data ID', 'polygon_location'])

# Apply LabelEncoder to each categorical variable. The encoder is refit per
# column, so after the loop `le` only remembers the classes of 'region'.
le = LabelEncoder()
for column in ('breed', 'gender', 'region'):
    your_dataframe[column] = le.fit_transform(your_dataframe[column])

# Define the model. A fixed random_state makes the reported importances
# reproducible across runs (tie-breaking between equally good splits is random).
model = DecisionTreeClassifier(random_state=42)

# Prepare the data (X: features, y: target 'lesions')
X = your_dataframe.drop(['lesions'], axis=1)
y = your_dataframe['lesions']

# Train the model
model.fit(X, y)

# Print feature importances
feature_importances = model.feature_importances_
for name, importance in zip(X.columns, feature_importances):
    print(f'Feature: {name}, Importance: {importance}')

In [None]:
def process_data(data, image_base_path):
    """Split a metadata table into image inputs, age values and one-hot labels.

    Args:
        data: Table with at least the columns 'Raw data ID', 'age', 'lesions'
            and 'region'.
        image_base_path: Base directory forwarded to ``process_images``.

    Returns:
        Tuple ``(image_data, age_data, lesions_labels, region_labels)``.
    """
    # NOTE(review): process_images is not defined in the visible cells; it must
    # exist in the kernel before this function is called.
    image_data = process_images(data['Raw data ID'], image_base_path)
    age_data = data['age'].values

    # The module-level encoder is refit for each column, so afterwards it only
    # holds the classes of the last column ('region'). The former 'breed'
    # encoding was computed but never returned, so it has been dropped.
    # NOTE(review): to_categorical infers the width from the largest label seen;
    # confirm it matches the class counts the model expects.
    lesions_labels = to_categorical(label_encoder.fit_transform(data['lesions']))
    region_labels = to_categorical(label_encoder.fit_transform(data['region']))

    return image_data, age_data, lesions_labels, region_labels

In [None]:
def get_unet(img_size, num_classes):
    """Build the convolutional image branch used as a feature extractor.

    The network downsamples with separable-conv residual blocks, upsamples back
    with transposed-conv residual blocks, projects to ``num_classes`` channels
    and finally global-max-pools over the spatial axes.

    Args:
        img_size: ``(height, width)`` tuple of the RGB input images.
        num_classes: Number of channels of the final convolution, which is also
            the length of the pooled output vector.

    Returns:
        A ``keras.Model`` mapping ``img_size + (3,)`` images to a vector of
        ``num_classes`` values per sample.
    """
    # Fix: `layers` is not imported anywhere in the notebook; bind it from the
    # already-imported `keras` package so the function works on a fresh kernel.
    layers = keras.layers

    inputs = keras.Input(shape=img_size + (3,))

    # Entry block: halve the spatial resolution.
    x = layers.Conv2D(32, 3, strides=2, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

    previous_block_activation = x  # kept for the residual connection

    # Downsampling blocks: each one halves the resolution again.
    for filters in [64, 128, 256]:
        x = layers.Activation('relu')(x)
        x = layers.SeparableConv2D(filters, 3, padding='same')(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation('relu')(x)
        x = layers.SeparableConv2D(filters, 3, padding='same')(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding='same')(x)

        # Project the block input to the same shape (strided 1x1 conv) and add it back.
        residual = layers.Conv2D(filters, 1, strides=2, padding='same')(previous_block_activation)
        x = layers.add([x, residual])
        previous_block_activation = x

    # Upsampling blocks: each one doubles the resolution, ending at the input size.
    for filters in [256, 128, 64, 32]:
        x = layers.Activation('relu')(x)
        x = layers.Conv2DTranspose(filters, 3, padding='same')(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation('relu')(x)
        x = layers.Conv2DTranspose(filters, 3, padding='same')(x)
        x = layers.BatchNormalization()(x)

        x = layers.UpSampling2D(2)(x)

        # Upsample and project the block input so it can be added back.
        residual = layers.UpSampling2D(2)(previous_block_activation)
        residual = layers.Conv2D(filters, 1, padding='same')(residual)
        x = layers.add([x, residual])
        previous_block_activation = x

    # Per-pixel projection to num_classes channels (activation changed to 'relu').
    outputs = layers.Conv2D(num_classes, 3, activation='relu', padding='same')(x)
    # GlobalMaxPooling2D added: collapse the spatial map into one vector per image.
    x = layers.GlobalMaxPooling2D()(outputs)
    # The pooled vector `x` is the model output instead of the per-pixel `outputs`.
    model = keras.Model(inputs, x)
    return model

In [None]:
# Length of each embedding vector for the text/category input (passed as Embedding's output_dim).
EMBEDDING_DIM = 50

In [None]:
# Text/category branch: one integer id per sample.
input_text = Input(shape=(1,), dtype='int32')
# Category embedding.
# NOTE(review): input_dim=7 means only ids 0..6 are valid; confirm this matches
# the ids actually fed in (process_texts allows up to MAX_WORDS tokens).
x = Embedding(input_dim=7, output_dim=EMBEDDING_DIM)(input_text)
# Collapse the length-1 sequence axis so the branch output is one vector per
# sample and can be concatenated with the image branch.
x = keras.layers.Flatten()(x)
# Fix: Dense previously consumed the raw int32 `input_text`, which discarded the
# embedding computed above; it now consumes the embedded vector.
x = Dense(128, activation='relu')(x)
# Fix: BatchNormalization and Model were never imported; use them via `keras`.
x = keras.layers.BatchNormalization()(x)
x = Dropout(0.4)(x)
text_model = keras.Model(inputs=input_text, outputs=x)

In [None]:
# Build the image branch from the shared config constants (img_size, num_classes)
# rather than repeating their literal values, so the model input size cannot
# drift away from the size the data pipeline is configured with.
unet_model = get_unet(img_size, num_classes=num_classes)

In [None]:
# Join the image-branch vector and the text-branch vector along the last axis.
combined_input = Concatenate()([unet_model.output, text_model.output])

In [None]:
# Classification head: softmax over the lesion classes on the fused features.
output_lesions = Dense(
    units=LESIONS_CLASSES,
    activation='softmax',
    name='lesions',
)(combined_input)

In [None]:
# Two-input (image + text), single-output model.
# Fix: `Model` was never imported; reference it through the imported `keras` package.
model = keras.Model(inputs=[unet_model.input, text_model.input], outputs=[output_lesions])

In [None]:
# Print the layer-by-layer summary of the combined image + text model.
model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam

# Configure training: Adam with a very small step size, cross-entropy on
# one-hot targets, accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-6),
    metrics=['accuracy'],
)

# Checkpoint callback: write the model to disk, keeping only the best version.
callbacks = [
    keras.callbacks.ModelCheckpoint(filepath='Unet_segmentation_1.h5', save_best_only=True),
]

In [None]:
# Single-output run (original note: "1", i.e. one output head).
# NOTE(review): train_gen / val_gen are not defined in the visible cells; they
# must be created before this cell runs.
# NOTE(review): SkinDiseaseDataset yields mask arrays as targets while this model
# ends in a softmax over LESIONS_CLASSES; confirm the generators feed one-hot
# lesion labels.
# Fix: the `callbacks` list built in the compile cell was never handed to fit(),
# so the ModelCheckpoint never ran; pass it explicitly.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=30,
    callbacks=callbacks,
)