In [1]:
import numpy as np

# Open the training and test archives and list the arrays stored in each.
data, test = np.load("./train.npz"), np.load("./test.npz")
print(data.files)
print(test.files)


def _as_model_input(images):
    """Reshape a stack of 500x500 images to (N, 500, 500, 1) and divide by 255.

    The division presumably maps 8-bit pixel values into [0, 1] -- TODO confirm
    the dtype/range of the stored images.
    """
    return images.reshape(-1, 500, 500, 1) / 255.0


# Training images and their labels, before any reshaping.
x_train, y_train = data['x'], data['y']
print(x_train.shape)
print(y_train.shape)

# Add the trailing channel axis and rescale both image sets the same way.
x_train = _as_model_input(x_train)
test_data = _as_model_input(test['x'])

['x', 'y']
['x']
(150, 500, 500)
(150,)


In [2]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Index -> class-name lookup; later cells use it to turn predicted class
# indices back into label strings.
y_list = ['normal', 'pneumonia']

# Labels as loaded, before encoding.
print(y_train[:10])

# Fit the encoder on the labels, then replace them with their integer codes.
le = LabelEncoder().fit(y_train)
y_train = le.transform(y_train)

# Same ten labels after encoding.
print(y_train[:10])

['normal' 'normal' 'normal' 'pneumonia' 'normal' 'normal' 'pneumonia'
 'pneumonia' 'normal' 'normal']
[0 0 0 1 0 0 1 1 0 0]


In [3]:


from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Hold out 20% of the labelled data for validation (test_size=0.2).
# - stratify keeps the class ratio the same in both parts, which matters with
#   so few samples.
# - random_state pins the shuffle so the split is repeatable.
# - x_test / y_test are the *validation* split of the labelled data; the
#   unlabelled competition images live in `test_data`.
# NOTE: this cell rebinds x_train / y_train, so running it twice splits the
# already-reduced training set again. Re-run the loading cells first.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# NOTE: no augmentation pipeline is wired in. If ImageDataGenerator is used
# later, apply it to the training split only and do not pass rescale=1./255,
# because the pixels were already divided by 255 when the data was loaded.



In [4]:
from keras import regularizers
from keras.regularizers import l2
from tensorflow import keras

from keras import Sequential, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# and batch shuffling are repeatable between runs (some GPU kernels may still
# introduce small differences).
keras.utils.set_random_seed(42)

model = Sequential()

# Input layer: single-channel 500x500 images.
model.add(Input(shape=(500, 500, 1)))

# Block 1: 32 filters -> batch norm -> 2x2 max pooling.
model.add(Conv2D(32, kernel_size=3, activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(MaxPooling2D(2, padding="valid"))

# Block 2: 64 filters -> batch norm -> 2x2 max pooling.
model.add(Conv2D(64, kernel_size=3, activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(MaxPooling2D(2, padding="valid"))

# Classifier head: flatten, one L2-regularised dense layer, then dropout to
# limit overfitting.
model.add(Flatten())
model.add(Dense(128, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(Dropout(0.5))

# Output layer: one sigmoid unit. The labels are encoded as 0/1, so the output
# is the predicted probability of class 1.
model.add(Dense(1, activation="sigmoid"))

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

# Both callbacks monitor the default quantity (val_loss).
# NOTE(review): with patience=5 and epochs=6, early stopping cannot end
# training before the final epoch. Raise epochs or lower patience if early
# stopping is actually wanted.
checkpoint_cb = ModelCheckpoint("best-cnn-model.keras", save_best_only=True)
early_stopping_cb = EarlyStopping(patience=5, restore_best_weights=True)

# Train, keeping the History object so the learning curves can be inspected.
history = model.fit(
    x_train,
    y_train,
    epochs=6,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_cb, early_stopping_cb],
)

# Depending on the Keras version, restore_best_weights may only take effect
# when training is actually stopped early. Reload the best checkpoint so the
# following cells always predict with the lowest-val_loss weights.
model = keras.models.load_model("best-cnn-model.keras")

In [10]:
from keras.utils import plot_model

# Render the architecture once, with layer shapes, into a high-resolution PNG.
# A preceding bare plot_model(model) call was dropped: its result was never
# displayed and it only wrote an extra default "model.png".
# Requires pydot (and Graphviz) to be installed.
plot_model(model, show_shapes=True, to_file="cnn_architecture.png", dpi=300)

You must install pydot (`pip install pydot`) for `plot_model` to work.
You must install pydot (`pip install pydot`) for `plot_model` to work.


In [11]:
# Score the preprocessed test images; each row of the result holds the single
# sigmoid output of the network for one image.
y_pred = model.predict(x=test_data)
print(y_pred)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 164ms/step
[[0.9156888 ]
 [0.01001256]
 [0.86515594]
 [0.5514888 ]
 [0.99961275]
 [0.0066095 ]
 [0.02759365]
 [0.7745929 ]
 [0.9920656 ]
 [0.24772073]
 [0.56874675]
 [0.00909606]
 [0.02350555]
 [0.00506587]
 [0.01333003]
 [0.99114615]
 [0.99701715]
 [0.02624184]
 [0.89767146]
 [0.98276496]
 [0.30371258]
 [0.00556863]
 [0.95717585]
 [0.03781898]
 [0.00542069]
 [0.02063952]
 [0.0024453 ]
 [0.5687903 ]
 [0.9994073 ]
 [0.9985802 ]
 [0.9771901 ]
 [0.9511467 ]
 [0.97859323]
 [0.01971503]
 [0.22975416]
 [0.9976799 ]
 [0.95527494]
 [0.02012039]]


In [12]:
import numpy as np

# Turn the sigmoid outputs into hard class indices: strictly above 0.5 -> 1,
# otherwise 0.
y_pred_classes = np.greater(y_pred, 0.5).astype(int)
print(y_pred_classes)

# Number of predictions (one per test image).
print(y_pred_classes.shape[0])

[[1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]]
38


In [13]:
# Flatten the (N, 1) class indices into a plain Python list of ints.
# np.asarray makes the cell safe to re-run: after the first run
# y_pred_classes is already a list, which has no .flatten() method.
y_pred_classes = np.asarray(y_pred_classes).flatten().tolist()

# Map every class index to its label name via the y_list lookup.
y = [y_list[i] for i in y_pred_classes]
print(y)

['pneumonia', 'normal', 'pneumonia', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal', 'pneumonia', 'normal', 'normal', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'normal', 'normal', 'normal', 'normal', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal']


In [14]:
import pandas as pd
import numpy as np

# Load the submission template and drop every column that contains a missing
# value (presumably the empty placeholder column -- verify against the file).
df = pd.read_csv("submission.csv").dropna(axis=1)

# Fill in the model's predicted labels and write the result next to the
# template, without the index column.
df["result"] = y
df.to_csv("new_submission.csv", index=False)