In [25]:
import numpy as np

data = np.load("./train.npz")
test = np.load("./test.npz")
print(data.files)
print(test.files)


x_train = data['x']
y_train = data['y']
print(x_train.shape)
print(y_train.shape)

x_train = x_train.reshape(-1, 500, 500, 1) /255.0
x_test = test['x'].reshape(-1, 500, 500, 1) / 255.0

['x', 'y']
['x']
(150, 500, 500)
(150,)


In [27]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

print(y_train[:10])
le = LabelEncoder()
y_train = le.fit_transform(y_train)
#y_test = le.transform(y_test)

y_list = ['normal', 'pneumonia']
print(y_train[:10])

['normal' 'normal' 'normal' 'pneumonia' 'normal' 'normal' 'pneumonia'
 'pneumonia' 'normal' 'normal']
[0 0 0 1 0 0 1 1 0 0]


In [103]:
from tensorflow import keras

from keras import Sequential, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping

model = Sequential()
model.add(Input(shape=(500,500,1)))
model.add(Conv2D(32, kernel_size=3, activation="relu", padding="same"))

model.add(MaxPooling2D(2))

model.add(Conv2D(64, kernel_size = 3, activation="relu", padding="same"))
model.add(MaxPooling2D(2))
model.add(Conv2D(128, kernel_size = 3, activation="relu", padding="same"))
model.add(MaxPooling2D(2))
model.add(Conv2D(128, kernel_size = 3, activation="relu", padding="same"))
model.add(BatchNormalization())
model.add(MaxPooling2D(2))
model.add(Flatten())
model.add(Dense(100, activation="relu"))
model.add(Dropout(rate=0.4))
model.add(Dense(1, activation="sigmoid"))

model.summary()

In [30]:
from keras.utils import plot_model

plot_model(model)
plot_model(model, show_shapes=True, to_file="cnn_architecture.png", dpi=300)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.
You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.


In [34]:

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)
checkpoint_cb = ModelCheckpoint("best-cnn-model.keras", save_best_only = True)
early_stopping_cb = EarlyStopping(patience=2, restore_best_weights = True)
model.fit(x_train, y_train, epochs=21)
#history = model.fit(x_train, y_train, epochs=20, validation_data = (x_test, y_test), callbacks=[checkpoint_cb, early_stopping_cb])

Epoch 1/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step - accuracy: 0.5067 - loss: 19.0072
Epoch 2/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.5333 - loss: 5.7627
Epoch 3/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.5800 - loss: 0.8829
Epoch 4/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.8467 - loss: 0.3989
Epoch 5/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9333 - loss: 0.2391
Epoch 6/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9000 - loss: 0.2251
Epoch 7/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9400 - loss: 0.1681
Epoch 8/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9533 - loss: 0.1171
Epoch 9/21
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[

<keras.src.callbacks.history.History at 0x17dcb486cc0>

In [44]:
y_pred = model.predict(x_test)
print(y_pred)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[[9.9999756e-01]
 [7.5978496e-06]
 [9.9901760e-01]
 [9.9908602e-01]
 [9.9989855e-01]
 [4.6147291e-11]
 [3.3237867e-02]
 [9.9246186e-01]
 [9.9964446e-01]
 [1.8823357e-01]
 [7.3247063e-01]
 [6.7364458e-10]
 [8.5349212e-09]
 [5.9492938e-10]
 [3.2255923e-10]
 [1.0000000e+00]
 [9.9932474e-01]
 [4.7997231e-04]
 [9.9999601e-01]
 [9.9990606e-01]
 [2.9815099e-01]
 [4.8346171e-11]
 [9.9976635e-01]
 [1.7548124e-09]
 [7.4749629e-10]
 [8.4767229e-04]
 [2.9052142e-08]
 [9.8521686e-01]
 [9.9999863e-01]
 [9.9941409e-01]
 [9.9969971e-01]
 [1.0000000e+00]
 [5.1556295e-01]
 [2.9775973e-10]
 [2.9075427e-06]
 [9.9999768e-01]
 [9.9990344e-01]
 [9.0252755e-08]]


In [81]:
import numpy as np

y_pred_classes = (y_pred > 0.5).astype(int)
print(y_pred_classes)
print(len(y_pred_classes))

[[1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]]
38


In [93]:
y_pred_classes = y_pred_classes.flatten().tolist()
y = [y_list[i] for i in y_pred_classes]
print(y)

['pneumonia', 'normal', 'pneumonia', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal', 'pneumonia', 'normal', 'normal', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'normal', 'normal', 'normal', 'normal', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'pneumonia', 'normal', 'normal', 'pneumonia', 'pneumonia', 'normal']


In [95]:
import pandas as pd
import numpy as np

df = pd.read_csv("submission.csv")

df.dropna(axis=1, inplace=True)

# 아래 "np.arange(0, df.shape[0], 1)" 부분을, 솜솜이가 만든 모델이 예측한 값으로 대체!
#y_pred = np.arange(0, df.shape[0], 1) 

df["result"] = y
df.to_csv("new_submission.csv", index=False)