In [45]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [48]:
"/content/drive/MyDrive/deep/images_001.zip"

'/content/drive/MyDrive/deep/images_001.zip'

In [51]:
import zipfile

# zip_pathをzipファイルへの正しいパスに修正します
# The zip_path should point to the zip file itself, not a directory inside it.
zip_path = "/content/drive/MyDrive/deep/images_001.zip"

with zipfile.ZipFile(zip_path, "r") as zip_ref:
  # 必要であれば、extractallの引数で解凍先のディレクトリを指定します
  # Specify the extraction directory if needed
  zip_ref.extractall("archive")

# print は引数がないと何も表示しません。何か表示したい場合は引数を追加してください。
# If you want to print something, add arguments to the print function.
# print("Zip file extracted successfully.")

In [56]:
import os
print(os.listdir("archive")) # List the contents of the extracted directory

['images_001', '__MACOSX']


In [57]:
import pandas as pd

csv_path = "/content/drive/MyDrive/deep/Data_Entry_2017.csv"

df = pd.read_csv(csv_path)

df.head

In [58]:
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/deep/Data_Entry_2017.csv")

# Massを含むかどうかで癌(1)　or 癌じゃない(0)のラベルを作る
df["Cancer"] = df["Finding Labels"].apply(lambda x: 1 if "Mass" in x else 0)

print(df["Cancer"].value_counts())

df[["Image Index", "Finding Labels", "Cancer"]].head()

Cancer
0    106338
1      5782
Name: count, dtype: int64


Unnamed: 0,Image Index,Finding Labels,Cancer
0,00000001_000.png,Cardiomegaly,0
1,00000001_001.png,Cardiomegaly|Emphysema,0
2,00000001_002.png,Cardiomegaly|Effusion,0
3,00000002_000.png,No Finding,0
4,00000003_000.png,Hernia,0


In [59]:

import os
from tqdm import tqdm
import cv2
import numpy as np

image_dir = "images_001/images"

images = []
labels = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
  filename = row["Image Index"]
  label = row["Cancer"]
  image_path = os.path.join(image_dir, filename)



  if os.path.exists(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img,(128, 128))
    images.append(img)
    labels.append(label)


X = np.array(images)
y = np.array(labels)


print("画像データの形:", len(X))
print("ラベルの形:", len(y))

100%|██████████| 112120/112120 [00:08<00:00, 12730.24it/s]

画像データの形: 0
ラベルの形: 0





In [63]:
import os
from tqdm import tqdm
import cv2
import numpy as np

# image_dirを修正して、解凍先のディレクトリ内の画像ディレクトリを指すようにします
# Fix image_dir to point to the image directory within the extraction directory
image_dir = "archive/images_001/images"

images = []
labels = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
  filename = row["Image Index"]
  label = row["Cancer"]
  image_path = os.path.join(image_dir, filename)



  if os.path.exists(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img,(128, 128))
    images.append(img)
    labels.append(label)


X = np.array(images)
y = np.array(labels)


print("画像データの形:", len(X))
print("ラベルの形:", len(y))



100%|██████████| 112120/112120 [01:25<00:00, 1313.37it/s] 

画像データの形: 4999
ラベルの形: 4999





In [64]:
print("Xの中身の数:",len(X))
print("ラベルの数:", len(y))

Xの中身の数: 4999
ラベルの数: 4999


In [65]:
from sklearn.model_selection import train_test_split

X = X / 255.0

X = X.reshape(-1, 128, 128, 1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


print("学習データ数:", len(X_train))
print("テストデータ数:", len(X_test))

学習データ数: 3999
テストデータ数: 1000


In [70]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Conv2D(32,(3,3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64,(3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

In [73]:
history = model.fit(X_train, y_train, epochs=5, batch_size=32,
                    validation_data=(X_test, y_test))

Epoch 1/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 796ms/step - accuracy: 0.9284 - loss: 0.2042 - val_accuracy: 0.9680 - val_loss: 0.1467
Epoch 2/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 798ms/step - accuracy: 0.9686 - loss: 0.1453 - val_accuracy: 0.9680 - val_loss: 0.1516
Epoch 3/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 797ms/step - accuracy: 0.9618 - loss: 0.1676 - val_accuracy: 0.9680 - val_loss: 0.1522
Epoch 4/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 795ms/step - accuracy: 0.9648 - loss: 0.1594 - val_accuracy: 0.9680 - val_loss: 0.1628
Epoch 5/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 807ms/step - accuracy: 0.9695 - loss: 0.1337 - val_accuracy: 0.9680 - val_loss: 0.1615


In [74]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("テスト精度:", test_acc)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 186ms/step - accuracy: 0.9680 - loss: 0.1653
テスト精度: 0.9679999947547913
