<a href="https://colab.research.google.com/github/UG-isLab/introduction/blob/main/src/introduction/task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# データセットの読み込み
## 圧縮ファイルを展開する

以下の展開セルは最初に一度だけ実行する（展開後はコメントアウトしたままにしておく）。

In [19]:
!pip install tqdm



In [20]:
# One-time extraction of the training archive from Google Drive.
# The whole cell is kept commented out so the archive is not unpacked again on re-run.
# import tarfile
# from tqdm import tqdm  # progress bar for the extraction loop

# tar_path = '/content/drive/MyDrive/Dataset/train.tar.gz'
# extract_path = '/content/drive/MyDrive/Dataset/train'

# with tarfile.open(tar_path, 'r:gz') as tar:
#     members = tar.getmembers()
#     for member in tqdm(members, desc="Extracting files"):
#         tar.extract(member, path=extract_path)


In [21]:
# One-time extraction of the test archive (kept commented out after the first run).
# If re-enabled, this cell needs `tarfile` and `tqdm` to be imported first.
# tar_path = '/content/drive/MyDrive/Dataset/test.tar.gz'
# extract_path = '/content/drive/MyDrive/Dataset/test'

# with tarfile.open(tar_path, 'r:gz') as tar:
#     members = tar.getmembers()
#     for member in tqdm(members, desc="Extracting files"):
#         tar.extract(member, path=extract_path)

In [22]:
# One-time extraction of the validation archive (kept commented out after the first run).
# Note that the archive is named val.tar.gz but is unpacked into the 'valid' folder.
# If re-enabled, this cell needs `tarfile` and `tqdm` to be imported first.
# tar_path = '/content/drive/MyDrive/Dataset/val.tar.gz'
# extract_path = '/content/drive/MyDrive/Dataset/valid'

# with tarfile.open(tar_path, 'r:gz') as tar:
#     members = tar.getmembers()
#     for member in tqdm(members, desc="Extracting files"):
#         tar.extract(member, path=extract_path)


## データセットを整形

In [23]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os

# Root folder on Google Drive that holds the extracted dataset splits.
dataset_root = '/content/drive/MyDrive/Dataset'

# The split folders are nested one level deeper (e.g. train/train), presumably
# because each archive was extracted into a directory of its own.
train_dir = dataset_root + '/train/train'
valid_dir = dataset_root + '/valid/val'
test_dir = dataset_root + '/test/test'

画像サイズとバッチサイズの設定

In [24]:
# Side length, in pixels, of the square size passed to the dataset loaders.
img_side = 64
img_size = (img_side, img_side)

# Number of images per batch passed to the dataset loaders.
batch_size = 32

In [25]:
# Load the training images from disk.
# Labels are assigned automatically from the names of the sub-folders.
train_loader_options = {
    'image_size': img_size,
    'batch_size': batch_size,
    'shuffle': True,
}
train_ds = keras.utils.image_dataset_from_directory(train_dir, **train_loader_options)

Found 8980 files belonging to 2 classes.


In [26]:
# Derive the number of classes from the class names found in the training set.
class_names = train_ds.class_names
num_classes = len(class_names)
print(f"クラス名: {class_names}")

クラス名: ['0', '1']


In [27]:
# Load the test images with the same image size and batch size as the training set.
test_ds = keras.utils.image_dataset_from_directory(
    directory=test_dir,
    shuffle=True,
    batch_size=batch_size,
    image_size=img_size,
)

Found 2458 files belonging to 2 classes.


In [28]:
# Show the class names detected for the test split so they can be compared
# with the ones found in the training split.
test_class_names = test_ds.class_names
print(test_class_names)

['0', '1']


In [29]:
# Load the validation images with the same image size and batch size as the training set.
valid_ds = keras.utils.image_dataset_from_directory(
    directory=valid_dir,
    shuffle=True,
    batch_size=batch_size,
    image_size=img_size,
)

Found 1448 files belonging to 2 classes.


In [30]:
# Show the class names detected for the validation split so they can be compared
# with the ones found in the training split.
valid_class_names = valid_ds.class_names
print(valid_class_names)

['0', '1']


In [31]:
AUTOTUNE = tf.data.AUTOTUNE

# Normalization: multiply every pixel value by 1/255.
normalization_layer = layers.Rescaling(1./255)


def _normalize(images, labels):
    """Rescale a batch of images and pass its labels through unchanged."""
    return normalization_layer(images), labels


# Normalize the features and keep the labels as they are.
# num_parallel_calls lets tf.data run the map step on several batches in parallel,
# and prefetch overlaps data loading with training.
# NOTE: each name is rebound to its mapped dataset, so re-running this cell without
# re-creating the datasets first applies the rescaling a second time.
train_ds = train_ds.map(_normalize, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
valid_ds = valid_ds.map(_normalize, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.map(_normalize, num_parallel_calls=AUTOTUNE).prefetch(buffer_size=AUTOTUNE)


In [32]:
# Small CNN classifier.
# - Input: img_size[0] x img_size[1] images with 3 channels.
# - Three Conv2D (3x3 kernel, ReLU) + MaxPooling2D stages with 32, 64 and 128 filters.
# - Flatten, then two fully connected layers: 128 ReLU units and a softmax output
#   with one unit per class.
model = keras.Sequential([
    # Declare the input explicitly rather than passing input_shape to the first
    # layer; Keras 3 warns about input_shape on layers inside a Sequential model.
    keras.Input(shape=(img_size[0], img_size[1], 3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model:
# - optimizer: Adam
# - loss: sparse categorical cross-entropy (matches the softmax output above)
# - metric: accuracy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


In [33]:
# Early stopping: monitor the validation loss, stop once it has failed to improve
# for 3 consecutive epochs, and restore the weights of the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_loss',
)

# Train for at most 20 epochs, validating on valid_ds after each one.
fit_options = dict(
    validation_data=valid_ds,
    epochs=20,
    callbacks=[early_stopping],
)
history = model.fit(train_ds, **fit_options)


Epoch 1/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 618ms/step - accuracy: 0.8079 - loss: 0.4240 - val_accuracy: 0.8688 - val_loss: 0.3914
Epoch 2/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 372ms/step - accuracy: 0.9466 - loss: 0.1783 - val_accuracy: 0.8819 - val_loss: 0.3281
Epoch 3/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 373ms/step - accuracy: 0.9662 - loss: 0.1265 - val_accuracy: 0.8826 - val_loss: 0.3976
Epoch 4/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 367ms/step - accuracy: 0.9677 - loss: 0.1228 - val_accuracy: 0.8743 - val_loss: 0.3280
Epoch 5/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 364ms/step - accuracy: 0.9724 - loss: 0.0986 - val_accuracy: 0.8833 - val_loss: 0.3709
Epoch 6/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 348ms/step - accuracy: 0.9720 - loss: 0.0908 - val_accuracy: 0.8826 - val_loss: 0.3592
Epoc

In [34]:
# Evaluate the trained model on the test split and report its accuracy.
evaluation = model.evaluate(test_ds)
test_loss, test_acc = evaluation
print(f"Test Accuracy: {test_acc:.4f}")


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 169ms/step - accuracy: 0.9272 - loss: 0.2206
Test Accuracy: 0.9276
