In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, mixed_precision
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2_as_graph

2026-01-04 01:07:35.749715: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if not hasattr(np, "object"):


In [2]:
# --- 1. Speed-up setting (mixed precision) ---
# Install 'mixed_float16' as the global Keras dtype policy. This runs before
# any layer is constructed in the cells below, so the models built later are
# created under this policy; the classifier head in build_cnn_body opts out
# explicitly with dtype='float32'.
mixed_precision.set_global_policy('mixed_float16')

In [3]:
# --- 2. FLOPs measurement helper ---
def calculate_flops(model):
    """Count the float operations of one forward pass of ``model``.

    The model is traced at batch size 1, its variables are frozen into
    constants, and the resulting graph is handed to the TF1 profiler.

    Args:
        model: A single-input Keras model whose ``input_shape`` is fully
            defined apart from the batch dimension.

    Returns:
        The profiler's ``total_float_ops`` for the frozen graph.
    """
    # Describe one sample (batch size 1) with the model's own input shape.
    # tuple() keeps the concatenation valid if input_shape is given as a list.
    input_spec = tf.TensorSpec(
        shape=(1,) + tuple(model.input_shape[1:]),
        dtype=tf.float32,
    )

    # Wrap the model in a tf.function and trace it to pin down a graph.
    # The spec is passed directly, NOT wrapped in a list: a list would make
    # the lambda receive `[tensor]`, so the model would be called with a
    # one-element list and Keras would warn that the input structure does
    # not match what the model expects.
    full_model = tf.function(lambda x: model(x))
    concrete_func = full_model.get_concrete_function(input_spec)

    # Convert variables to constants so the profiler sees a frozen graph.
    frozen_func, _ = convert_variables_to_constants_v2_as_graph(concrete_func)

    # Profiler configuration: report float operations only.
    run_meta = tf.compat.v1.RunMetadata()
    opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()

    # Run the profiler, grouping nodes by operation type.
    flops = tf.compat.v1.profiler.profile(
        graph=frozen_func.graph,
        run_meta=run_meta,
        cmd='op',
        options=opts
    )
    return flops.total_float_ops

In [4]:
# --- 3. Dataset preparation (CIFAR-10) ---
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalisation: cast the images to float32 and divide by 255.
X_train, X_test = (
    images.astype(np.float32) / 255.0 for images in (X_train, X_test)
)

# Input pipelines. Only the training stream is shuffled (buffer of 5000
# samples); both streams are batched and prefetched.
BATCH_SIZE = 256

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_ds = train_ds.shuffle(5000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

  d = cPickle.load(f, encoding="bytes")
I0000 00:00:1767456472.025670    3834 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3582 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
2026-01-04 01:07:52.039482: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 614400000 exceeds 10% of free system memory.
2026-01-04 01:07:55.338920: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 614400000 exceeds 10% of free system memory.


In [5]:
# --- 4. Data augmentation layers ---
# Random flip -> rotation -> zoom -> translation, applied in that order.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.RandomFlip("horizontal"))
data_augmentation.add(layers.RandomRotation(0.1))
data_augmentation.add(layers.RandomZoom(0.1))
data_augmentation.add(layers.RandomTranslation(0.1, 0.1))

In [6]:
# --- 5. Model architecture (ResNet50V2 variant) ---
def build_cnn_body(x):
    """Attach a ResNet50V2 backbone and a 10-way softmax head to ``x``.

    Args:
        x: Keras tensor used as the backbone's ``input_tensor``; the
            backbone is declared with ``input_shape=(32, 32, 3)``.

    Returns:
        The output tensor of the final ``Dense(10, softmax)`` layer.
    """
    backbone = tf.keras.applications.ResNet50V2(
        input_tensor=x,
        input_shape=(32, 32, 3),
        include_top=False,  # drop the fully connected top
        weights=None,       # no pretrained weights: train from scratch
        pooling='avg',      # global average pooling on the final feature map
    )

    # pooling='avg' already yields a flat feature vector, so (unlike a VGG
    # style stack) no Flatten layer is needed before the head.
    # The head is pinned to float32 — presumably to keep the softmax
    # numerically stable under the global mixed_float16 policy.
    classifier = layers.Dense(10, activation="softmax", dtype='float32')
    return classifier(backbone.output)

In [7]:
# --- 6. Model construction ---
# build_cnn_body creates a brand-new ResNet50V2 on every call, so calling it
# once per model would give the inference model its own, never-trained
# weights. The network is therefore built exactly once (as the inference
# model) and that same instance is reused inside the training model, so
# both models share every weight.

# (B) Measurement / inference model (no data augmentation)
inputs_test = layers.Input((32, 32, 3))
outputs_test = build_cnn_body(inputs_test)
model_inference = models.Model(inputs_test, outputs_test, name="Inference_Model")

# (A) Training model: augmentation in front of the shared network
inputs_train = layers.Input((32, 32, 3))
x_aug = data_augmentation(inputs_train)   # apply data augmentation
outputs_train = model_inference(x_aug)    # reuse the shared network instance
model = models.Model(inputs_train, outputs_train, name="Training_Model")

In [8]:
# --- 7. Baseline FLOPs measurement ---
# Banner: blank line, rule, title, rule.
print("\n" + "="*40, "【ベースラインモデル 計算量測定】", "="*40, sep="\n")
try:
    # Measure on the augmentation-free model.
    flops_val = calculate_flops(model_inference)
    params = model_inference.count_params()

    print(f"パラメータ数: {params:,}")
    print(f"FLOPs (推論1回あたり): {flops_val / 10**9:.4f} G (ギガ)")
except Exception as e:
    # Best effort: report the failure and let the notebook continue.
    print(f"FLOPs計算エラー: {e}")
print("="*40 + "\n")


【ベースラインモデル 計算量測定】


Expected: keras_tensor_200
Received: inputs=['Tensor(shape=(1, 32, 32, 3))']
I0000 00:00:1767456482.333583    3834 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
I0000 00:00:1767456482.334003    3834 single_machine.cc:376] Starting new session
I0000 00:00:1767456482.335094    3834 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3582 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.

-max_depth                  10000
-min_bytes                  0
-min_peak_bytes             0
-min_residual_bytes         0
-min_output_bytes           0
-min_micros                 0
-min_accelerator_micros     0
-min_cpu_micros             0
-min_params                 0
-min_float_ops              1
-min_occurrence             0
-step                       -1
-order_by                   float_ops
-account_type_regexes       .*
-start_name_regexes         .*
-trim_name_regexes          
-show_name_regexes          .*
-hide_name_regexes          
-account_displayed_op_only  true
-select                     float_ops
-output                     stdout:

パラメータ数: 23,585,290
FLOPs (推論1回あたり): 0.1428 G (ギガ)


Doc:
op: The nodes are operation kernel type, such as MatMul, Conv2D. Graph nodes belonging to the same type 

In [9]:
# --- 8. Compile and train ---
# Stop when val_loss has not improved for 5 consecutive epochs and roll the
# model back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): validation_data is test_ds, i.e. early stopping is driven by
# the test split; the reported val_* numbers are therefore not a held-out
# estimate. Consider carving a validation split out of the training data.
print("学習を開始します...")
history = model.fit(
    train_ds,
    epochs=50,
    validation_data=test_ds,
    callbacks=[early_stop],
)

学習を開始します...
Epoch 1/50


2026-01-04 01:08:13.605490: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 614400000 exceeds 10% of free system memory.
2026-01-04 01:09:13.031140: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91701


[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 698ms/step - accuracy: 0.3557 - loss: 1.8092 - val_accuracy: 0.2148 - val_loss: 2.2624
Epoch 2/50
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 651ms/step - accuracy: 0.4571 - loss: 1.5195 - val_accuracy: 0.4207 - val_loss: 1.7096
Epoch 3/50
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 655ms/step - accuracy: 0.4449 - loss: 1.5871 - val_accuracy: 0.2429 - val_loss: 5.7234
Epoch 4/50
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 658ms/step - accuracy: 0.4903 - loss: 1.4357 - val_accuracy: 0.3718 - val_loss: 4.2399
Epoch 5/50
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 655ms/step - accuracy: 0.5359 - loss: 1.3093 - val_accuracy: 0.5092 - val_loss: 1.4523
Epoch 6/50
[1m  9/196[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:58[0m 634ms/step - accuracy: 0.5515 - loss: 1.2896

KeyboardInterrupt: 

In [None]:
# --- 9. Plot learning curves ---
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# 1-based epoch numbers so the x-axis lines up with the "Epoch n/50" lines
# that Keras prints during training.
epochs_range = range(1, len(acc) + 1)

# Explicit figure/axes interface: one panel for accuracy, one for loss.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set(title='Training and Validation Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend(loc='lower right')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set(title='Training and Validation Loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend(loc='upper right')

# Keep the axis labels of the two panels from overlapping.
fig.tight_layout()
plt.show()