<a href="https://colab.research.google.com/github/applejxd/colaboratory/blob/master/ml/TensorFlowCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## データの読み込み



In [1]:
import tensorflow as tf
print(f"TensorFlow version: {tf.__version__}")

TensorFlow version: 2.8.2


In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

print(x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)


## 通常の分類問題として解く

In [3]:
# Dense レイヤ = アファインレイヤ (& 活性化関数)
simple_model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(50, activation='sigmoid'),
  tf.keras.layers.Dense(100, activation='sigmoid'),
  tf.keras.layers.Dense(10),
  tf.keras.layers.Softmax()
])
simple_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 50)                39250     
                                                                 
 dense_1 (Dense)             (None, 100)               5100      
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
 softmax (Softmax)           (None, 10)                0         
                                                                 
Total params: 45,360
Trainable params: 45,360
Non-trainable params: 0
_________________________________________________________________


In [4]:
print(x_train[:1].shape)

# 標本毎にロジットを計算
predictions = simple_model(x_train[:1]).numpy()
predictions

(1, 28, 28)


array([[0.03711247, 0.03759292, 0.05975355, 0.38344875, 0.07513633,
        0.13086529, 0.04521748, 0.08587728, 0.0883568 , 0.05663909]],
      dtype=float32)

In [5]:
# 交差エントロピーによる損失関数
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

loss_fn(y_train[:1], predictions).numpy()

2.2769313

In [6]:
# 学習の構成を指定
simple_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [7]:
# 学習を実施
simple_model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f8dbbda5650>

In [8]:
# 評価
simple_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - loss: 1.5071 - accuracy: 0.9577 - 552ms/epoch - 2ms/step


[1.50706148147583, 0.9577000141143799]

## CNN で解く

In [9]:
# kernel_size = filter_size
cnn_model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(filters=30, kernel_size=(5, 5), 
                         padding="valid", strides=1, activation="relu",
                         input_shape=(28, 28, 1)),
  tf.keras.layers.MaxPool2D((2, 2)),
  tf.keras.layers.Flatten(input_shape=(12, 12)),
  tf.keras.layers.Dense(100, activation="relu"),
  tf.keras.layers.Dense(10),
  tf.keras.layers.Softmax()
])
cnn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 24, 30)        780       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 30)       0         
 )                                                               
                                                                 
 flatten_1 (Flatten)         (None, 4320)              0         
                                                                 
 dense_3 (Dense)             (None, 100)               432100    
                                                                 
 dense_4 (Dense)             (None, 10)                1010      
                                                                 
 softmax_1 (Softmax)         (None, 10)                0         
                                                      

In [10]:
# 学習の構成を指定
cnn_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
# 学習を実施
cnn_model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f8dbbbf5c10>

In [11]:
# 評価
cnn_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 2s - loss: 1.4759 - accuracy: 0.9852 - 2s/epoch - 6ms/step


[1.4759294986724854, 0.9851999878883362]

## PyCaret を使用

In [15]:
import numpy as np
import pandas as pd

def flatten(data):
    flat_data = np.array([
        [mat[row, col] 
            for row in range(mat.shape[0]) 
            for col in range(mat.shape[1])]
         for mat in data])
    return flat_data

x_flat_train = flatten(x_train)
x_flat_test = flatten(x_test)

header = list([f"cell_{num:0>3d}" for num in range(0, 784)])
header.append("label")

train_df = pd.DataFrame(
    np.hstack([x_flat_train, np.array([y_train]).T]), columns=header)
test_df = pd.DataFrame(
    np.hstack([x_flat_test, np.array([y_test]).T]), columns=header)

In [16]:
%%capture
# install pycaret and hide the log by %%capture
!pip install --upgrade pycaret pandas-profiling==3.1.0

In [18]:
from pycaret import classification

exp = classification.setup(
    data=train_df, target="label", session_id=123, silent=True,
    # 相関関係から数値変数の重複を削除
    remove_multicollinearity = True, multicollinearity_threshold = 0.95, 
    pca=True, pca_method="linear")

Unnamed: 0,Description,Value
0,session_id,123
1,Target,label
2,Target Type,Multiclass
3,Label Encoded,
4,Original Data,"(60000, 785)"
5,Missing Values,False
6,Numeric Features,766
7,Categorical Features,18
8,Ordinal Features,False
9,High Cardinality Features,False


In [20]:
best = classification.compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
nb,Naive Bayes,0.8047,0.9642,0.8029,0.8294,0.811,0.783,0.7848,0.698
