In [1]:
import os

# Pacotes

In [2]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input
from pathlib import Path

2022-05-31 19:07:31.415207: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
# Image size: height and width in pixels, plus 3 colour channels
img_rows = img_cols = 600
input_shape = (img_rows, img_cols, 3)

# Number of images per batch produced by the dataset loaders
batch_size = 32

# Random seed (not referenced by the cells visible in this notebook)
seed = 42

# Dataset root directory and the sub-directory of each split
path_dataset = Path('/scratch/dpfurtado/backup_home/corais/1_classificacao_train_50/dataset/1_train_50')
path_train = path_dataset.joinpath('train')
path_train_dev = path_dataset.joinpath('train-dev')
path_test = path_dataset.joinpath('test_1_2')

# Output directory and base file name of the NumPy archive with the features
path_dir_dataset = './dataset'
npz = f'{path_dir_dataset}/extracao_efficientnetb7'

In [4]:
# Options shared by every split. Keep shuffle=False: no training happens here,
# and a stable file order lets labels read in a later pass over the same
# dataset line up with the predicted features.
loader_options = dict(
    image_size=(img_rows, img_cols),
    color_mode="rgb",
    batch_size=batch_size,
    shuffle=False,
)

train_dataset = image_dataset_from_directory(path_train, **loader_options)
train_dev_dataset_concat = image_dataset_from_directory(path_train_dev, **loader_options)
test_dataset = image_dataset_from_directory(path_test, **loader_options)

Found 788 files belonging to 7 classes.


2022-05-31 19:07:40.988745: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-05-31 19:07:40.991029: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-05-31 19:07:41.964349: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3b:00.0 name: NVIDIA GeForce GTX TITAN Black computeCapability: 3.5
coreClock: 0.98GHz coreCount: 15 deviceMemorySize: 5.94GiB deviceMemoryBandwidth: 312.92GiB/s
2022-05-31 19:07:41.965138: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:af:00.0 name: NVIDIA GeForce GTX TITAN Black computeCapability: 3.5
coreClock: 0.98GHz coreCount: 15 deviceMemorySize: 5.94GiB deviceMemoryBandwidth: 312.92GiB/s
2022-05-31 19:07:41.965178: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcu

Found 106 files belonging to 7 classes.
Found 168 files belonging to 7 classes.


In [5]:
# Append the train-dev batches after the train batches so that both splits are
# processed as one training set.
# NOTE(review): the result is assigned back to `train_dataset`, so running this
# cell again without re-running the loading cell appends train-dev a second time.
train_dataset = train_dataset.concatenate(train_dev_dataset_concat)

In [6]:
# Let tf.data pick the prefetch buffer size for both pipelines.
AUTOTUNE = tf.data.AUTOTUNE

train_dataset, test_dataset = (
    split.prefetch(buffer_size=AUTOTUNE)
    for split in (train_dataset, test_dataset)
)

In [7]:
# Feature extractor: ImageNet-initialised EfficientNetB7 without its
# classification head, followed by global average pooling so that every image
# becomes a single feature vector.
cnn = EfficientNetB7(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)
inputs = keras.Input(shape=input_shape)
x = cnn(preprocess_input(inputs))
output = GlobalAveragePooling2D()(x)

model = Model(inputs=inputs, outputs=output)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 600, 600, 3)]     0         
_________________________________________________________________
efficientnetb7 (Functional)  (None, 19, 19, 2560)      64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,687
Trainable params: 63,786,960
Non-trainable params: 310,727
_________________________________________________________________


In [8]:
# Persist the feature-extraction model to ./model/extrator_cnn/ so the same
# extractor can be reloaded later (the run log reports an `assets` directory
# written under that path).
model.save('./model/extrator_cnn/')

2022-05-31 19:08:40.333940: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ./model/extrator_cnn/assets


In [9]:
# Run the extractor over each split, train first and then test; every result
# holds one feature row per image.
X_train, X_test = map(model.predict, (train_dataset, test_dataset))

2022-05-31 19:09:47.312221: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-05-31 19:09:47.341030: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz
2022-05-31 19:10:01.084403: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2022-05-31 19:10:05.117660: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-05-31 19:11:42.047416: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.60GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.


In [10]:
# Sanity check: shapes of the train and test feature matrices, one per line.
print(X_train.shape, X_test.shape, sep='\n')

(894, 2560)
(168, 2560)


In [11]:
def _collect_labels(dataset):
    """Concatenate the label batches of ``dataset`` into one array.

    ``dataset`` is iterated once and must yield ``(images, labels)`` pairs;
    the images are ignored.
    """
    label_batches = [labels for _, labels in dataset]
    return np.concatenate(label_batches, axis=0)


y_train = _collect_labels(train_dataset)
y_test = _collect_labels(test_dataset)

In [12]:
# Sanity check: shapes of the train and test label vectors, one per line.
for label_array in (y_train, y_test):
    print(label_array.shape)

(894,)
(168,)


In [13]:
# Features and labels are produced by separate passes over the datasets, so
# refuse to persist an archive whose rows would not line up.
if len(X_train) != len(y_train) or len(X_test) != len(y_test):
    raise ValueError(
        'feature/label row counts differ: '
        f'train {len(X_train)} vs {len(y_train)}, test {len(X_test)} vs {len(y_test)}'
    )

# np.savez_compressed does not create missing directories; create the target
# directory first so the extraction is not lost to a FileNotFoundError.
Path(npz).parent.mkdir(parents=True, exist_ok=True)

# Write all four arrays to one compressed archive ('.npz' is appended to the
# file name when it is not already present).
np.savez_compressed(npz, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)