# Data Science z Python 3.10. Konwersja modeli (fast.ai & TensorFlow)
## 🇬🇧 Data Science with Python 3.10. Model conversion (fast.ai & TensorFlow)
#### 👨‍🏫 PhD Wojciech Oronowicz-Jaśkowiak
#### 🤖 https://github.com/aipogodzinach

## Zadanie 1.
### Task 1.

In [1]:
# pip install numpy keras tensorflow tf2onnx

import os
import numpy as np
import shutil
import keras

from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.applications import ResNet50

In [2]:
base_dir = 'dataset_x5'
data_dir = 'data_dir'

raw_no_of_files = {}
classes = ['class1', 'class2', 'class3']
for dir in classes:
    raw_no_of_files[dir] = len(os.listdir(os.path.join(base_dir, dir)))
raw_no_of_files.items()

dict_items([('class1', 111), ('class2', 114), ('class3', 136)])

In [3]:
if not os.path.exists(data_dir):
    os.mkdir(data_dir)

train_dir = os.path.join(data_dir, 'train')
valid_dir = os.path.join(data_dir, 'valid')
test_dir = os.path.join(data_dir, 'test')

train_class1_dir = os.path.join(train_dir, '1')
train_class2_dir = os.path.join(train_dir, '2')
train_class3_dir = os.path.join(train_dir, '3')

valid_class1_dir = os.path.join(valid_dir, '1')
valid_class2_dir = os.path.join(valid_dir, '2')
valid_class3_dir = os.path.join(valid_dir, '3')

test_class1_dir = os.path.join(test_dir, '1')
test_class2_dir = os.path.join(test_dir, '2')
test_class3_dir = os.path.join(test_dir, '3')

for directory in (train_dir, valid_dir, test_dir):
    if not os.path.exists(directory):
        os.mkdir(directory)

dirs = [train_class1_dir, train_class2_dir, train_class3_dir,
        valid_class1_dir, valid_class2_dir, valid_class3_dir,
        test_class1_dir, test_class2_dir, test_class3_dir]

for dir in dirs:
    if not os.path.exists(dir):
        os.mkdir(dir)        

In [4]:
class1_fnames = os.listdir(os.path.join(base_dir, 'class1'))
class2_fnames = os.listdir(os.path.join(base_dir, 'class2'))
class3_fnames = os.listdir(os.path.join(base_dir, 'class3'))

class1_fnames = [fname for fname in class1_fnames if fname.split('.')[1].lower() in ['jpg', 'png', 'jpeg']]
class2_fnames = [fname for fname in class2_fnames if fname.split('.')[1].lower() in ['jpg', 'png', 'jpeg']]
class3_fnames = [fname for fname in class3_fnames if fname.split('.')[1].lower() in ['jpg', 'png', 'jpeg']]

In [5]:
size = min(len(class1_fnames), len(class2_fnames), len(class3_fnames))

train_size = int(np.floor(0.7 * size))
valid_size = int(np.floor(0.2 * size))
test_size = size - train_size - valid_size

train_idx = train_size
valid_idx = train_size + valid_size
test_idx = train_size + valid_size + test_size

In [6]:
for i, fname in enumerate(class1_fnames):
    if i <= train_idx:
        src = os.path.join(base_dir, 'class1', fname)
        dst = os.path.join(train_class1_dir, fname)
        shutil.copyfile(src, dst)
    elif train_idx < i <= valid_idx:
        src = os.path.join(base_dir, 'class1', fname)
        dst = os.path.join(valid_class1_dir, fname)
        shutil.copyfile(src, dst)
    elif valid_idx < i < test_idx:
        src = os.path.join(base_dir, 'class1', fname)
        dst = os.path.join(test_class1_dir, fname)
        shutil.copyfile(src, dst)

for i, fname in enumerate(class2_fnames):
    if i <= train_idx:
        src = os.path.join(base_dir, 'class2', fname)
        dst = os.path.join(train_class2_dir, fname)
        shutil.copyfile(src, dst)
    elif train_idx < i <= valid_idx:
        src = os.path.join(base_dir, 'class2', fname)
        dst = os.path.join(valid_class2_dir, fname)
        shutil.copyfile(src, dst)
    elif valid_idx < i < test_idx:
        src = os.path.join(base_dir, 'class2', fname)
        dst = os.path.join(test_class2_dir, fname)
        shutil.copyfile(src, dst) 

for i, fname in enumerate(class3_fnames):
    if i <= train_idx:
        src = os.path.join(base_dir, 'class3', fname)
        dst = os.path.join(train_class3_dir, fname)
        shutil.copyfile(src, dst)
    elif train_idx < i <= valid_idx:
        src = os.path.join(base_dir, 'class3', fname)
        dst = os.path.join(valid_class3_dir, fname)
        shutil.copyfile(src, dst)
    elif valid_idx < i < test_idx:
        src = os.path.join(base_dir, 'class3', fname)
        dst = os.path.join(test_class3_dir, fname)
        shutil.copyfile(src, dst)        

In [7]:
train_datagen = ImageDataGenerator()

valid_datagen = ImageDataGenerator(rescale=1./255.)

train_generator = train_datagen.flow_from_directory(directory=train_dir,
                                                   target_size=(150, 150),
                                                   batch_size=32,
                                                   class_mode='categorical')

valid_generator = valid_datagen.flow_from_directory(directory=valid_dir,
                                                   target_size=(150, 150),
                                                   batch_size=32,
                                                   class_mode='categorical')

Found 234 images belonging to 3 classes.
Found 66 images belonging to 3 classes.


In [8]:
batch_size = 20
steps_per_epoch = train_size // batch_size
validation_steps = valid_size // batch_size

In [9]:
conv_base = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
conv_base.trainable = True

def print_layers(model):
    for layer in model.layers:
        print(f'layer_name: {layer.name:13} trainable: {layer.trainable}')

print_layers(conv_base)

layer_name: input_1       trainable: True
layer_name: conv1_pad     trainable: True
layer_name: conv1_conv    trainable: True
layer_name: conv1_bn      trainable: True
layer_name: conv1_relu    trainable: True
layer_name: pool1_pad     trainable: True
layer_name: pool1_pool    trainable: True
layer_name: conv2_block1_1_conv trainable: True
layer_name: conv2_block1_1_bn trainable: True
layer_name: conv2_block1_1_relu trainable: True
layer_name: conv2_block1_2_conv trainable: True
layer_name: conv2_block1_2_bn trainable: True
layer_name: conv2_block1_2_relu trainable: True
layer_name: conv2_block1_0_conv trainable: True
layer_name: conv2_block1_3_conv trainable: True
layer_name: conv2_block1_0_bn trainable: True
layer_name: conv2_block1_3_bn trainable: True
layer_name: conv2_block1_add trainable: True
layer_name: conv2_block1_out trainable: True
layer_name: conv2_block2_1_conv trainable: True
layer_name: conv2_block2_1_bn trainable: True
layer_name: conv2_block2_1_relu trainable: True
la

In [10]:
set_trainable = False
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

print_layers(conv_base)

layer_name: input_1       trainable: False
layer_name: conv1_pad     trainable: False
layer_name: conv1_conv    trainable: False
layer_name: conv1_bn      trainable: False
layer_name: conv1_relu    trainable: False
layer_name: pool1_pad     trainable: False
layer_name: pool1_pool    trainable: False
layer_name: conv2_block1_1_conv trainable: False
layer_name: conv2_block1_1_bn trainable: False
layer_name: conv2_block1_1_relu trainable: False
layer_name: conv2_block1_2_conv trainable: False
layer_name: conv2_block1_2_bn trainable: False
layer_name: conv2_block1_2_relu trainable: False
layer_name: conv2_block1_0_conv trainable: False
layer_name: conv2_block1_3_conv trainable: False
layer_name: conv2_block1_0_bn trainable: False
layer_name: conv2_block1_3_bn trainable: False
layer_name: conv2_block1_add trainable: False
layer_name: conv2_block1_out trainable: False
layer_name: conv2_block2_1_conv trainable: False
layer_name: conv2_block2_1_bn trainable: False
layer_name: conv2_block2_1_re

In [11]:
model = Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(units=256, activation='relu'))
model.add(layers.Dense(units=3, activation='softmax'))

model.compile(optimizer=optimizers.RMSprop(lr=1e-5),
             loss='categorical_crossentropy',
             metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 5, 5, 2048)        23587712  
                                                                 
 flatten (Flatten)           (None, 51200)             0         
                                                                 
 dense (Dense)               (None, 256)               13107456  
                                                                 
 dense_1 (Dense)             (None, 3)                 771       
                                                                 
Total params: 36,695,939
Trainable params: 13,108,227
Non-trainable params: 23,587,712
_________________________________________________________________


  super().__init__(name, **kwargs)


In [12]:
import sys
from PIL import Image
sys.modules['Image'] = Image 

In [13]:
model.fit(train_generator, epochs=1, steps_per_epoch=2)

2022-12-04 14:53:07.351689: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




<keras.callbacks.History at 0x296d19090>

In [14]:
model.save('saved_model/my_model2')



INFO:tensorflow:Assets written to: saved_model/my_model2/assets


INFO:tensorflow:Assets written to: saved_model/my_model2/assets


In [15]:
new_model = keras.models.load_model('saved_model/my_model2')
new_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 5, 5, 2048)        23587712  
                                                                 
 flatten (Flatten)           (None, 51200)             0         
                                                                 
 dense (Dense)               (None, 256)               13107456  
                                                                 
 dense_1 (Dense)             (None, 3)                 771       
                                                                 
Total params: 36,695,939
Trainable params: 13,108,227
Non-trainable params: 23,587,712
_________________________________________________________________


In [19]:
# hacky solution
# for some reason I had to upgrade protobuf to 4.21.10
# copy internal/builder.py to temporary storage
# downgrade protobuf to 3.19.4
# copy builder.py from temporary storage to internal/builder.py
# protobuf is garbage
!!python -m tf2onnx.convert --saved-model saved_model/my_model2 --output tfmodel.onnx

 '2022-12-04 14:54:38,157 - INFO - Signatures found in model: [serving_default].',
 "2022-12-04 14:54:38,158 - INFO - Output names: ['dense_1']",
 '2022-12-04 14:54:41,071 - INFO - Using tensorflow=2.10.0, onnx=1.12.0, tf2onnx=1.13.0/2c1db5',
 '2022-12-04 14:54:41,071 - INFO - Using opset <onnx, 13>',
 '2022-12-04 14:54:41,342 - INFO - Computed 0 values for constant folding',
 '2022-12-04 14:54:41,867 - INFO - Optimizing ONNX model',
 '2022-12-04 14:54:43,441 - INFO - After optimization: Add -1 (19->18), BatchNormalization -53 (53->0), Cast -1 (1->0), Const -161 (273->112), Identity -2 (2->0), Transpose -212 (214->2)',
 '2022-12-04 14:54:43,496 - INFO - ',
 '2022-12-04 14:54:43,496 - INFO - Successfully converted TensorFlow model saved_model/my_model2 to ONNX',
 "2022-12-04 14:54:43,496 - INFO - Model inputs: ['resnet50_input']",
 "2022-12-04 14:54:43,496 - INFO - Model outputs: ['dense_1']",
 '2022-12-04 14:54:43,496 - INFO - ONNX model is saved at tfmodel.onnx']

## Zadanie 2.
### Task 2.

In [20]:
# pip install fastai==2.5.3 fastbook==0.0.18 torch==1.10.0 torchvision==0.11.1 seeme

from fastbook import *
from fastai.vision.widgets import *

import pathlib
import fastai
from pathlib import Path

temp = pathlib.PosixPath
# pathlib.PosixPath = pathlib.WindowsPath

path = Path('dataset_x5')

classes = 'class1', 'class2', 'class3'

data = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2,seed=42),
    get_y=parent_label,
    item_tfms=Resize(128)
)

data = data.new(
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    batch_tfms=aug_transforms(mult=0.0, do_flip=False, flip_vert=False, max_rotate=0.0, min_zoom=0.0, max_zoom=0.0, max_lighting=0.0, max_warp=0.0, p_affine=0.0, p_lighting=0.0, xtra_tfms=None, size=None, mode='bilinear', pad_mode='border', align_corners=True, batch=False, min_scale=1.0))

dls = data.dataloaders(path, bs = 32)

learn = cnn_learner(dls, resnet18, metrics=error_rate)

learn.fit_one_cycle(1)
learn.export(fname="model.pkl")

  warn("`cnn_learner` has been renamed to `vision_learner` -- please update your code")
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/adamjedrzejewski/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

epoch,train_loss,valid_loss,error_rate,time
0,0.839371,0.058481,0.013889,00:18


In [22]:
import torch as torch

model_file = open("model.pkl", 'rb')
model = torch.load(model_file, map_location=torch.device('cpu'))

model_eval = model.eval()
dummy_image = torch.randn(1, 3, 64, 64)
torch.onnx.export(model_eval, dummy_image, "model.onnx")