# 日本語手書き文字OCRサンプル

## OpenVINOのインストールディレクトリからオリジナルのサンプルコード関連ファイルをコピー

In [None]:
!cp $INTEL_OPENVINO_DIR/inference_engine/demos/python_demos/handwritten_japanese_recognition_demo/requirements.txt .
!cp $INTEL_OPENVINO_DIR/inference_engine/demos/python_demos/handwritten_japanese_recognition_demo/models.lst .
!cp -r $INTEL_OPENVINO_DIR/inference_engine/demos/python_demos/handwritten_japanese_recognition_demo/utils .
!cp -r $INTEL_OPENVINO_DIR/inference_engine/demos/python_demos/handwritten_japanese_recognition_demo/data .

## Pythonライブラリをインストール

In [None]:
!pip install -r requirements.txt

## 事前学習済みモデルをダウンロード

In [None]:
!python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/model_downloader/downloader.py --list models.lst

## ライブラリをインポート

In [None]:
from __future__ import print_function
import os
import sys
import time
import io
import cv2
import logging as log
import numpy as np
from PIL import Image
import PIL

from openvino.inference_engine import IECore
from utils.codec import CTCCodec

import IPython.display
from IPython.display import clear_output

## 実装

In [None]:
class JapaneseHandwrittenOCR:
    """Handwritten Japanese text recognizer running locally on the OpenVINO Inference Engine.

    The network is read and loaded onto the CPU device once, in the
    constructor; `infer` then recognizes the text in a single image file.
    """

    # Character list used when the caller does not supply one.
    DEFAULT_CHARLIST = "data/kondate_nakayosi_char_list.txt"

    def __init__(self, model_path, charlist_path=None):
        """Load the network and set up the CTC decoder.

        Args:
            model_path: Path to the model's .xml file. The weights are read
                from the file with the same base name and a ".bin" extension.
            charlist_path: Optional path to a UTF-8 character list file.
                Defaults to `DEFAULT_CHARLIST`.
        """
        self.charlist_path = self.DEFAULT_CHARLIST if charlist_path is None else charlist_path

        # Plugin initialization
        ie = IECore()

        # Setup OpenVINO's IE
        weights_path = os.path.splitext(model_path)[0] + ".bin"
        net = ie.read_network(model_path, weights_path)
        self.input_blob = next(iter(net.input_info))
        self.out_blob = next(iter(net.outputs))
        (self.input_batch_size, self.input_channel,
         self.input_height, self.input_width) = net.input_info[self.input_blob].input_data.shape
        self.exec_net = ie.load_network(network=net, device_name="CPU")

        # Setup codec. The trailing arguments (None, 20) are passed exactly as
        # in the original sample; see utils.codec.CTCCodec for their meaning.
        self.codec = CTCCodec(self.__get_characters__(), None, 20)

    def __get_characters__(self):
        """Return every character of the character list file as one string.

        Newline characters are removed from each line before joining.
        """
        with open(self.charlist_path, 'r', encoding='utf-8') as f:
            return ''.join(line.strip('\n') for line in f)

    @staticmethod
    def _scaled_width(src_height, src_width, height, width):
        """Return the width of the source after scaling it to `height`.

        The aspect ratio of the source is preserved.

        Raises:
            ValueError: If the scaled width exceeds `width`. Without this
                check the failure would surface later as a negative pad
                width inside `np.pad`.
        """
        ratio = float(src_width) / float(src_height)
        scaled = int(height * ratio)
        if scaled > width:
            raise ValueError(
                "Image is too wide for the model input: scaled width {} exceeds {} "
                "(source {}x{}, target height {})".format(
                    scaled, width, src_width, src_height, height))
        return scaled

    def __preprocess_input__(self, image_name, height, width):
        """Read an image as grayscale and fit it to the model input size.

        The image is resized to `height` keeping its aspect ratio, converted
        to float32, given a leading channel axis, and padded on the right up
        to `width` by repeating the edge pixels.

        Args:
            image_name: Path of the image file to read.
            height: Target height in pixels.
            width: Target width in pixels.

        Returns:
            A float32 array of shape (1, height, width).

        Raises:
            OSError: If the image cannot be read.
            ValueError: If the scaled image is wider than `width`.
        """
        src = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
        if src is None:
            # cv2.imread reports a missing or undecodable file by returning None.
            raise OSError("Failed to read image: {}".format(image_name))

        tw = self._scaled_width(src.shape[0], src.shape[1], height, width)
        rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)
        # [h,w] -> [c,h,w]
        img = rsz[None, :, :]
        _, h, w = img.shape
        # right edge padding
        return np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode='edge')

    def infer(self, image_path):
        """Recognize the text in one image file.

        Args:
            image_path: Path of the image file to recognize.

        Returns:
            Whatever the codec's `decode` returns for the network output.
        """
        # Read and pre-process input image (NOTE: one image only).
        # The extra leading axis turns [c,h,w] into a batch of one.
        input_image = self.__preprocess_input__(
            image_path, height=self.input_height, width=self.input_width)[None, :, :, :]

        preds = self.exec_net.infer(inputs={self.input_blob: input_image})
        return self.codec.decode(preds[self.out_blob])

## 実行

In [None]:
# Sample image to recognize.
image_path = "data/test.png"

# Preview the input inline: load it as grayscale, encode it to JPEG in an
# in-memory buffer, and hand the encoded bytes to the notebook display.
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
f = io.BytesIO()
Image.fromarray(img).save(f, format='jpeg')
IPython.display.display(IPython.display.Image(data=f.getvalue()))

# Run recognition locally with the downloaded FP32 model and print the result.
ocr = JapaneseHandwrittenOCR(
    "intel/handwritten-japanese-recognition-0001/FP32/handwritten-japanese-recognition-0001.xml"
)
result = ocr.infer(image_path)
print(result)

---

## OpenVINO™ Model Serverを利用
ここからは手書き文字認識モデルをOpenVINO Model Serverでマイクロサービス化して外出しにします。Model ServerとはgRPCを介してコミュニケーションを取ります。

### OpenVINO Model Serverのセットアップ
OpenVINO Model ServerをホストOS上で稼働させる手順です。こちらを実行後に以降の作業を進めてください。
1. Model ServerのDockerイメージをダウンロード
```Bash
docker pull openvino/model_server:latest
```
1. 手書き文字認識用モデル（XMLファイルとBINファイル）を適当なフォルダへ格納
1. Model Serverを起動
```Bash
docker run -d --rm -v C:\Users\hiroshi\model\ocr:/models/ocr/1 -p 9000:9000 openvino/model_server:latest --model_path /models/ocr --model_name ocr --port 9000 --log_level DEBUG  --shape auto
```
※"C:\Users\hiroshi\model\ocr"にモデルが格納されているとした場合

### モジュールのインストール

In [None]:
!pip install tensorflow-serving-api

### 実装

In [None]:
import cv2
import datetime
import grpc
import numpy as np
import os
import io
from tensorflow import make_tensor_proto, make_ndarray
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc, get_model_metadata_pb2

from PIL import Image
import PIL

from utils.codec import CTCCodec

import IPython.display
from IPython.display import clear_output

class RemoteJapaneseHandwrittenOCR:
    """Handwritten Japanese text recognizer backed by a model served over gRPC.

    The constructor opens an insecure gRPC channel, asks the server for the
    model's signature metadata to learn the input/output names and the input
    size, and sets up the CTC decoder. `infer` pre-processes one image
    locally and sends it to the server for prediction.
    """

    # Character list used when the caller does not supply one.
    DEFAULT_CHARLIST = "data/kondate_nakayosi_char_list.txt"

    def __init__(self, grpc_address='localhost', grpc_port=9000, model_name='ocr', model_version=None,
                 charlist_path=None, timeout=10.0):
        """Connect to the model server and read the model's input/output metadata.

        Args:
            grpc_address: Host name or address of the model server.
            grpc_port: gRPC port of the model server.
            model_name: Name of the served model.
            model_version: Optional model version to request; when None the
                version field of the requests is left unset.
            charlist_path: Optional path to a UTF-8 character list file.
                Defaults to `DEFAULT_CHARLIST`.
            timeout: Timeout in seconds passed to each gRPC call.
        """
        # Settings for accessing model server
        self.grpc_address = grpc_address
        self.grpc_port = grpc_port
        self.model_name = model_name
        self.model_version = model_version
        self.timeout = timeout
        self.charlist_path = self.DEFAULT_CHARLIST if charlist_path is None else charlist_path
        self.channel = grpc.insecure_channel("{}:{}".format(self.grpc_address, self.grpc_port))
        self.stub = prediction_service_pb2_grpc.PredictionServiceStub(self.channel)

        # Get input shape info from Model Server.
        # Indices 2 and 3 are used as height and width, as in the original
        # sample (assumes the served model reports an NCHW input shape).
        self.input_name, input_shape, self.output_name, _ = self.__get_input_name_and_shape__()
        self.input_height = input_shape[2]
        self.input_width = input_shape[3]

        # Setup codec. The trailing arguments (None, 20) are passed exactly as
        # in the original sample; see utils.codec.CTCCodec for their meaning.
        self.codec = CTCCodec(self.__get_characters__(), None, 20)

    def close(self):
        """Close the underlying gRPC channel."""
        self.channel.close()

    def __get_input_name_and_shape__(self):
        """Query the server for the model signature.

        Returns:
            A tuple (input_name, input_shape, output_name, output_shape)
            describing the first input and the first output of the
            'serving_default' signature.
        """
        request = get_model_metadata_pb2.GetModelMetadataRequest()
        request.model_spec.name = self.model_name
        if self.model_version is not None:
            request.model_spec.version.value = self.model_version
        request.metadata_field.append("signature_def")

        response = self.stub.GetModelMetadata(request, self.timeout)
        input_metadata, output_metadata = self.__get_input_and_output_meta_data__(response)
        input_blob = next(iter(input_metadata.keys()))
        output_blob = next(iter(output_metadata.keys()))
        return (input_blob, input_metadata[input_blob]['shape'],
                output_blob, output_metadata[output_blob]['shape'])

    @staticmethod
    def _describe_tensors(tensor_infos):
        """Map each tensor name to a dict holding its 'shape' and 'dtype'.

        Args:
            tensor_infos: Mapping from tensor name to an object exposing
                `tensor_shape.dim` (items with a `size`) and `dtype`.
        """
        return {
            key: {
                'shape': [d.size for d in info.tensor_shape.dim],
                'dtype': info.dtype,
            }
            for key, info in tensor_infos.items()
        }

    def __get_input_and_output_meta_data__(self, response):
        """Extract input and output descriptions from a metadata response.

        Returns:
            A pair (inputs, outputs); each maps a tensor name to
            {'shape': [...], 'dtype': ...} for the 'serving_default' signature.
        """
        signature_map = get_model_metadata_pb2.SignatureDefMap()
        signature_map.ParseFromString(response.metadata['signature_def'].value)
        serving_default = signature_map.signature_def['serving_default']
        return (self._describe_tensors(serving_default.inputs),
                self._describe_tensors(serving_default.outputs))

    def __get_characters__(self):
        """Return every character of the character list file as one string.

        Newline characters are removed from each line before joining.
        """
        with open(self.charlist_path, 'r', encoding='utf-8') as f:
            return ''.join(line.strip('\n') for line in f)

    @staticmethod
    def _scaled_width(src_height, src_width, height, width):
        """Return the width of the source after scaling it to `height`.

        The aspect ratio of the source is preserved.

        Raises:
            ValueError: If the scaled width exceeds `width`. Without this
                check the failure would surface later as a negative pad
                width inside `np.pad`.
        """
        ratio = float(src_width) / float(src_height)
        scaled = int(height * ratio)
        if scaled > width:
            raise ValueError(
                "Image is too wide for the model input: scaled width {} exceeds {} "
                "(source {}x{}, target height {})".format(
                    scaled, width, src_width, src_height, height))
        return scaled

    def __preprocess_input__(self, image_name, height, width):
        """Read an image as grayscale and fit it to the model input size.

        The image is resized to `height` keeping its aspect ratio, converted
        to float32, given a leading channel axis, and padded on the right up
        to `width` by repeating the edge pixels.

        Args:
            image_name: Path of the image file to read.
            height: Target height in pixels.
            width: Target width in pixels.

        Returns:
            A float32 array of shape (1, height, width).

        Raises:
            OSError: If the image cannot be read.
            ValueError: If the scaled image is wider than `width`.
        """
        src = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
        if src is None:
            # cv2.imread reports a missing or undecodable file by returning None.
            raise OSError("Failed to read image: {}".format(image_name))

        tw = self._scaled_width(src.shape[0], src.shape[1], height, width)
        rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)
        # [h,w] -> [c,h,w]
        img = rsz[None, :, :]
        _, h, w = img.shape
        # right edge padding
        return np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode='edge')

    def infer(self, image_path):
        """Recognize the text in one image file using the remote model.

        Args:
            image_path: Path of the image file to recognize.

        Returns:
            Whatever the codec's `decode` returns for the server's output.
        """
        # Read and pre-process input image (NOTE: one image only).
        # The extra leading axis turns [c,h,w] into a batch of one.
        input_image = self.__preprocess_input__(
            image_path, height=self.input_height, width=self.input_width)[None, :, :, :]
        input_image = input_image.astype(np.float32)

        # Call the model on the Model Server over gRPC.
        request = predict_pb2.PredictRequest()
        request.model_spec.name = self.model_name
        if self.model_version is not None:
            # Ask for the same version the metadata was read from.
            request.model_spec.version.value = self.model_version
        request.inputs[self.input_name].CopyFrom(
            make_tensor_proto(input_image, shape=(input_image.shape)))
        response = self.stub.Predict(request, self.timeout)
        preds = make_ndarray(response.outputs[self.output_name])
        return self.codec.decode(preds)

### 実行

In [None]:
# Sample image to recognize.
image_path = "data/test.png"

# Preview the input inline: load it as grayscale, encode it to JPEG in an
# in-memory buffer, and hand the encoded bytes to the notebook display.
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
f = io.BytesIO()
Image.fromarray(img).save(f, format='jpeg')
IPython.display.display(IPython.display.Image(data=f.getvalue()))

# Run recognition through the model server and print the result.
ocr = RemoteJapaneseHandwrittenOCR(
    grpc_address='192.168.145.33',
    grpc_port='9000',
    model_name='ocr',
)
result = ocr.infer(image_path)
print(result)

---

# おしまい！