# Convert the TensorFlow-Trained BERT Model to a PyTorch Model

## Install TensorFlow and Transformers

In [None]:
!pip install -q tensorflow==2.1.0 --upgrade --ignore-installed
!pip install -q transformers==2.8.0

## Install PyTorch and TorchServe

TorchServe is a flexible and easy-to-use tool for serving PyTorch models:
* https://github.com/pytorch/serve
* https://github.com/pytorch/serve/blob/master/docs/README.md


In [None]:
!pip install -q torch==1.5.0 --upgrade --ignore-installed
!pip install -q torchserve==0.1.1
!pip install -q torch-model-archiver==0.1.1

# Restart the Kernel

In [None]:
# Ask the notebook front end to restart the kernel by rendering a <script> tag.
# HTML is imported from the public IPython.display module rather than the
# internal IPython.core.display path.
# NOTE(review): the script relies on a `Jupyter` JavaScript object being present in
# the front end — confirm that it exists in the notebook UI you run this in.
from IPython.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [None]:
import boto3
import pandas as pd
import sagemaker

# Region of the default boto3 session, and a low-level SageMaker client bound to it.
region = boto3.Session().region_name
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

# SageMaker SDK session, the default bucket it reports, and the execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Retrieve the Training Job Name

In [None]:
# Restore `training_job_name` from IPython's %store database.
# NOTE(review): presumably it was saved with `%store` by an earlier notebook — confirm.
%store -r training_job_name

In [None]:
# Echo the restored value; this raises NameError if the variable was not restored above.
print('Previous training_job_name: {}'.format(training_job_name))

# Download the TensorFlow-Trained Model Locally

In [None]:
# Local directory that receives the downloaded archive and, later, its extracted contents.
models_dir = './models'

In [None]:
# Download the model and output artifacts from AWS S3
# Source: <bucket>/<training_job_name>/output/model.tar.gz; destination: models_dir/model.tar.gz.
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz $models_dir/model.tar.gz

In [None]:
import tarfile
import pickle as pkl

# Unpack the downloaded archive into models_dir. The `with` block closes the
# archive even when extraction fails part-way through.
# NOTE(review): extractall() writes to whatever member paths the archive contains;
# only run it on archives from a source you trust.
with tarfile.open('{}/model.tar.gz'.format(models_dir)) as tar:
    tar.extractall(path=models_dir)

In [None]:
# List the contents of models_dir after extraction.
!ls -al $models_dir

In [None]:
# Sub-directory of models_dir that the following cells read the fine-tuned model from.
# Note the trailing slash: it is part of the value.
transformer_model_dir = '{}/transformers/fine-tuned/'.format(models_dir)

!ls -al $transformer_model_dir

In [None]:
cat $transformer_model_dir/config.json

# Convert the TensorFlow Model to PyTorch

In [None]:
from transformers import DistilBertForSequenceClassification # PyTorch version

# Load the fine-tuned TensorFlow weights into the PyTorch model class (from_tf=True).
# id2label maps class index i (0..4) to label i + 1 (1..5); label2id is the inverse.
loaded_pytorch_model = DistilBertForSequenceClassification.from_pretrained(
    transformer_model_dir,
    from_tf=True,
    id2label={class_index: class_index + 1 for class_index in range(5)},
    label2id={label: label - 1 for label in range(1, 6)},
)

In [None]:
# Show the concrete class of the converted model, then its printed representation.
print(type(loaded_pytorch_model))
print(loaded_pytorch_model)

# Predict with the PyTorch Model

In [None]:
from transformers import DistilBertTokenizer

# The tokenizer is loaded from the stock 'distilbert-base-uncased' checkpoint name,
# not from transformer_model_dir.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Device index passed to the pipelines built in the following cells.
inference_device = -1 # CPU: -1, GPU: 0
print('inference_device {}'.format(inference_device))

In [None]:
from transformers import TextClassificationPipeline

# Bundle the converted model and the tokenizer into a text-classification pipeline.
# framework='pt' selects the PyTorch code path; device comes from inference_device.
inference_pipeline = TextClassificationPipeline(model=loaded_pytorch_model, 
                                                tokenizer=tokenizer,
                                                framework='pt',
                                                device=inference_device) 

In [None]:
# Smoke test: run one sample review through the pipeline and print text plus prediction.
review = """I loved it!  I will recommend this to everyone."""
print(review, inference_pipeline(review))

In [None]:
# Second sample; `review` is rebound, so later cells see this text unless they reassign it.
review = """Really bad.  I hope they don't make this anymore."""
print(review, inference_pipeline(review))

# Save the Transformer/PyTorch Model with `.save_pretrained()`

In [None]:
# Target directory for the model written by save_pretrained() below.
pytorch_models_dir = './models/transformers/pytorch'

In [None]:
# Create the directory, including missing parents; no error if it already exists.
!mkdir -p $pytorch_models_dir

In [None]:
# Write the converted model into pytorch_models_dir.
loaded_pytorch_model.save_pretrained(pytorch_models_dir)

In [None]:
# List what save_pretrained() produced.
!ls -al $pytorch_models_dir

# Load and Predict

In [None]:
# Reload the model from the directory written by save_pretrained(); no from_tf flag here.
pytorch_model = DistilBertForSequenceClassification.from_pretrained(pytorch_models_dir)

In [None]:
from transformers import TextClassificationPipeline

# Rebuild the pipeline around the reloaded model. `inference_pipeline` is rebound;
# `tokenizer` and `inference_device` are reused from the earlier cells.
inference_pipeline = TextClassificationPipeline(model=pytorch_model, 
                                                tokenizer=tokenizer,
                                                framework='pt',
                                                device=inference_device) 

In [None]:
# Smoke test with the reloaded model.
review = """I loved it!  I will recommend this to everyone."""
print(review, inference_pipeline(review))

# Upload Transformer/PyTorch Model to S3

In [None]:
# File name persisted with %store further down.
# NOTE(review): presumably the weights file written by save_pretrained() — confirm
# against the directory listing of pytorch_models_dir.
transformer_pytorch_model_name = 'pytorch_model.bin'

In [None]:
# S3 prefix (inside the default bucket) that receives the save_pretrained() output.
transformer_pytorch_model_s3_uri = 's3://{}/models/transformer-pytorch/'.format(bucket)
print(transformer_pytorch_model_s3_uri)

In [None]:
!aws s3 cp --recursive $pytorch_models_dir $s3_transformer_pytorch_model_path

In [None]:
# List the objects under the S3 prefix to check the upload.
!aws s3 ls $transformer_pytorch_model_s3_uri

In [None]:
# Persist the file name in IPython's %store database.
%store transformer_pytorch_model_name

In [None]:
# Persist the S3 prefix in IPython's %store database.
%store transformer_pytorch_model_s3_uri

# Save PyTorch Model with `torch.save()`

In [None]:
# File name for the state dict saved with torch.save() below.
pytorch_model_name = 'pytorch_model.pt'

In [None]:
# Create the output directory; no error if it already exists.
!mkdir -p ./models/pytorch/

In [None]:
# Full local path of the state-dict file.
pytorch_model_path = './models/pytorch/{}'.format(pytorch_model_name)

In [None]:
import torch
# Only the state dict is saved, not the model object. It is taken from
# `loaded_pytorch_model` (the TF-converted instance), not from the reloaded `pytorch_model`.
torch.save(loaded_pytorch_model.state_dict(), pytorch_model_path)

In [None]:
# Confirm the file was written.
!ls -al $pytorch_model_path

# Upload PyTorch Model to S3

In [None]:
# Full S3 object URI (default bucket) for the torch.save() state-dict file.
pytorch_model_s3_uri = 's3://{}/models/pytorch/{}'.format(bucket, pytorch_model_name)
print(pytorch_model_s3_uri)

In [None]:
# Upload the single state-dict file to that URI.
!aws s3 cp $pytorch_model_path $pytorch_model_s3_uri

In [None]:
# Check that the object now exists in S3.
!aws s3 ls $pytorch_model_s3_uri

In [None]:
# Persist the URI in IPython's %store database.
%store pytorch_model_s3_uri

In [None]:
# With no arguments, %store lists everything currently held in the store.
%store