# BentoML PyTorch TLDR Tutorial

Link to source code: https://github.com/bentoml/gallery/tree/main/TLDR/pytorch_tldr_demo/

Install required dependencies:

In [6]:
!python3 -m pip install --quiet -r requirements.txt

Import required libraries:

## Download and Extract Reddit TL;DR

In [7]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import requests
from os.path import exists
import zipfile
import json
from tqdm import trange

from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import os
import re
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
dataset_path = 'dataset/corpus-webis-tldr-17.zip'
dataset_url = 'https://zenodo.org/record/1043504/files/corpus-webis-tldr-17.zip?download=1'
# JSON file that Data() reads by default; used to decide whether extraction is still needed.
extracted_path = 'dataset/corpus-webis-tldr-17.json'

# open() below fails if the target directory does not exist yet.
os.makedirs(os.path.dirname(dataset_path), exist_ok=True)

if not exists(dataset_path):
    # Download to a temporary name first so an interrupted transfer never leaves
    # a truncated file that a later run would mistake for the full archive.
    partial_path = dataset_path + '.part'
    with requests.get(dataset_url, stream=True) as r:
        # Fail loudly on HTTP errors instead of saving an error page as the archive.
        r.raise_for_status()
        with open(partial_path, 'wb') as f:
            # Stream in 1 MiB chunks rather than holding the whole archive in memory.
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, dataset_path)

# Extract independently of the download step, so an existing archive that was
# never unpacked is still extracted.
if not exists(extracted_path):
    with zipfile.ZipFile(dataset_path, 'r') as z:
        z.extractall('dataset/')

## Begin processing data

In [9]:
# Reserved vocabulary indices for the start-of-sequence and end-of-sequence
# markers; they match the entries Lang pre-populates in index2word.
SOS_token = 0
EOS_token = 1
# NOTE(review): MAX_LENGTH is not referenced anywhere else in the visible notebook.
MAX_LENGTH = 26

#initialize Lang Class
class Lang:
    """Vocabulary container: word <-> index lookups plus per-word occurrence counts."""

    def __init__(self):
        # Indices 0 and 1 are reserved for the SOS/EOS markers, so the first
        # real word receives index 2.
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2

    def addSentence(self, sentence):
        """Register every token of ``sentence`` (tokens are separated by a single space)."""
        tokens = sentence.split(' ')
        for token in tokens:
            self.addWord(token)

    def addWord(self, word):
        """Bump the count of a known word, or assign the next free index to a new one."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [10]:
class Data():
    """
    Reads the dataset file in chunks and yields, for each chunk, a tuple of
    normalized (body, summary) pandas Series.
    """
    def __init__(self, path='dataset/corpus-webis-tldr-17.json'):
        # With chunksize set, read_json returns a reader that is iterated chunk
        # by chunk instead of loading the whole file at once.
        self.df = pd.read_json(path, lines=True, chunksize=100000)

    def __iter__(self):
        # self.df is shared between all iterators created from this object, so
        # every new iteration continues with the next unread chunk.
        for d in self.df:
            body = d['content']
            summary = d['summary']
            yield self._normalize_sentence(body), \
                self._normalize_sentence(summary)

    def _normalize_sentence(self, sentence):
        """
        Normalize a Series of strings to lowercase ASCII letters and whitespace.

        Accents are folded first (NFD decomposition, then dropping the non-ASCII
        combining marks) so that e.g. "é" becomes "e" instead of being deleted
        by the character filter below.
        """
        sentence = sentence.str.normalize('NFD')
        sentence = sentence.str.encode('ascii', errors='ignore').str.decode('utf-8')
        sentence = sentence.str.lower()
        # Raw string: "\s" is a regex class, not a Python string escape.
        sentence = sentence.str.replace(r'[^A-Za-z\s]+', '', regex=True)
        return sentence

    def process_data(self):
        """
        Consume the next chunk and build vocabularies and sentence pairs from it.

        Returns:
            (source, target, pairs): a Lang built from the bodies, a Lang built
            from the summaries, and a list of [body, summary] string pairs.

        Raises:
            StopIteration: when the underlying reader has no chunk left.
        """
        bodies, summaries = next(iter(self))

        source = Lang()
        target = Lang()
        pairs = []
        for body, summary in zip(bodies, summaries):
            source.addSentence(body)
            target.addSentence(summary)
            pairs.append([body, summary])
        return source, target, pairs

## Define the model

First let's define a simple PyTorch network

In [11]:
def indexesFromSentence(lang, sentence):
    """Translate each space-separated token of ``sentence`` into its index in ``lang``.

    Raises KeyError for a token that is not in ``lang.word2index``.
    """
    lookup = lang.word2index
    indexes = []
    for token in sentence.split(' '):
        indexes.append(lookup[token])
    return indexes

def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a column tensor of token indices terminated by EOS.

    The result is a long tensor of shape (number of tokens + 1, 1) on ``device``.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)

def tensorsFromPair(input_lang, output_lang, pair):
    """Convert a [source sentence, target sentence] pair into a tuple of index tensors."""
    source_sentence = pair[0]
    target_sentence = pair[1]
    return (
        tensorFromSentence(input_lang, source_sentence),
        tensorFromSentence(output_lang, target_sentence),
    )

In [12]:
class Encoder(nn.Module):
    """Embedding layer followed by a GRU; each call processes one reshaped input step."""

    def __init__(self, input_dim, hidden_dim, embbed_dim, num_layers):
        super().__init__()
        # Keep the configuration (vocabulary size, embedding size, hidden size,
        # number of GRU layers) available as attributes.
        self.input_dim = input_dim
        self.embbed_dim = embbed_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Token index -> embedding vector of size embbed_dim.
        self.embedding = nn.Embedding(input_dim, embbed_dim)
        # The GRU consumes embedding vectors and produces hidden_dim-sized states.
        self.gru = nn.GRU(embbed_dim, hidden_dim, num_layers=num_layers)

    def forward(self, src):
        """Embed ``src``, reshape it to (1, 1, -1) and return the GRU's (outputs, hidden)."""
        step_input = self.embedding(src).view(1, 1, -1)
        return self.gru(step_input)

class Decoder(nn.Module):
    """Embedding -> ReLU -> GRU -> Linear -> LogSoftmax decoder for a single time step."""

    def __init__(self, output_dim, hidden_dim, embbed_dim, num_layers):
        super().__init__()

        # Keep the configuration (vocabulary size, embedding size, hidden size,
        # number of GRU layers) available as attributes.
        self.embbed_dim = embbed_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        # Layers: token embedding, recurrent core, projection onto the
        # vocabulary, and a log-softmax over the vocabulary dimension.
        self.embedding = nn.Embedding(output_dim, embbed_dim)
        self.gru = nn.GRU(embbed_dim, hidden_dim, num_layers=num_layers)
        self.out = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        Returns a tuple (log-probabilities over the vocabulary, next hidden state).
        """
        # Lay the token ids out as (1, batch_size) so the GRU sees one time step.
        tokens = input.view(1, -1)
        activated = F.relu(self.embedding(tokens))
        gru_out, next_hidden = self.gru(activated, hidden)
        # gru_out[0] is the output of the single time step.
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, next_hidden

class Seq2Seq(nn.Module):
    """
    Encoder-decoder wrapper: encodes the source sequence into a hidden state and
    decodes it one token at a time into log-probabilities over the target
    vocabulary.

    The decoding loop handles one sequence per call: it starts from a
    one-element SOS tensor and calls ``.item()`` on the predicted token.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()

        #initialize the encoder and decoder
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def _encode(self, source):
        """Run the encoder's embedding and GRU over the whole source sequence and return the final hidden state."""
        # Lay the token ids out as (source_len, batch) so the GRU walks the
        # sequence itself and carries its hidden state from token to token.
        # Calling the encoder once per token would restart from a zero state
        # every time, and only the last token would reach the decoder.
        tokens = source.view(source.size(0), -1)
        embedded = self.encoder.embedding(tokens)
        _, hidden = self.encoder.gru(embedded)
        return hidden

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        """
        Args:
            source: long tensor of source token ids; first dimension is the
                sequence length.
            target: long tensor of target token ids, shape (target_len, batch).
                It fixes the number of decoding steps and supplies the
                teacher-forcing inputs.
            teacher_forcing_ratio: per-step probability of feeding the
                ground-truth token (instead of the model's own prediction) as
                the next decoder input.

        Returns:
            Tensor of shape (target_len, batch, vocab_size) holding the decoder
            output of every executed step. Rows for steps skipped after an
            early EOS stop remain zero.
        """
        target_length = target.shape[0]
        batch_size = target.shape[1]
        vocab_size = self.decoder.output_dim

        #initialize a variable to hold the predicted outputs
        outputs = torch.zeros(target_length, batch_size, vocab_size, device=self.device)

        #use the encoder's final hidden state as the decoder's initial hidden state
        decoder_hidden = self._encode(source).to(self.device)

        #the decoder starts from the start-of-sequence token
        decoder_input = torch.tensor([SOS_token], device=self.device)

        for t in range(target_length):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[t] = decoder_output

            teacher_force = random.random() < teacher_forcing_ratio
            if teacher_force:
                # Feed the ground-truth token of this step as the next input.
                decoder_input = target[t]
            else:
                # Feed the model's own most likely token; detach so the sampled
                # index is treated as plain input data.
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(1).detach()
                # Stop once the model itself predicts end-of-sequence.
                if decoder_input.item() == EOS_token:
                    break

        return outputs

## Training and Saving the model

Next we define the training loop and some helper functions for training and evaluating the model

In [13]:
teacher_forcing_ratio = 0.5

def clacModel(model, input_tensor, target_tensor, model_optimizer, criterion):
    """Run one optimisation step on a single (input, target) pair.

    The per-step losses between the model output and the target are summed,
    back-propagated, and applied with ``model_optimizer``.

    Returns:
        The summed loss divided by the number of output steps (a Python float).
    """
    model_optimizer.zero_grad()

    predictions = model(input_tensor, target_tensor)
    steps = predictions.size(0)

    # Accumulate the loss of every predicted step against the expected token.
    total = sum(
        criterion(predictions[step], target_tensor[step])
        for step in range(steps)
    )

    total.backward()
    model_optimizer.step()

    return total.item() / steps

def trainModel(model, source, target, pairs, num_iteration=20000,
               log_every=5000, save_path='mytraining.pt'):
    """Train ``model`` with SGD on randomly drawn sentence pairs.

    Args:
        model: the network to train; called as ``model(input_tensor, target_tensor)``.
        source: Lang used to index the source sentences.
        target: Lang used to index the target sentences.
        pairs: list of [source sentence, target sentence] pairs to sample from.
        num_iteration: number of single-pair training steps.
        log_every: an average loss is recorded every ``log_every`` steps.
        save_path: file the model's state dict is written to after training.

    Returns:
        (model, losses): the trained model and the list of recorded average
        losses. The list is empty when ``num_iteration < log_every``.
    """
    losses = []
    model.train()

    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.NLLLoss()
    running_loss = 0

    # Draw and tensorize every training sample up front.
    training_pairs = [tensorsFromPair(source, target, random.choice(pairs))
                      for _ in range(num_iteration)]

    for it in trange(1, num_iteration + 1):
        input_tensor, target_tensor = training_pairs[it - 1]

        running_loss += clacModel(model, input_tensor, target_tensor, optimizer, criterion)

        if it % log_every == 0:
            losses.append(running_loss / log_every)
            running_loss = 0
            # No new data chunk is loaded here: the training samples are
            # already tensorized above, and the model's embedding sizes are
            # fixed to the vocabularies it was built with, so a freshly built
            # vocabulary could not be used by this model.

    torch.save(model.state_dict(), save_path)
    return model, losses

In [14]:
def evaluate(model, input_lang, output_lang, sentences):
    """Decode the model's prediction for one [source sentence, target sentence] pair.

    The target sentence is still tensorized and passed to the model because the
    model sizes its output from it, but teacher forcing is switched off so the
    reference tokens are never fed back as decoder inputs during evaluation.

    Returns:
        The list of predicted words; it ends with '<EOS>' when the model
        predicted end-of-sequence within the target length.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentences[0])
        output_tensor = tensorFromSentence(output_lang, sentences[1])

        decoded_words = []

        output = model(input_tensor, output_tensor, teacher_forcing_ratio=0)

        for ot in range(output.size(0)):
            # Index of the most likely word at this step.
            _, topi = output[ot].topk(1)
            word_index = topi[0].item()

            if word_index == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[word_index])
    return decoded_words

def evaluateRandomly(model, source, target, pairs, n=10):
    """Print source, target and predicted sentence for ``n`` randomly chosen pairs."""
    for _ in range(n):
        sample = random.choice(pairs)
        print('source {}'.format(sample[0]))
        print('target {}'.format(sample[1]))
        predicted_words = evaluate(model, source, target, sample)
        print('predicted {}'.format(' '.join(predicted_words)))

In [16]:
# Load the first chunk of the dataset and build the vocabularies and sentence pairs from it.
data = Data()
source, target, pairs = data.process_data()

# Show one random [body, summary] pair as a sanity check.
randomize = random.choice(pairs)
print('random sentence {}'.format(randomize))

# Vocabulary sizes (including the SOS/EOS entries) determine the embedding and output dimensions.
input_size = source.n_words
output_size = target.n_words
print('Input : {} Output : {}'.format(input_size, output_size))

# Model and training hyperparameters.
embed_size = 256
hidden_size = 512
num_layers = 1
# Only two training steps: with this value trainModel never reaches its
# 5000-step logging interval, so the returned `losses` list is empty.
num_iteration = 2

# Create the encoder-decoder model.
encoder = Encoder(input_size, hidden_size, embed_size, num_layers)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers)

model = Seq2Seq(encoder, decoder, device).to(device)

# Print the layer structure of both halves of the model.
print(encoder)
print(decoder)

# Train the model; trainModel also writes the state dict to 'mytraining.pt'.
model, losses = trainModel(model, source, target, pairs, num_iteration)

random sentence ['oh ive had so many fails  the worst of them all would have to be when i was in high school  i was excited to go to dinner and a movie with my totally not dating this dude but we hang out together all the time and cuddle on the bed while watching star trek friend  by dinner i mean mcdonalds  we were classy \n i waited and waited and got hungry and waited some more and waited more  didnt hear anything from him and nobody answered the phone at his house  got  really  hungry  turned the oven on for a pizza  except id turned on the stove not the oven and id thrown my math notebook on top of the stove \n i ran into the kitchen to see the notebook smoldering  i threw it on the floor and dumped a couple pitchers of water on it  singed the linoleum  turned the oven on put in the pizza  burned the pizza  the center was way overdone and i had to chisel it off  ate cereal instead \n about six hours after i was supposed to get picked up his dad called me  turns out hed broken his 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:16<00:00,  8.39s/it]


In [None]:
losses

In [None]:
evaluateRandomly(model, source, target, pairs)

### Cross Validation

We can do some cross validation and the results can be saved with the model as metadata


### Saving the model with some metadata

In [None]:
import bentoml

# Store what this notebook actually produced alongside the model: the number of
# training steps and the average losses returned by trainModel. No accuracy or
# cross-validation results are computed in this notebook, so none are recorded.
metadata = {
    "num_iteration": num_iteration,
    "losses": losses,
}

tag = bentoml.pytorch.save(
    "pytorch_tldr",
    model,
    metadata=metadata,
)


## Create a BentoML Service for serving the model

Note: using `%%writefile` here because `bentoml.Service` instance must be created in a separate `.py` file

Even though we have only one model, we can create as many API endpoints as we want. Note that the service below currently only registers the model runner; it does not define any API endpoints yet (such as `predict_ndarray` or `predict_image`).

In [None]:
%%writefile service.py

# Service definition for the TL;DR model. It is written to its own file because
# a bentoml.Service instance must be created in a separate .py module.

# NOTE(review): `t`, `np`, `PIL.Image`, `PILImage`, `Image` and `NumpyNdarray`
# are imported but not used anywhere in this file.
import typing as t

import numpy as np
import PIL.Image
from PIL.Image import Image as PILImage

import bentoml
from bentoml.io import Image
from bentoml.io import NumpyNdarray


# Runner that loads the model saved under the "pytorch_tldr" tag.
# NOTE(review): predict_fn_name="predict" - the Seq2Seq class defined in the
# notebook only implements `forward`; confirm the saved model exposes `predict`.
tldr_runner = bentoml.pytorch.load_runner(
    "pytorch_tldr",
    name="tldr_runner",
    predict_fn_name="predict",
)

# NOTE(review): the service only registers the runner; no API endpoint is
# declared in this file.
svc = bentoml.Service(
    name="pytorch_tldr_demo",
    runners=[
        tldr_runner,
    ],
)

Start a dev model server to test out the service defined above

In [None]:
!bentoml serve service.py:svc

Now you can use something like:

`curl -H "Content-Type: multipart/form-data" -F'fileobj=@samples/1.png;type=image/png' http://127.0.0.1:5000/predict_image`
    
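to send a request to the service. Note that the `service.py` defined above does not declare a `predict_image` endpoint, so adapt the URL and payload to the API you add for the TL;DR model.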

## Build a Bento for distribution and deployment

In [None]:
# Package the service into a Bento: includes all .py files, skips tests/, and
# uses README.md as the description.
# NOTE(review): `bentoml` must already be imported in the kernel for this cell to run.
# NOTE(review): "scikit-learn" is not imported by service.py as written above -
# confirm it is actually required.
bentoml.build(
    "service.py:svc",
    include=["*.py"],
    exclude=["tests/"],
    description="file:./README.md",
    python=dict(
        packages=["scikit-learn", "torch", "Pillow"],
    )
)

Starting a dev server with the Bento build:

In [None]:
!bentoml serve pytorch_tldr_demo:latest