### Framework imports

In [1]:
from noronha.tools.serving import OnlinePredict
from noronha.tools.shortcuts import model_path

### Application imports

In [2]:
import json

import joblib
import numpy as np
import pandas as pd
import torch
from sklearn.base import BaseEstimator, TransformerMixin
from transformers import DistilBertTokenizer
from transformers import DistilBertModel

### Loading pre-trained model

In [3]:
# Ask the framework's model_path shortcut for the locations of the tokenizer and
# model artifacts, both published under the same multilingual DistilBERT version.
# NOTE(review): presumably these are local directories inside the container - confirm.
tokenizer_path = model_path(model='hf-tokenizer', version='distilbert-base-multilingual-cased')
torch_path = model_path(model='hf-model', version='distilbert-base-multilingual-cased')

# Load both objects from those paths. output_hidden_states=True is required because
# the bert_emb transformer defined further down reads 'hidden_states' from the output.
tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_path)
model = DistilBertModel.from_pretrained(torch_path, return_dict=True, output_hidden_states=True)

Some weights of the model checkpoint at /model/hf-model.distilbert-base-multilingual-cased/ were not used when initializing DistilBertModel: ['vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
class bert_emb(TransformerMixin, BaseEstimator):
    """Scikit-learn transformer that maps raw sentences to DistilBERT feature vectors.

    The transformer is stateless: it uses the module-level ``tokenizer`` and
    ``model`` objects, so ``fit`` has nothing to learn.

    The class name must stay ``bert_emb``: unpickling the classifier in this
    notebook looks the class up by that name in ``__main__``.
    """

    def fit(self, X, y=None):
        """No-op fit; present only to satisfy the scikit-learn estimator API.

        Parameters
        ----------
        X : ignored
        y : ignored

        Returns
        -------
        bert_emb
            The instance itself.
        """
        return self

    def transform(self, X):
        """Encode a batch of sentences as rounded float32 vectors.

        Parameters
        ----------
        X : sequence of str, pandas.Series or pandas.DataFrame
            Sentences to encode. Pandas containers are unwrapped to their
            underlying NumPy array before tokenization.

        Returns
        -------
        numpy.ndarray
            float32 array rounded to 4 decimals, holding the first-token slice
            of the selected hidden state for each input.
            Assumed shape is (n_inputs, hidden_size) - TODO confirm.

        Raises
        ------
        Exception
            Any error from the tokenizer or the model is printed and then
            re-raised unchanged.
        """
        try:
            if isinstance(X, (pd.Series, pd.DataFrame)):
                X = X.values

            # Inputs are padded to the longest sequence and truncated at 256 tokens.
            encoded = tokenizer.batch_encode_plus(
                X, return_tensors='pt', padding=True, truncation=True, max_length=256
            )

            # Inference only: disable autograd so no computation graph is built
            # (and kept alive) for every request served.
            with torch.no_grad():
                outputs = model(**encoded)

            # NOTE(review): index 0 of 'hidden_states' is, per the Hugging Face docs,
            # the embedding-layer output rather than the last transformer layer.
            # Kept as-is because the pickled classifier was presumably trained on
            # exactly these features - confirm before changing.
            first_token = outputs.get('hidden_states')[0].detach()[:, 0]

            return np.around(first_token.numpy(), decimals=4).astype(np.float32)

        except Exception as e:
            print(str(e))
            # Bare raise re-raises the active exception with its original traceback.
            raise

### Loading the sentiment classifier

In [4]:
# joblib.load unpickles the artifact, and the pickle refers to the bert_emb class by
# name in __main__. The cell defining bert_emb must therefore be executed before this
# one; otherwise loading fails with "module '__main__' has no attribute 'bert_emb'".
# Unpickling can execute arbitrary code - only load artifacts from a trusted source.
clf_path = model_path('classifier.pkl', model='sentiment-clf-portugal')
clf = joblib.load(clf_path)

AttributeError: module '__main__' has no attribute 'bert_emb'

### Defining the prediction function

In [None]:
def predict(x):
    """Classify the text carried in a JSON request body.

    Parameters
    ----------
    x : str
        JSON document with a ``"message"`` entry holding either a single
        sentence or a list of sentences.

    Returns
    -------
    str
        String form of the classifier's prediction for the first sentence.
    """
    payload = json.loads(x)
    message = payload['message']

    # The classifier works on batches, so a lone sentence is wrapped in a list.
    if isinstance(message, str):
        batch = [message]
    else:
        batch = message

    predictions = clf.predict(batch)
    return str(predictions[0])

In [None]:
# Local smoke test: call predict with a sample JSON payload before serving it.
predict('{"message": "I dont feel well today"}')

### Creating the prediction service

In [None]:
# Wrap predict in the framework's OnlinePredict helper and call the resulting object.
# NOTE(review): presumably this starts a blocking prediction endpoint - confirm in the Noronha docs.
OnlinePredict(predict_func=predict)()