In [None]:
!pip install transformers flask flask-ngrok

from transformers import AutoModel, AutoTokenizer
import torch
from flask import Flask, request
from flask_ngrok import run_with_ngrok
import json

In [None]:
# Hugging Face model identifier; used later to load both the model and tokenizer.
MODEL_NAME = 'onlplab/alephbert-base'

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Create the Flask application and register it with flask-ngrok.
app = Flask(__name__)
run_with_ngrok(app)

In [None]:
# Load the pretrained encoder with hidden-state outputs enabled, then move it
# onto the device selected earlier in the notebook.
model = AutoModel.from_pretrained(MODEL_NAME, output_hidden_states=True)
model = model.to(device)
print(f'MODEL RUNNING ON DEVICE {model.device}')

# Tokenizer matching the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Put the module in evaluation mode. Kept as the cell's last expression, so
# the notebook still echoes the model's repr as the cell output.
model.eval()

In [None]:
def encode_sents(*args):
    """Lazily encode sentences into per-token embeddings from the last hidden layer.

    Each sentence is tokenized and run through the module-level ``model`` on
    its own (one forward pass per sentence, no batching).

    Args:
        *args: Sentences to encode; every argument must be a ``str``.

    Yields:
        list[dict]: One list per sentence, in input order. Each entry
        describes one token and has the keys ``'token'`` (token string),
        ``'word_id'`` (the tokenizer's word index for that token) and
        ``'embedding'`` (that token's last-layer hidden state as a plain
        Python list). The first and last positions of the tokenized sequence
        are dropped -- presumably the [CLS]/[SEP] special tokens; confirm
        against the tokenizer in use.

    Raises:
        AssertionError: If any argument is not a string. Because this is a
            generator, the check only runs once iteration starts.
    """
    assert all(isinstance(sent, str) for sent in args), 'all inputs must be strings'

    for sent in args:
        # Send the inputs to the same device the model was moved to. The
        # previous hard-coded ``.to(0)`` (CUDA device 0) failed on CPU-only hosts.
        # NOTE(review): no truncation is requested here, so over-long
        # sentences reach the model unshortened.
        encoded = tokenizer(sent, return_tensors='pt').to(device)

        # Select the single batch row explicitly rather than squeeze(), which
        # would also collapse any other dimension that happens to have size 1.
        tokens = tokenizer.convert_ids_to_tokens(encoded.input_ids[0])[1:-1]
        word_ids = encoded.word_ids()[1:-1]

        with torch.no_grad():
            hidden_states = model(**encoded).hidden_states

        # Last layer, only batch row, without the first and last positions.
        last_layer = hidden_states[-1][0, 1:-1].cpu()

        yield [{'token': t,
                'word_id': w,
                'embedding': h}
               for t, w, h in zip(tokens, word_ids, last_layer.tolist())]

In [None]:
@app.route('/get_embeddings', methods=['POST'])
def get_embeddings():
    """Return per-token embeddings for every sentence in the POSTed JSON body.

    Expected body: a JSON object with an ``'instances'`` list of strings and,
    optionally, a ``'parameters'`` entry (accepted but not used here). Any
    other top-level key is rejected.

    Returns:
        On success, ``{'predictions': [...]}`` with one entry per instance as
        produced by ``encode_sents``. On a malformed request, a JSON
        ``{'error': ...}`` body with HTTP status 400.
    """
    # silent=True makes get_json return None instead of raising on an
    # unparseable body, so the result has to be checked before use.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return {'error': 'request body must be a JSON object'}, 400

    # Explicit checks instead of assert: asserts vanish under ``python -O``
    # and would otherwise surface to the client as a 500.
    unexpected = sorted(key for key in data if key not in ('instances', 'parameters'))
    if unexpected:
        return {'error': f'unexpected keys: {unexpected}'}, 400

    instances = data.get('instances')
    # Requiring a list also stops a bare string from being unpacked into
    # one "sentence" per character.
    if not isinstance(instances, list) or not all(isinstance(s, str) for s in instances):
        return {'error': "'instances' must be a list of strings"}, 400

    return {'predictions': list(encode_sents(*instances))}

In [None]:
# Start the Flask server; this call blocks the cell while the server runs.
# NOTE(review): run_with_ngrok(app) was applied to this app in an earlier cell,
# so this presumably also opens the ngrok tunnel -- confirm against flask-ngrok.
app.run()