In [1]:
!pip install --quiet \
    "sciencebeam-parser>=0.1.4" \
    "tensorflow<2.0.0" \
    "numpy<1.17.0" \
    "pandas<1.3.0" \
    "requests" \
    "typing_extensions"

In [2]:
# Show the version of the `python` executable found on the shell PATH
# (`!` hands the rest of the line to the system shell).
# NOTE(review): this is presumably the same interpreter as the kernel's - confirm.
!python --version

Python 3.7.6


In [3]:
# List the installed versions of the packages this notebook depends on:
# the `pip freeze` output is filtered, case-insensitively, by an extended regex
# matching any of the listed package-name fragments.
!pip freeze | grep --ignore-case --extended-regexp "tensorflow|keras|sciencebeam|numpy|pandas|delft"

delft==0.2.7
Keras==2.2.4
Keras-Applications==1.0.8
keras-bert==0.84.0
keras-embed-sim==0.9.0
keras-layer-normalization==0.15.0
keras-multi-head==0.28.0
keras-pos-embd==0.12.0
keras-position-wise-feed-forward==0.7.0
Keras-Preprocessing==1.1.2
keras-self-attention==0.50.0
keras-transformer==0.39.0
numpy==1.16.6
pandas==1.2.5
sciencebeam-parser==0.1.4
sciencebeam-trainer-delft==0.0.31
tensorflow==1.15.5
tensorflow-estimator==1.15.1


In [4]:
import logging
import os
from pathlib import Path

import tensorflow as tf

In [5]:
# Configure the root logger at ERROR level: records below ERROR are filtered
# out, unless a library sets a more verbose level on its own logger.
logging.basicConfig(level=logging.ERROR)

# Apply the same threshold to TensorFlow's logger to cut down on its warnings.
tf.logging.set_verbosity(tf.logging.ERROR)

In [6]:
# Fetch a sample PDF with Keras' `get_file` helper. The local file is named
# after the last path segment of the URL; the returned path is kept in
# `local_sample_file` for the request cell further down.
sample_url = 'https://github.com/elifesciences/sciencebeam-parser/raw/develop/test-data/minimal-example.pdf'
local_sample_file = tf.keras.utils.get_file(
    fname=os.path.basename(sample_url),
    origin=sample_url
)

In [7]:
# because we want to run other cells, we run the app in the background (not a common pattern otherwise)
# https://stackoverflow.com/a/45017691/8676953

import threading

from werkzeug.serving import make_server


class ServerThread(threading.Thread):
    """Run a WSGI app on a werkzeug server in a background thread.

    The server object is created in the constructor via
    `werkzeug.serving.make_server`; `start()` runs its `serve_forever()` loop
    on the new thread and `shutdown()` stops it again.

    Args:
        app: the application to serve; it must provide `app_context()`.
        **kwargs: passed through unchanged to `make_server`
            (e.g. `host`, `port`, `threaded`).
    """

    def __init__(self, app, **kwargs):
        # Daemon thread: a server that was never shut down must not keep the
        # interpreter alive when the process exits.
        super().__init__(daemon=True)
        self.server = make_server(app=app, **kwargs)
        # Push an application context on the constructing thread.
        # It is kept on `self.ctx`; this class does not pop it.
        self.ctx = app.app_context()
        self.ctx.push()

    def run(self):
        """Serve requests until `shutdown()` is called."""
        self.server.serve_forever()

    def shutdown(self):
        """Stop serving, wait for the thread to finish and release the socket.

        Safe to call on a thread that was never started or has already
        finished, and safe to call more than once.
        """
        if self.is_alive():
            # The server's shutdown() waits for serve_forever() to exit; only
            # call it while the serving thread is actually running, otherwise
            # it could wait forever.
            self.server.shutdown()
            self.join()
        # Close the listening socket so that the port can be bound again
        # (e.g. when the cell starting the server is re-run).
        self.server.server_close()

In [8]:
from threading import Thread

from sciencebeam_parser.config.config import AppConfig
from sciencebeam_parser.resources.default_config import DEFAULT_CONFIG_FILE
from sciencebeam_parser.service.server import create_app

# Build the service application from the package's default configuration file.
config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
app = create_app(config)

# Serve it from a background thread so that later cells can still run.
# The extra keyword arguments are handed to werkzeug's `make_server`.
server = ServerThread(
    app,
    host='127.0.0.1',
    port=8080,
    threaded=True
)
server.start()

Using TensorFlow backend.


In [9]:
# post a request to the service
import requests

# Upload the sample PDF as the `file` part of the request.
# The server was started with host='127.0.0.1', so address it by the same
# literal: `localhost` may resolve to the IPv6 loopback (::1) first, where
# nothing is listening.
with open(local_sample_file, 'rb') as fp:
    response = requests.post(
        'http://127.0.0.1:8080/api/processHeaderDocument',
        files={'file': fp}
    )
# Raise on a 4xx / 5xx response instead of printing an error body.
response.raise_for_status()
print(response.text)



INFO:werkzeug:127.0.0.1 - - [19/Nov/2021 21:38:46] "POST /api/processHeaderDocument HTTP/1.1" 200 -


<TEI xmlns="http://www.tei-c.org/ns/1.0"><teiHeader><fileDesc><titleStmt><title level="a" type="main" coords="1,107.60,84.88,312.10,13.28">This minimal example is used as a test document for automated tests</title></titleStmt></fileDesc><note type="O" coords="1,56.80,57.28,117.06,40.88"><hi rend="bold">Title:</hi> Minimal Example <hi rend="bold">Abstract:</hi></note><note type="O" coords="1,435.89,84.88,4.05,13.28">.</note></teiHeader></TEI>


In [10]:
# Stop the service: ServerThread.shutdown() asks the werkzeug server's
# serve_forever() loop, running on the background thread, to exit.
server.shutdown()