Skip to content

Commit

Permalink
Add Tika Server to package
Browse files Browse the repository at this point in the history
  • Loading branch information
helviojunior committed May 12, 2023
1 parent 87e887d commit b2cc7c3
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/build_and_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ jobs:
# Check out the repository so later steps can use requirements.txt and the filecrawler/ tree.
- name: Checkout filecrawler
uses: actions/checkout@v3

# Install the command-line tools listed under `packages` via the apt package caching action.
# NOTE(review): `@latest` is a moving ref; consider pinning to a release tag or commit SHA.
- name: Install dependencies
  uses: awalsh128/cache-apt-pkgs-action@latest
  with:
    packages: wget curl jq
    # Quoted so the value stays the literal string "1.0" instead of being parsed as a YAML float.
    version: "1.0"

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
Expand All @@ -102,6 +108,13 @@ jobs:
python -m pip install --upgrade pip wheel
python -m pip install -r requirements.txt -r tests/requirements-test.txt
# Download the Tika server jar matching the TIKA_VERSION found in tika-python's tika.py
# and verify it against the SHA-1 file published next to the jar.
- name: Get tika server .jar
  run: |
    JAR_PATH="./filecrawler/libs/bin/tika-server.jar"
    # Keep only the first version-looking match; fall back to 2.6.0 when the lookup fails or yields nothing.
    VER=$(curl -fsSL "https://raw.githubusercontent.com/chrismattmann/tika-python/master/tika/tika.py" | grep 'TIKA_VERSION' | grep -oE '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | head -n 1 || true)
    VER="${VER:-2.6.0}"
    # HTTPS so neither the jar nor its checksum travels in clear text.
    JAR_URL="https://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server-standard/$VER/tika-server-standard-$VER.jar"
    # -f fails on HTTP errors instead of storing an error page as the hash; -L follows redirects.
    SERVER_HASH=$(curl -fsSL "${JAR_URL}.sha1")
    wget -nv -O "$JAR_PATH" "$JAR_URL"
    # sha1sum's check format is "<hash>  <file>" (two spaces).
    echo "${SERVER_HASH}  ${JAR_PATH}" | sha1sum -c - || exit 1
- name: Build artifact
run: |
python setup.py sdist
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ jobs:
# Check out the repository so later steps can use filecrawler/__meta__.py and the filecrawler/ tree.
- name: Checkout filecrawler
uses: actions/checkout@v3

# Install the command-line tools listed under `packages` via the apt package caching action.
# NOTE(review): `@latest` is a moving ref; consider pinning to a release tag or commit SHA.
- name: Install dependencies
  uses: awalsh128/cache-apt-pkgs-action@latest
  with:
    packages: wget curl jq
    # Quoted so the value stays the literal string "1.0" instead of being parsed as a YAML float.
    version: "1.0"

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
Expand All @@ -168,6 +174,13 @@ jobs:
meta=$(cat filecrawler/__meta__.py | sed "s/__version__.*/__version__ = '"${oldv}"-"${current}"'/")
echo "$meta" > filecrawler/__meta__.py
# Download the Tika server jar matching the TIKA_VERSION found in tika-python's tika.py
# and verify it against the SHA-1 file published next to the jar.
- name: Get tika server .jar
  run: |
    JAR_PATH="./filecrawler/libs/bin/tika-server.jar"
    # Keep only the first version-looking match; fall back to 2.6.0 when the lookup fails or yields nothing.
    VER=$(curl -fsSL "https://raw.githubusercontent.com/chrismattmann/tika-python/master/tika/tika.py" | grep 'TIKA_VERSION' | grep -oE '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | head -n 1 || true)
    VER="${VER:-2.6.0}"
    # HTTPS so neither the jar nor its checksum travels in clear text.
    JAR_URL="https://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server-standard/$VER/tika-server-standard-$VER.jar"
    # -f fails on HTTP errors instead of storing an error page as the hash; -L follows redirects.
    SERVER_HASH=$(curl -fsSL "${JAR_URL}.sha1")
    wget -nv -O "$JAR_PATH" "$JAR_URL"
    # sha1sum's check format is "<hash>  <file>" (two spaces).
    echo "${SERVER_HASH}  ${JAR_PATH}" | sha1sum -c - || exit 1
- name: Install filecrawler
run: |
pip install .
Expand Down
2 changes: 2 additions & 0 deletions filecrawler/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def initialize():
# Overwrite config values with arguments (if defined)
Configuration.load_from_arguments()

os.environ["TIKA_SERVER_JAR"] = os.path.join(Configuration.lib_path, 'tika-server.jar')


@staticmethod
def load_from_arguments():
Expand Down

0 comments on commit b2cc7c3

Please sign in to comment.