
Commit

fix merge conflicts
BeritJanssen committed Jun 20, 2024
2 parents c013395 + 4166fc0 commit 0ff3119
Showing 248 changed files with 7,441 additions and 4,791 deletions.
5 changes: 2 additions & 3 deletions .github/ISSUE_TEMPLATE/bug_report.yaml
@@ -47,9 +47,8 @@ body:
attributes:
label: Version
description: |
For third-party and local servers, please add information about the version of the
software, if you know it. A version number (e.g "1.2.3") is great. For a pre-release
build, you can provide the branch or commit hash.
Please add information about the version of I-analyzer where you encountered the bug.
You can find the version number (e.g. "1.2.3") in the footer of the site.
validations:
required: false
- type: textarea
27 changes: 27 additions & 0 deletions .github/workflows/backend-test.yml
@@ -0,0 +1,27 @@
# This workflow will run backend tests on the Python version defined in the Dockerfiles

name: Backend unit tests

on:
workflow_dispatch:
push:
branches:
- 'develop'
- 'master'
- 'feature/**'
- 'bugfix/**'
- 'hotfix/**'
- 'release/**'
- 'dependabot/**'
paths-ignore:
- 'frontend/**'
- '**.md'

jobs:
backend-test:
name: Test Backend
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run backend tests
run: sudo mkdir -p /ci-data && sudo docker-compose --env-file .env-ci run backend pytest
27 changes: 27 additions & 0 deletions .github/workflows/frontend-test.yml
@@ -0,0 +1,27 @@
# This workflow will run frontend tests on the Node version defined in the Dockerfiles

name: Frontend unit tests

on:
workflow_dispatch:
push:
branches:
- 'develop'
- 'master'
- 'feature/**'
- 'bugfix/**'
- 'hotfix/**'
- 'release/**'
- 'dependabot/**'
paths-ignore:
- 'backend/**'
- '**.md'

jobs:
frontend-test:
name: Test Frontend
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run frontend tests
run: sudo docker-compose --env-file .env-ci run frontend yarn test
22 changes: 0 additions & 22 deletions .github/workflows/release.yaml

This file was deleted.

25 changes: 25 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,25 @@
# This action will update the CITATION.cff file for new release or hotfix branches

name: Release

on:
push:
branches:
- 'release/**'
- 'hotfix/**'

jobs:
citation-update:
name: Update CITATION.cff
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Autoformat CITATION.cff
run: |
version=`grep -o '\d\+\.\d\+\.\d\+' package.json`
today=`date +"%Y-%m-%d"`
sed -i "s/^version: [[:digit:]]\{1,\}\.[[:digit:]]\{1,\}\.[[:digit:]]\{1,\}/version: $version/" CITATION.cff
sed -i "s/[[:digit:]]\{4\}-[[:digit:]]\{2\}-[[:digit:]]\{2\}/$today/" CITATION.cff
bash ./update-citation.sh
git commit -a -m "update version and date in CITATION.cff"
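
The two sed calls above rewrite the "version:" and "date-released:" lines of CITATION.cff, using the version string found in package.json and today's date. As a rough illustration only (the workflow itself uses grep and sed, and also runs update-citation.sh afterwards), the same substitution could look like this in Python, assuming package.json carries a top-level "version" field:

# Illustration only, not part of the workflow: a rough Python equivalent
# of the grep/sed steps above. Assumes package.json has a "version" field.
import json
import re
from datetime import date

with open('package.json') as f:
    version = json.load(f)['version']        # e.g. "5.8.0"
today = date.today().isoformat()             # e.g. "2024-06-19"

with open('CITATION.cff') as f:
    cff = f.read()

# replace the "version: x.y.z" line and any YYYY-MM-DD date in the file
cff = re.sub(r'^version: \d+\.\d+\.\d+', f'version: {version}', cff, flags=re.M)
cff = re.sub(r'\d{4}-\d{2}-\d{2}', today, cff)

with open('CITATION.cff', 'w') as f:
    f.write(cff)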
32 changes: 0 additions & 32 deletions .github/workflows/test.yml

This file was deleted.

1 change: 1 addition & 0 deletions .nvmrc
@@ -0,0 +1 @@
18.17.1
4 changes: 2 additions & 2 deletions CITATION.cff
@@ -35,5 +35,5 @@ keywords:
- elasticsearch
- natural language processing
license: MIT
version: 5.5.1
date-released: '2024-03-21'
version: 5.8.0
date-released: '2024-06-19'
2 changes: 1 addition & 1 deletion README.md
@@ -18,7 +18,7 @@ For corpora included in I-analyzer, the backend includes a definition file that

## Usage

If you are interested in using I-analyzer, the most straightforward way to get started is to make an account at [ianalyzer.hum.uu.nl](https://ianalyzer.hum.uu.nl/). This server is maintained by the Research Software Lab and contains corpora focused on a variety of fields. We also maintain more specialised collections at [PEACE portal](https://peace.sites.uu.nl/epigraphy/search/) and [People & Parliament (not publicly accessible)](https://people-and-parliament.hum.uu.nl/).
If you are interested in using I-analyzer, the most straightforward way to get started is to visit [ianalyzer.hum.uu.nl](https://ianalyzer.hum.uu.nl/). This server is maintained by the Research Software Lab and contains corpora focused on a variety of fields. We also maintain more specialised collections at [PEACE portal](https://peace.sites.uu.nl/epigraphy/search/) and [People & Parliament](https://people-and-parliament.hum.uu.nl/).

I-analyzer does not have an "upload data" option (yet!). If you are interested in using I-analyzer as a way to publish your dataset, or to make it easier to search and analyse, you can go about this two ways:

20 changes: 18 additions & 2 deletions backend/addcorpus/admin.py
@@ -15,7 +15,7 @@ def show_warning_message(request):


class CorpusAdmin(admin.ModelAdmin):
readonly_fields = ['name', 'configuration', 'has_python_definition', 'ready_to_index', 'ready_to_publish']
readonly_fields = ['configuration', 'ready_to_index', 'ready_to_publish']
fields = ['name', 'groups', 'configuration', 'has_python_definition', 'ready_to_index', 'ready_to_publish', 'active']
list_display = ['name', 'active']
list_filter = ['groups', 'active']
@@ -44,6 +44,14 @@ class CorpusConfigurationAdmin(admin.ModelAdmin):
'image',
]
}
), (
'Source data extraction',
{
'fields': [
'data_directory',
'source_data_delimiter',
]
}
), (
'Content',
{
@@ -104,13 +112,21 @@ class FieldAdmin(admin.ModelAdmin):
]
}
),
(
'Source data extraction',
{
'fields': [
'extract_column',
'required',
]
}
),
(
'Indexing options',
{
'fields': [
'es_mapping',
'indexed',
'required',
]
}
), (
21 changes: 2 additions & 19 deletions backend/addcorpus/conftest.py
@@ -1,31 +1,14 @@
import pytest
import os
from django.contrib.auth.models import Group
from addcorpus.models import Corpus

@pytest.fixture()
def group_with_access(db, mock_corpus):
def group_with_access(db, basic_mock_corpus):
'''Create a group with access to the mock corpus'''
group = Group.objects.create(name='nice-users')
corpus = Corpus.objects.get(name=mock_corpus)
corpus = Corpus.objects.get(name=basic_mock_corpus)
corpus.groups.add(group)
corpus.save()
yield group
group.delete()

here = os.path.abspath(os.path.dirname(__file__))

@pytest.fixture()
def mock_corpus():
return 'mock-csv-corpus'


@pytest.fixture()
def basic_corpus():
corpus_name = 'mock-basic-corpus'
basic_group = Group.objects.create(name='basic')
corpus = Corpus.objects.get(name=corpus_name)
corpus.groups.add(basic_group)
yield corpus_name
corpus.groups.remove(basic_group)
basic_group.delete()
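
The group_with_access fixture now builds on basic_mock_corpus instead of the local mock_corpus and basic_corpus fixtures removed here. A minimal, hypothetical usage sketch, assuming basic_mock_corpus is provided by a shared conftest elsewhere in the test suite:

# Hypothetical test, for illustration only; assumes the basic_mock_corpus
# fixture is defined in a shared conftest.
from addcorpus.models import Corpus

def test_group_has_corpus_access(group_with_access, basic_mock_corpus):
    corpus = Corpus.objects.get(name=basic_mock_corpus)
    # the fixture added the group to the corpus, so it should be listed here
    assert group_with_access in corpus.groups.all()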
22 changes: 16 additions & 6 deletions backend/addcorpus/es_mappings.py
@@ -1,12 +1,16 @@
from addcorpus.es_settings import add_language_string
from typing import Dict
from addcorpus.es_settings import add_language_string, stopwords_available, stemming_available

def primary_mapping_type(es_mapping: Dict) -> str:
return es_mapping.get('type', None)

def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None, updated_highlighting=True):
'''
Mapping for the main content field. Options:
- `token_counts`: enables aggregations for the total number of words. Used for relative term frequencies.
- `stopword_analysis`: enables analysis using stopword removal.
- `stemming_analysis`: enables analysis using stemming.
- `stopword_analysis`: enables analysis using stopword removal, if available for the language.
- `stemming_analysis`: enables analysis using stemming, if available for the language.
- `updated_highlighting`: enables the new highlighter, which only works for fields that are indexed with the term vector set to 'with_positions_offsets'.
'''

@@ -26,13 +30,13 @@ def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_an
"type": "token_count",
"analyzer": "standard"
}
if stopword_analysis:
if stopword_analysis and stopwords_available(language):
multifields['clean'] = {
"type": "text",
"analyzer": add_language_string('clean', language),
"term_vector": "with_positions_offsets" # include character positions for highlighting
}
if stemming_analysis:
if stemming_analysis and stemming_available(language):
multifields['stemmed'] = {
"type": "text",
"analyzer": add_language_string('stemmed', language),
@@ -87,8 +91,14 @@ def int_mapping():
'type': 'integer'
}

def float_mapping():
return {
'type': 'float'
}


def bool_mapping():
return {'type': 'boolean'}

def geo_mapping():
return {'type': 'geo_point'}
return {'type': 'geo_point'}
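
The docstring of main_content_mapping above spells out its options; as a minimal usage sketch, assuming a language (such as English) for which both stopwords and stemming are available in es_settings:

# Usage sketch, for illustration only; assumes 'en' has stopword and
# stemming support, so both optional multifields are added.
from addcorpus.es_mappings import main_content_mapping

mapping = main_content_mapping(
    token_counts=True,
    stopword_analysis=True,
    stemming_analysis=True,
    language='en',
)
# Based on the logic above, the mapping should carry three multifields:
# 'length' (token_count, for relative term frequencies),
# 'clean' (stopword-filtered text), and 'stemmed' (stemmed text).
print(mapping)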