Merge pull request #103 from i-dot-ai/feature/REDBOX-73-unit-test-core-api

Feature/redbox 73 unit test core api
lmwilkigov committed Mar 13, 2024
2 parents 26c444a + 7d2b185 commit 30596e7
Showing 9 changed files with 226 additions and 41 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/python-app.yml
@@ -8,6 +8,7 @@ on:
paths:
- redbox/**
- embed/**
- core_api/**
- ingest/**
- Makefile
- .github/**
@@ -46,8 +47,8 @@ jobs:
run: |
mkdir -p data/elastic/
chmod 777 data/elastic/
cp .env.example .env
docker compose up -d --wait elasticsearch minio
cp .env.test .env
docker compose up -d --wait elasticsearch minio rabbitmq
- name: Test redbox with pytest
run: |
@@ -61,6 +62,10 @@
run: |
make test-ingest
- name: Test core with pytest
run: |
make test-core-api
static_checks:

4 changes: 4 additions & 0 deletions Makefile
@@ -26,6 +26,10 @@ build:
rebuild:
docker compose build --no-cache

test-core-api:
poetry install --no-root --no-ansi --with worker,embed,api,dev --without ai,ingest,django-app,pytest-django
poetry run pytest core_api/tests --cov=core_api/src -v --cov-report=term-missing --cov-fail-under=45

test-embed:
poetry install --no-root --no-ansi --with worker,embed,api,dev --without ai,ingest,django-app,pytest-django
poetry run pytest embed/tests --cov=embed/src -v --cov-report=term-missing --cov-fail-under=45
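The new test-core-api target mirrors the existing test-embed recipe, and the updated CI job above shows the services it expects. A local run, assuming the same docker compose services and .env.test file that the workflow uses, would look roughly like this:

    mkdir -p data/elastic/
    chmod 777 data/elastic/
    cp .env.test .env
    docker compose up -d --wait elasticsearch minio rabbitmq
    make test-core-api
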
19 changes: 6 additions & 13 deletions core_api/src/app.py
@@ -15,6 +15,7 @@
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


env = Settings()


@@ -29,9 +30,7 @@
connection = env.blocking_connection()
channel = connection.channel()
channel.queue_declare(queue=env.ingest_queue_name, durable=True)

elif env.queue == "sqs":
sqs = env.sqs_client()
else:
raise NotImplementedError("SQS is not yet implemented")


@@ -54,7 +53,6 @@ class StatusResponse(pydantic.BaseModel):
# === API Setup ===

start_time = datetime.now()
IS_READY = True


# Create API
@@ -92,17 +90,12 @@ def health():
uptime = datetime.now() - start_time
uptime_seconds = uptime.total_seconds()

output = {"status": None, "uptime_seconds": uptime_seconds, "version": app.version}

if IS_READY:
output["status"] = "ready"
else:
output["status"] = "loading"
output = {"status": "ready", "uptime_seconds": uptime_seconds, "version": app.version}

return output


@app.post("/file/upload", response_model=File, tags=["file"])
@app.post("/file", response_model=File, tags=["file"])
async def create_upload_file(file: UploadFile, ingest=True) -> File:
"""Upload a file to the object store and create a record in the database
@@ -160,7 +153,7 @@ def get_file(file_uuid: UUID) -> File:
return storage_handler.read_item(str(file_uuid), model_type="File")


@app.post("/file/{file_uuid}/delete", response_model=File, tags=["file"])
@app.delete("/file/{file_uuid}", response_model=File, tags=["file"])
def delete_file(file_uuid: str) -> File:
"""Delete a file from the object store and the database
@@ -176,7 +169,7 @@ def delete_file(file_uuid: str) -> File:
return file


@app.post("/file/ingest/{file_uuid}", response_model=File, tags=["file"])
@app.post("/file/{file_uuid}/ingest", response_model=File, tags=["file"])
def ingest_file(file_uuid: str) -> File:
"""Trigger the ingest process for a file to a queue.
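Taken together, the core_api changes move the file endpoints onto a plainer REST surface (POST /file, GET and DELETE /file/{uuid}, POST /file/{uuid}/ingest) and reduce /health to a static "ready" payload. A sketch of how the renamed routes are exercised via FastAPI's TestClient, assuming the compose services from the workflow are up (the app opens its RabbitMQ connection at import time); the file name and response values are illustrative, not taken from the commit:

    from fastapi.testclient import TestClient
    from core_api.src.app import app

    client = TestClient(app)

    with open("example.pdf", "rb") as f:  # hypothetical local file
        created = client.post("/file", files={"file": ("example.pdf", f, "pdf")}).json()

    client.get(f"/file/{created['uuid']}")          # read the stored record
    client.post(f"/file/{created['uuid']}/ingest")  # push a message onto the ingest queue
    client.delete(f"/file/{created['uuid']}")       # remove it from the object store and Elasticsearch

    client.get("/health").json()
    # -> {"status": "ready", "uptime_seconds": 12.3, "version": "0.1.0"}  (illustrative values)
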
111 changes: 111 additions & 0 deletions core_api/tests/conftest.py
@@ -0,0 +1,111 @@
import os
from typing import TypeVar, Generator

import pytest
from elasticsearch import Elasticsearch

from fastapi.testclient import TestClient
from redbox.models import File
from core_api.src.app import app as application, env
from redbox.storage import ElasticsearchStorageHandler

T = TypeVar("T")

YieldFixture = Generator[T, None, None]


@pytest.fixture
def s3_client():
yield env.s3_client()


@pytest.fixture
def es_client() -> YieldFixture[Elasticsearch]:
yield env.elasticsearch_client()


@pytest.fixture
def app_client():
yield TestClient(application)


@pytest.fixture
def elasticsearch_storage_handler(es_client):
yield ElasticsearchStorageHandler(es_client=es_client, root_index="redbox-data")


@pytest.fixture
def file(s3_client, file_pdf_path, bucket) -> YieldFixture[File]:
"""
TODO: this is a cut and paste of core_api:create_upload_file
When we come to test core_api we should think about
the relationship between core_api and the ingest app
"""
file_name = os.path.basename(file_pdf_path)
file_type = file_name.split(".")[-1]

with open(file_pdf_path, "rb") as f:
s3_client.put_object(
Bucket=bucket,
Body=f.read(),
Key=file_name,
Tagging=f"file_type={file_type}",
)

authenticated_s3_url = s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": env.bucket_name, "Key": file_name},
ExpiresIn=3600,
)

# Strip off the query string (we don't need the keys)
simple_s3_url = authenticated_s3_url.split("?")[0]
file_record = File(
name=file_name,
path=simple_s3_url,
type=file_type,
creator_user_uuid="dev",
storage_kind=env.object_store,
)

yield file_record


@pytest.fixture
def stored_file(elasticsearch_storage_handler, file) -> YieldFixture[File]:
elasticsearch_storage_handler.write_item(file)
yield file


@pytest.fixture
def bucket(s3_client):
buckets = s3_client.list_buckets()
if not any(bucket["Name"] == env.bucket_name for bucket in buckets["Buckets"]):
s3_client.create_bucket(Bucket=env.bucket_name)
yield env.bucket_name


@pytest.fixture
def file_pdf_path() -> YieldFixture[str]:
path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"..", "..", "tests",
"data",
"pdf",
"Cabinet Office - Wikipedia.pdf",
)
yield path


@pytest.fixture
def rabbitmq_connection():
connection = env.blocking_connection()
yield connection
connection.close()


@pytest.fixture
def rabbitmq_channel(rabbitmq_connection):
channel = rabbitmq_connection.channel()
yield channel
channel.close()
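These fixtures are written to chain: stored_file depends on elasticsearch_storage_handler and file, and file in turn pulls in s3_client, file_pdf_path and bucket, so a test only has to request the top of the chain. A minimal sketch (a hypothetical test, not part of this commit) of how pytest resolves that:

    def test_stored_file_round_trip(elasticsearch_storage_handler, stored_file):
        # By the time the test body runs, the PDF has been uploaded to the
        # object store (MinIO locally) and the File record written to Elasticsearch.
        assert elasticsearch_storage_handler.read_item(
            item_uuid=stored_file.uuid, model_type="file"
        )
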
90 changes: 90 additions & 0 deletions core_api/tests/test_app.py
@@ -0,0 +1,90 @@
from threading import Event

import pytest
from elasticsearch import NotFoundError

from core_api.src.app import env


def test_get_health(app_client):
"""
Given that the app is running
When I call /health
I expect a 200 response
"""
response = app_client.get("/health")
assert response.status_code == 200


def test_post_file_upload(s3_client, app_client, elasticsearch_storage_handler, bucket, file_pdf_path):
"""
Given a new file
When I POST it to /file
I expect to see it persisted in S3 and Elasticsearch
"""
with open(file_pdf_path, "rb") as f:
response = app_client.post("/file", files={"file": ("filename", f, "pdf")})
assert response.status_code == 200
assert s3_client.get_object(Bucket=bucket, Key=file_pdf_path.split("/")[-1])
json_response = response.json()
assert elasticsearch_storage_handler.read_item(
item_uuid=json_response["uuid"], model_type=json_response["model_type"]
)


def test_get_file(app_client, stored_file):
"""
Given a previously saved file
When I GET it from /file/uuid
I expect to receive it
"""

response = app_client.get(f"/file/{stored_file.uuid}")
assert response.status_code == 200


def test_delete_file(s3_client, app_client, elasticsearch_storage_handler, bucket, stored_file):
"""
Given a previously saved file
When I DELETE it at /file/uuid
I expect to see it removed from S3 and Elasticsearch
"""
# check assets exist
assert s3_client.get_object(Bucket=bucket, Key=stored_file.name)
assert elasticsearch_storage_handler.read_item(item_uuid=stored_file.uuid, model_type="file")

response = app_client.delete(f"/file/{stored_file.uuid}")
assert response.status_code == 200

# check assets don't exist
with pytest.raises(Exception):
s3_client.get_object(Bucket=bucket, Key=stored_file.name)

with pytest.raises(NotFoundError):
elasticsearch_storage_handler.read_item(item_uuid=stored_file.uuid, model_type="file")


def test_ingest_file(app_client, rabbitmq_channel, stored_file):
"""
Given a previously saved file
When I POST to /file/uuid/ingest
I expect to see a message on the ingest queue (assertion currently disabled; see the TODO below)
"""
message_consumed = Event()

def callback(ch, method, properties, body):
message_consumed.set()
ch.basic_ack(delivery_tag=method.delivery_tag)

rabbitmq_channel.basic_consume(queue=env.ingest_queue_name, on_message_callback=callback)

response = app_client.post(f"/file/{stored_file.uuid}/ingest/")
assert response.status_code == 200

# TODO: fix this!
# start_time = time.time()
# while not message_consumed.is_set() and time.time() - start_time < 10:
# time.sleep(0.1)
#
# assert message_consumed.is_set()
# rabbitmq_channel.basic_cancel(consumer_tag=None)
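With a pika BlockingConnection, callbacks registered via basic_consume are only dispatched while the connection is processing events (start_consuming or process_data_events), which the commented-out sleep loop never does, so message_consumed can never be set from inside the test. One possible way to finish the check, offered as a sketch rather than a verified fix for this commit, is to poll the queue directly with basic_get, which needs no consumer loop:

    import time

    def wait_for_ingest_message(channel, queue_name, timeout=10.0):
        # Poll the queue until a message arrives or the timeout elapses.
        deadline = time.time() + timeout
        while time.time() < deadline:
            method, _properties, body = channel.basic_get(queue=queue_name, auto_ack=True)
            if method is not None:
                return body
            time.sleep(0.1)
        return None

    # In test_ingest_file, after the POST:
    # assert wait_for_ingest_message(rabbitmq_channel, env.ingest_queue_name) is not None
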
5 changes: 1 addition & 4 deletions ingest/src/app.py
@@ -1,6 +1,5 @@
import json
import logging
import os

from model_db import SentenceTransformerDB
from redbox.models import File, Settings
@@ -10,9 +9,7 @@
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env.test")

env = Settings(_env_file=env_path) # type: ignore
env = Settings()


class FileIngestor:
14 changes: 2 additions & 12 deletions ingest/tests/conftest.py
@@ -5,23 +5,13 @@
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer

from redbox.models import File, Settings
from redbox.models import File
from ingest.src.app import env

T = TypeVar("T")

YieldFixture = Generator[T, None, None]

env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env.test")


env = Settings( # type: ignore
_env_file=env_path,
object_store="minio",
minio_host="localhost",
elastic_host="localhost",
embedding_model="paraphrase-albert-small-v2",
)


@pytest.fixture
def s3_client():
10 changes: 4 additions & 6 deletions redbox/models/settings.py
@@ -1,12 +1,11 @@
import os

from typing import Optional, Literal

import boto3
import pika
from elasticsearch import Elasticsearch
from pydantic_settings import BaseSettings, SettingsConfigDict

env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env")


class Settings(BaseSettings):
@@ -47,11 +46,7 @@ class Settings(BaseSettings):
rabbitmq_port: int = 5672
rabbitmq_user: str = "guest"
rabbitmq_password: str = "guest"

dev_mode: bool = False

model_config = SettingsConfigDict(env_file=env_path)

django_settings_module: str = "redbox_app.settings"
debug: bool = True
django_secret_key: str
@@ -62,6 +57,8 @@
postgres_host: str = "db"
contact_email: str = "test@example.com"

model_config = SettingsConfigDict(env_file=".env")

def elasticsearch_client(self) -> Elasticsearch:
es = Elasticsearch(
hosts=[
@@ -73,6 +70,7 @@ def elasticsearch_client(self) -> Elasticsearch:
],
basic_auth=(self.elastic_user, self.elastic_password),
)

return es

def s3_client(self):
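With env_file now fixed to ".env" in model_config, every service builds its configuration the same way — Settings() with no arguments — and the ingest app and its test conftest no longer need to compute a path to an env file. A small sketch of the resulting behaviour, assuming the required fields (for example django_secret_key, which has no default) are supplied by .env or the environment:

    from redbox.models import Settings

    env = Settings()  # reads .env from the working directory, then falls back to field defaults

    # pydantic-settings gives real environment variables precedence over the .env file,
    # so e.g. `export ELASTIC_HOST=localhost` overrides whatever .env declares.
    print(env.elastic_host, env.bucket_name, env.ingest_queue_name)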