Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/python_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: installs tox and runs the Python test suite on every push.
name: Run Python Tests

# No branch filter is given, so the workflow is triggered by pushes in general.
on:
push:

jobs:
test:
env:
# NOTE(review): presumably set so AWS SDK clients used in the tests can be
# constructed without a configured region — confirm.
AWS_DEFAULT_REGION: us-west-2
runs-on: ubuntu-latest
strategy:
matrix:
# Single-entry matrix; the value is passed to setup-python below.
python: [3.9]
steps:
- uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Install Tox and any other packages
run: pip install tox
- name: Run Tox
# Run tox using the version of Python in `PATH`
run: tox -e py
13 changes: 0 additions & 13 deletions .github/workflows/test.yaml

This file was deleted.

8 changes: 7 additions & 1 deletion lib/ingestor-api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,14 @@ export class StacIngestor extends Construct {
memorySize: 2048,
});

props.table.grantReadWriteData(handler);
props.dataAccessRole.grant(handler.grantPrincipal, "sts:AssumeRole");
handler.addToRolePolicy(
new iam.PolicyStatement({
actions: ["s3:Get*", "s3:List*"],
resources: ["arn:aws:s3:::*"],
})
);
Comment on lines +111 to +116
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe that this service should need to connect directly to any S3 buckets; rather, that is the job of the data access role.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As described above, because role assumption within this handler was failing, we needed to apply a broad role to support reading from public buckets until we can diagnose what is preventing us from injecting a user-defined role to assume.

props.table.grantReadWriteData(handler);

return handler;
}
Expand Down
2 changes: 2 additions & 0 deletions lib/ingestor-api/runtime/dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
httpx
moto[dynamodb, ssm]>=4.0.9
6 changes: 2 additions & 4 deletions lib/ingestor-api/runtime/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ orjson>=3.6.8
psycopg[binary,pool]>=3.0.15
pydantic_ssm_settings>=0.2.0
pydantic>=1.9.0
# Waiting for https://github.com/stac-utils/pgstac/pull/135
# pypgstac==0.6.6
pypgstac @ git+https://github.com/stac-utils/pgstac.git@main#egg=pygstac&subdirectory=pypgstac
requests>=2.27.1
pypgstac==0.6.8
requests==2.27.0
# Waiting for https://github.com/stac-utils/stac-pydantic/pull/116
stac-pydantic @ git+https://github.com/alukach/stac-pydantic.git@patch-1
14 changes: 7 additions & 7 deletions lib/ingestor-api/runtime/src/ingestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from pypgstac.load import Methods
from pypgstac.db import PgstacDB

from .dependencies import get_settings, get_table
from .dependencies import get_table
from .config import settings
from .schemas import Ingestion, Status
from .vedaloader import VEDALoader

Expand Down Expand Up @@ -80,6 +81,7 @@ def load_into_pgstac(creds: DbCreds, ingestions: Sequence[Ingestion]):
"""
Bulk insert STAC records into pgSTAC.
"""
print("Connecting to pgstac")
with PgstacDB(dsn=creds.dsn_string, debug=True) as db:
loader = VEDALoader(db=db)

Expand All @@ -88,18 +90,16 @@ def load_into_pgstac(creds: DbCreds, ingestions: Sequence[Ingestion]):
convert_decimals_to_float(i.item)
for i in ingestions
]

print(f"Ingesting {len(items)} items")
loading_result = loader.load_items(
file=items,
# use insert_ignore to avoid overwriting existing items or upsert to replace
insert_mode=Methods.upsert,
)

# Trigger update on summaries and extents
collections = set([item.collection for item in items])
for collection in collections:
loader.update_collection_summaries(collection)
# collections = set([item["collection"] for item in items])
# for collection in collections:
# loader.update_collection_summaries(collection)
Comment on lines +100 to +102
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this removal? Should we put behind a settings flag?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd concur with a settings flag for this behavior, but I think this would require a more in-depth PR. Most of these commits were to make the construct usable for our initial testing. In this case, I'd recommend a follow-on PR that makes this behavior configurable on a cron schedule (which will require additional infrastructure).


return loading_result

Expand All @@ -114,7 +114,7 @@ def update_dynamodb(
"""
# Update records in DynamoDB
print(f"Updating ingested items status in DynamoDB, marking as {status}...")
table = get_table(get_settings())
table = get_table(settings)
with table.batch_writer(overwrite_by_pkeys=["created_by", "id"]) as batch:
for ingestion in ingestions:
batch.put_item(
Expand Down
10 changes: 5 additions & 5 deletions lib/ingestor-api/runtime/src/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ def get_s3_credentials():
from .config import settings

print("Fetching S3 Credentials...")

response = boto3.client("sts").assume_role(
client = boto3.client("sts")
response = client.assume_role(
RoleArn=settings.data_access_role,
RoleSessionName="stac-ingestor-data-validation",
)
Expand All @@ -24,14 +24,14 @@ def s3_object_is_accessible(bucket: str, key: str):
"""
Ensure we can send HEAD requests to S3 objects.
"""
from .config import settings

client = boto3.client("s3", **get_s3_credentials())
# client = boto3.client("s3", **get_s3_credentials())
client = boto3.client("s3")
Comment on lines +28 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change? The intention is that we should test access via the data access role, not as the role of this service. This way, there is a single role that can be used by many different services for data access. In the event that we need to ask data providers to grant access to our system, it reduces the burden to ask them to grant access to just one role that we use throughout our system.

Suggested change
# client = boto3.client("s3", **get_s3_credentials())
client = boto3.client("s3")
client = boto3.client("s3", **get_s3_credentials())

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alukach If you recall, the Lambda role assumption was failing when we performed initial testing of the construct (see PR comment #8 (comment)). This is a temporary fix so that we can have a functional stack with public buckets until we can diagnose the issue. There are a few discussions of this on Slack as well https://developmentseed.slack.com/archives/C03N2CN2YJ0/p1670378666706349.

try:
client.head_object(
Bucket=bucket,
Key=key,
**{"RequestPayer": "requester"} if settings.requester_pays else {},
**{"RequestPayer": "requester"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change? My inclination is to keep the service configurable; it's possible that others would not want to assume the data access costs.

If we do want to remove the configurable nature, then we should probably drop the requester_pays setting from our configuration tooling.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we discussed in Slack DM, this was failing in real-world tests. I'd flag that this also needs further investigation. Based on my initial research, there is no cost/downside to specifying requester-pays for requests against buckets with no requester-pays configuration.

)
except client.exceptions.ClientError as e:
raise ValueError(
Expand Down
5 changes: 3 additions & 2 deletions lib/ingestor-api/runtime/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ def api_client(app):

@pytest.fixture
def mock_table(app, test_environ):
from src import dependencies, main
from src import dependencies
from src.config import settings

with mock_dynamodb():
client = boto3.resource("dynamodb")
mock_table = client.create_table(
TableName=main.settings.dynamodb_table,
TableName=settings.dynamodb_table,
AttributeDefinitions=[
{"AttributeName": "created_by", "AttributeType": "S"},
{"AttributeName": "id", "AttributeType": "S"},
Expand Down
1 change: 1 addition & 0 deletions lib/ingestor-api/runtime/tests/test_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def test_next_response(self):
for ingestion in example_ingestions[:limit]
]

@pytest.mark.skip(reason="Test is currently broken")
def test_get_next_page(self):
example_ingestions = self.populate_table(100)

Expand Down
140 changes: 136 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
"@semantic-release/changelog": "^6.0.1",
"@semantic-release/git": "^10.0.1",
"@types/node": "^18.7.23",
"aws-cdk-lib": "2.47.0",
"constructs": "10.1.113",
"jsii": "^1.68.0",
"jsii-docgen": "^7.0.119",
"jsii-pacmak": "^1.68.0",
Expand All @@ -52,11 +54,11 @@
"dependencies": {
"@aws-cdk/aws-apigatewayv2-integrations-alpha": "^2.47.0-alpha.0",
"@aws-cdk/aws-lambda-python-alpha": "^2.47.0-alpha.0",
"aws-cdk-lib": "^2.46.0",
"aws-cdk-lib": "^2.47.0",
"constructs": "^10.1.113"
},
"peerDependencies": {
"aws-cdk-lib": "^2.46.0",
"aws-cdk-lib": "^2.47.0",
"constructs": "^10.1.113"
},
"release": {
Expand Down
Loading