Version 2.0 Release (#12)
* initial implementations of dynamodb table

* initial implementation of rekognition and bucket setup with CR is tested to work

* documentation updates

* further documentation optimisations

* further updates to bucket creation. PutBucketInventoryConfiguration is still having issues though

* got inventory creation setup. implemented initial setup for lambda layers. basic helper classes added

* used better deep copy of dictionary

* implemented lambdalayers and bucket>sns>sqs for event queue

* Lots of updates. Cleaned configurations. Added SNS. Fixed Layers. Simplified CRs. Integrated Layers usage in Features

* logging bucket was having events listened to on it

* small typos in update and delete for bucket linking

* converted project to use nested stacks under 1 root stack to simplify deployment

* updated readme with changes

* Create build and test pipeline

* Create dependency review

* Create codeql

* added name prefixes

* moved feature list over to github issues

* major updates. refactored out CRs for event linking bucket to sns to sqs

* mass refactor. feature methods now use state machine. repo structures and names have changed. more standardised

* bug fixes. got everything working with the state machine and helper libraries, permissions, logging, etc

* Upgraded to latest TS and CDK. Added github actions for code scanning

* upgraded CI workflow to use node v18 now

* simplifying where codacy runs

* limit CI to only run on any push to master
bensoer committed Sep 30, 2023
1 parent 9f242f8 commit c316fde
Showing 54 changed files with 6,587 additions and 5,680 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/codacy.yml
@@ -0,0 +1,61 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# This workflow checks out code, performs a Codacy security scan
# and integrates the results with the
# GitHub Advanced Security code scanning feature. For more information on
# the Codacy security scan action usage and parameters, see
# https://github.com/codacy/codacy-analysis-cli-action.
# For more information on Codacy Analysis CLI in general, see
# https://github.com/codacy/codacy-analysis-cli.

name: Codacy Security Scan

on:
  push:
    branches: [ "master" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "master" ]
  schedule:
    - cron: '37 8 * * 4'

permissions:
  contents: read

jobs:
  codacy-security-scan:
    permissions:
      contents: read # for actions/checkout to fetch code
      security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
    name: Codacy Security Scan
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout code
        uses: actions/checkout@v3

      # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis
      - name: Run Codacy Analysis CLI
        uses: codacy/codacy-analysis-cli-action@d840f886c4bd4edc059706d09c6a1586111c540b
        with:
          # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository
          # You can also omit the token and run the tools that support default configurations
          # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
          verbose: true
          output: results.sarif
          format: sarif
          # Adjust severity of non-security issues
          gh-code-scanning-compat: true
          # Force 0 exit code to allow SARIF file generation
          # This will handover control about PR rejection to the GitHub side
          max-allowed-issues: 2147483647

      # Upload the SARIF file generated in the previous step
      - name: Upload SARIF results file
        uses: github/codeql-action/upload-sarif@v2
        with:
          sarif_file: results.sarif
20 changes: 20 additions & 0 deletions .github/workflows/dependency-review.yml
@@ -0,0 +1,20 @@
# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
name: 'Dependency Review'
on: [pull_request]

permissions:
  contents: read

jobs:
  dependency-review:
    runs-on: ubuntu-latest
    steps:
      - name: 'Checkout Repository'
        uses: actions/checkout@v3
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@v1
31 changes: 31 additions & 0 deletions .github/workflows/main.yml
@@ -0,0 +1,31 @@
# This is a basic workflow to help you get started with Actions

name: CI

# Controls when the workflow will run
on:
  # Triggers the workflow on push events, but only for the "master" branch
  push:
    branches: [ "master" ]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      - uses: actions/checkout@v3

      - name: Build and Compile
        uses: actions/setup-node@v3
        with:
          node-version: '18.x'
      - run: npm ci
      - run: npm run build --if-present
      - run: npm test
108 changes: 4 additions & 104 deletions README.md
@@ -7,6 +7,8 @@ All this functionality and much more is also highly customisable!

cdk-photo-archive can also work with existing S3 storage archives on AWS. This way photographers already taking advantage of cloud storage can still use cdk-photo-archive's hashing, meta and rekognition tagging features.

For full details on all available features and configuration, see the wiki page: https://github.com/bensoer/cdk-photo-archive/wiki

# Prerequisites

## AWS CLI & Account
@@ -35,110 +37,8 @@ This will deploy the default configuration and setup. For more details on config
# Configuration
cdk-photo-archive comes with a number of features and customisations to work with your AWS account, archiving setup and preferences.

`conf/configuration.ts` contains a `getConfiguration()` method which returns an `IConfiguration` object holding all of the configuration settings used to set up the project. Some of the parameters are _required_ and others are _optional_, with default values applied if they are not provided. The following table is a breakdown of all the settings:

| Setting | Type | Required? | Default Value | Description |
| ------- | ---- | -------- | ------------- | ----------- |
| `features` | `Array<Features>` | YES | N/A | Specify which feature lambdas to deploy |
| `deploymentRegion` | `Regions` | YES | N/A | Specify which AWS Region to deploy to |
| `useExistingBuckets` | `Array<string>` | NO | undefined | Specify existing buckets to use with cdk-photo-archive instead of creating them|
| `bucketNamePrefix` | `string` | NO | pt | Specify a prefix to prepend to the names of the buckets created by cdk-photo-archive. **Note:** This functionality is only valid if `useExistingBuckets` is `undefined` |
| `appendRegionToBucketName` | `boolean` | NO | true | Set whether to append the deployment region (ex: us-east-1) to the bucket names. **Note:** This functionality is only valid if `useExistingBuckets` is `undefined`|
| `switchToInfrequentAccessTierAfterDays` | `number` | NO | 90 | How many days until a file is moved to Infrequent Access Tier |
| `switchToGlacierAccessTierAfterDays` | `number` | NO | 120 | How many days until a file is moved to Glacier Access Tier |
| `photoArchiveStackName` | `string` | NO | photo-archive-stack | CloudFormation stack name for the photo archive stack |
| `photoArchiveSettingsStackName` | `string` | NO | photo-archive-settings-stack | CloudFormation stack name for the photo archive settings stack |
| `photoArchiveBucketStackName` | `string` | NO | photo-archive-bucket-stack | CloudFormation stack name for the photo archive bucket stack |
`conf/configuration.ts` contains a `getConfiguration()` method which returns an `IConfiguration` object holding all of the configuration settings used to set up the project. Some of the parameters are _required_ and others are _optional_, with default values applied if they are not provided. The minimum settings necessary to get the project up and running have been set within the repo. For details on all of the available settings, see the wiki page (https://github.com/bensoer/cdk-photo-archive/wiki/Configuration-Options) or the comments within `lib/conf/i-configuration.ts`.

You can also view a breakdown of what each and every setting does within `lib/conf/i-configuration.ts`
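
A minimal sketch of what `conf/configuration.ts` might look like is shown below. The enum import paths and member names here are assumptions for illustration; the authoritative field list lives in `lib/conf/i-configuration.ts`:

```typescript
// conf/configuration.ts -- a minimal sketch, not the project's exact layout.
// The import paths and the Regions member name are assumptions.
import { IConfiguration } from "../lib/conf/i-configuration";
import { Features } from "../lib/enums/features";
import { Regions } from "../lib/enums/regions";

export function getConfiguration(): IConfiguration {
    return {
        // The two required settings; all optional settings fall back to defaults
        features: [Features.HASH_TAG, Features.PHOTO_REKOG_TAG],
        deploymentRegion: Regions.US_EAST_1,
    };
}
```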

## Setting Feature Lambdas
Feature lambdas are lambdas that execute a certain task every time a photo is uploaded to the archive bucket. To use these lambdas, they must be listed in the `features` setting in the `conf/configuration.ts` file. Each feature lambda is mapped to a value in the `Features` enum, as follows:

| Name | Features Enum Value | Description |
| ---- | ------------------- | ----------- |
| Hash Tag Lambda | `Features.HASH_TAG` | Tags each file with an MD5, SHA1, SHA256 and SHA512 hash of the file |
| Photo Meta Tag Lambda | `Features.PHOTO_META_TAG` | Tags each photo with Camera & Lens information, photo information (ISO, aperture, shutter speed, etc.) and the image date, based on EXIF data within the photo. Only valid for JPEG, JPG, PNG and DNG files. |
| Photo Rekog Tag Lambda | `Features.PHOTO_REKOG_TAG` | Uses AWS Rekognition to add up to 10 labels describing the contents of the photo. Only valid for JPEG, JPG and PNG files. |

An example of the `features` setting may look like this:
```javascript
features: [
    Features.HASH_TAG,
    Features.PHOTO_META_TAG,
    Features.PHOTO_REKOG_TAG
]
```

## Tiered Archive Storage
cdk-photo-archive's bucket is configured with tiered storage to reduce costs for your archived files. Transitions happen on a schedule, so recently uploaded files can still be accessed with minimal detriment. The tiering is configured to gradually reduce storage cost, at the expense of slower and more expensive retrieval. This allows photographers to still retrieve files easily if, shortly after uploading, they find they still need them.

cdk-photo-archive takes advantage of 3 AWS S3 Storage tiers: Standard, Infrequent Access (Standard-IA), and Glacier (Flexible Retrieval). You can find more details of this from AWS here: https://aws.amazon.com/s3/storage-classes/

By default, cdk-photo-archive will transition archived files from the Standard tier to the Infrequent Access tier after 90 days, and then to Glacier (Flexible Retrieval) after 120 days. cdk-photo-archive does not currently support Glacier Deep Archive. These transition periods can be changed via the `switchToInfrequentAccessTierAfterDays` and `switchToGlacierAccessTierAfterDays` settings in the `conf/configuration.ts` file. Applying these changes requires redeploying the CDK.

An example of these settings may look like this:
```javascript
switchToInfrequentAccessTierAfterDays: 90,
switchToGlacierAccessTierAfterDays: 120
```

## Bring Your Own Bucket (BYOB)
cdk-photo-archive supports Bring-Your-Own-Bucket. This way you can configure and control your existing archive buckets as you wish, while still taking advantage of cdk-photo-archive's feature lambdas. This is configured by setting the `useExistingBuckets` parameter in the `conf/configuration.ts` file. By default this value is `undefined`. Once it is defined, cdk-photo-archive will no longer manage or create any buckets for the archive, and any features related to archive storage, including tiering and naming, will have no effect.

The `useExistingBuckets` setting takes an array of strings, where each string is the full ARN of an existing bucket you want cdk-photo-archive to use. An example of this configuration may look as follows:
```javascript
useExistingBuckets: [
    "arn:aws:s3:::pt-photo-archive",
    "arn:aws:s3:::my-other-archive"
]
```
**Note:** If at a later time you want to revert this, remove the setting from `conf/configuration.ts`, set the value to `undefined`, or leave it as an empty array.

**Note:** If you have already deployed cdk-photo-archive and it has created the buckets for you, you will need to first fully destroy the stack with `cdk destroy --all` before configuring BYOB and redeploying.

**Tip:** cdk-photo-archive will create the archive bucket for you if you do not supply one. If you ever need to entirely delete the cdk-photo-archive stack, the archive bucket will not be deleted. When restoring the stack though, the CDK will error because the bucket it wishes to create already exists. You can work around this issue by copying the ARN of the previously created bucket and treating it like BYOB.

# Configuration After Deployment
cdk-photo-archive also deploys a number of settings to SSM Parameter Store on AWS. These can be found under the `/pa` path. Changes to these settings take effect on the system in real time, without having to re-deploy from the CDK. **WARNING:** If possible, it is better to redeploy changes from the CDK. The settings available within SSM Parameter Store are as follows:

| Path | Description |
| ---- | ----------- |
| `/pa/features` | Contains a StringList of all available features that were deployed with cdk-photo-archive |
| `/pa/features/<featurename>/enabled` | Setting value of TRUE or FALSE indicating whether the given feature is enabled. On deployment this value is automatically set to TRUE. Changing it to FALSE disables the given feature for future executions. |
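
For example, a feature could be switched off on the fly with the AWS SDK for JavaScript v3. This is a sketch; the `hashtag` path segment is an assumed feature name, so check the `/pa/features` StringList for the names actually deployed:

```typescript
import { SSMClient, PutParameterCommand } from "@aws-sdk/client-ssm";

// Disable a feature in real time without redeploying the CDK stack.
// "/pa/features/hashtag/enabled" is an assumed example path -- list the
// /pa/features parameter to see the feature names actually deployed.
const ssm = new SSMClient({});

await ssm.send(new PutParameterCommand({
    Name: "/pa/features/hashtag/enabled",
    Value: "FALSE",
    Overwrite: true,
}));
```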


# Developer Notes
## TODO - Feature List
- ~~Set "feature" names as more global - it's hardcoded currently~~ - DONE
- ~~Ability to import existing S3 buckets instead of having CDK project create and manage them - avoid users from having to dev CDK, make this a setting~~ - DONE
- Configuration file
- ~~Ability to enable/disable which "features" to have applied to archive~~ - DONE
- Ability to specify naming of S3 Buckets, SQS Queues and Lambda Functions
- V1 - able to specify a prefix so that it's unique within the account / unique within AWS
- V2 - full name overrides
- ~~Implement creation of SSM Parameter store - housing enable/disable and lambda ARNs~~ - DONE
- ~~Improve dynamic handling of running "features" - users should be able to enable/disable in Parameter Store or defaults in CDK and things will adjust appropriately~~ - DONE
- Store status information of each photo in a DynamoDB table?
- Make this optional, enable/disable also as a setting in the configuration file
- Entry stores Bucket, Key and which "features" have been applied to the photo
- Entry stores all EXIF tag data about the photo
- Entry stores all Rekognition labels at default validity (55% vs 75% which is tagged)
- ~~Complete README documentation for user friendly installation and uninstallation~~ - DONE
- ~~Make naming consistent between Features - SSM Param name, Lambda Name, Feature Name, Feature Enum Name~~ - DONE


## Welcome to your CDK TypeScript project!

This is a blank project for TypeScript development with CDK.

The `cdk.json` file tells the CDK Toolkit how to execute your app.

### Useful commands

* `npm run build` compile typescript to js
* `npm run watch` watch for changes and compile
* `npm run test` perform the jest unit tests
* `cdk deploy` deploy this stack to your default AWS account/region
* `cdk diff` compare deployed stack with current state
* `cdk synth` emits the synthesized CloudFormation template
Feature lambdas are lambdas that execute a certain task every time a photo is uploaded to the archive bucket. To use these lambdas, they must be listed in the `features` setting in the `conf/configuration.ts` file. Each feature lambda is mapped to a value in the `Features` enum. See https://github.com/bensoer/cdk-photo-archive/wiki/Configuration-Options#features for details.
50 changes: 50 additions & 0 deletions archive/bucket-event-handler/bucket-event-handler.ts
@@ -0,0 +1,50 @@
import { Construct } from "constructs";
import { ServicePrincipals } from "cdk-constants";
import {
    aws_sqs as sqs,
    aws_iam as iam,
    aws_s3 as s3,
    aws_sns as sns,
} from 'aws-cdk-lib'
import { FormatUtils } from "../../lib/utils/formatutils";

export interface BucketEventHandlerProps {
    buckets: Array<s3.IBucket>
    eventTopicName: string
}

export class BucketEventHandler extends Construct {

    public readonly eventTopic: sns.Topic

    constructor(scope: Construct, id: string, props: BucketEventHandlerProps){
        super(scope, id)

        this.eventTopic = new sns.Topic(this, `BucketEventHandlerTopic`, {
            displayName: `BucketEventHandlerTopic`,
            topicName: props.eventTopicName
        })

        // Add a resource policy to the event topic allowing our buckets to publish notifications to it
        this.eventTopic.addToResourcePolicy(
            new iam.PolicyStatement({
                principals: [
                    new iam.ServicePrincipal(ServicePrincipals.S3)
                ],
                actions: [
                    "sns:Publish",
                ],
                resources: [
                    this.eventTopic.topicArn
                ],
                conditions: {
                    "ArnLike": {
                        "aws:SourceArn": FormatUtils.convertBucketsToPolicyArns(props.buckets)
                    }
                }
            })
        )
    }
}
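
A hypothetical usage sketch of this construct from within a parent stack follows; the `archiveBucket` variable and the topic name are assumptions for illustration:

```typescript
// Route S3 events from the archive buckets through a single SNS topic.
// archiveBucket and the topic name below are illustrative assumptions.
const eventHandler = new BucketEventHandler(this, "BucketEventHandler", {
    buckets: [archiveBucket],
    eventTopicName: "pt-photo-archive-event-topic",
});
// eventHandler.eventTopic can then be subscribed to the SQS event queue
```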
96 changes: 96 additions & 0 deletions archive/bucket-topic-event-linker/bucket-topic-event-linker.ts
@@ -0,0 +1,96 @@
import { Construct } from "constructs";

import {
    aws_iam as iam,
    aws_lambda as lambda,
    aws_logs as logs,
    custom_resources as cr,
    aws_s3 as s3,
    aws_sqs as sqs,
    aws_sns as sns,
} from "aws-cdk-lib"
import {
    Duration,
    CustomResource
} from "aws-cdk-lib"
import * as path from 'path'
import * as crypto from 'crypto'
import { ServicePrincipals } from "cdk-constants";
import { HashUtil } from "../../lib/utils/hashutil";
import { FormatUtils } from "../../lib/utils/formatutils";
import { LayerTypes } from "../../lib/constructs/lambda-layers/lambda-layers";

export interface BucketTopicEventLinkerProps {
    buckets: Array<s3.IBucket>
    topic: sns.Topic,
    onLayerRequestListener: (layerTypes: Array<LayerTypes>) => Array<lambda.LayerVersion>
}

export class BucketTopicEventLinker extends Construct {

    constructor(scope: Construct, id: string, props: BucketTopicEventLinkerProps){
        super(scope, id)

        //const hashCode = HashUtil.generateIDSafeHash(props.bucket.bucketArn + props.bucket.bucketName + props.topic.topicArn, 15)

        // Role assumed by the event-linking lambda backing the custom resource
        const eventLinkingLambdaRole = new iam.Role(this, `CustomResourceRole`, {
            roleName: `btel-lambda-service-role`,
            description: "Assumed Role By btel-event-linking-function",
            assumedBy: new iam.ServicePrincipal(ServicePrincipals.LAMBDA)
        })

        // Allow the lambda to read and write bucket notification configuration
        const eventLinkingLambdaS3Policy = new iam.Policy(this, `CustomResourceRoleS3Policy`, {
            policyName: `btel-lambda-s3-policy`,
            roles: [
                eventLinkingLambdaRole
            ],
            statements: [
                new iam.PolicyStatement({
                    actions: [
                        's3:PutBucketNotification',
                        's3:GetBucketNotification'
                    ],
                    resources: FormatUtils.convertBucketsToArns(props.buckets)
                })
            ]
        })

        const eventLinkingLambda = new lambda.Function(this, `CustomResourceLambda`, {
            functionName: `btel-function`,
            description: 'Event Linking For S3 Bucket Events To SNS',
            runtime: lambda.Runtime.PYTHON_3_8,
            layers: props.onLayerRequestListener([LayerTypes.COMMONLIBLAYER]),
            handler: 'lambda_function.on_event',
            code: lambda.Code.fromAsset(path.join(__dirname, './res')),
            role: eventLinkingLambdaRole,
            timeout: Duration.minutes(15)
        })

        const eventLinkingCustomResourceProvider = new cr.Provider(this, `CustomResourceProvider`, {
            onEventHandler: eventLinkingLambda,
            logRetention: logs.RetentionDays.ONE_DAY,
        })

        const eventLinkingCustomResource = new CustomResource(this, `CustomResource`, {
            resourceType: `Custom::BucketTopic-EventLinker`,
            serviceToken: eventLinkingCustomResourceProvider.serviceToken,
            properties: {
                "bucketArns": FormatUtils.convertBucketsToArns(props.buckets),
                "bucketNames": FormatUtils.convertBucketsToNames(props.buckets),
                "snsTopicArn": props.topic.topicArn,
            }
        })

        for(const bucket of props.buckets){
            eventLinkingCustomResource.node.addDependency(bucket)
        }
        eventLinkingCustomResource.node.addDependency(props.topic)
    }
}
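
A hypothetical wiring of the linker against the topic created by `BucketEventHandler`; the layer lookup callback shown is an assumption about how the lambda-layers construct is queried:

```typescript
// Run the custom resource that links bucket notifications to the SNS topic.
// getLayersOfTypes is an assumed helper on the lambda-layers construct.
new BucketTopicEventLinker(this, "BucketTopicEventLinker", {
    buckets: [archiveBucket],
    topic: eventHandler.eventTopic,
    onLayerRequestListener: (layerTypes) => lambdaLayers.getLayersOfTypes(layerTypes),
});
```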