Merge pull request #214 from guardian/salesforce_raw
Salesforce export state machine
pvighi committed Nov 16, 2018
2 parents f8cd1c8 + aeb3a1b commit 70b54a7
Showing 37 changed files with 2,339 additions and 12 deletions.
6 changes: 5 additions & 1 deletion build.sbt
@@ -160,7 +160,8 @@ lazy val root = all(project in file(".")).enablePlugins(RiffRaffArtifact).aggreg
s3ConfigValidator,
`new-product-api`,
`effects-sqs`,
`effects-ses`
`effects-ses`,
`sf-datalake-export`
).dependsOn(zuora, handler, effectsDepIncludingTestFolder, `effects-sqs`, testDep)

lazy val `identity-backfill` = all(project in file("handlers/identity-backfill")) // when using the "project identity-backfill" command it uses the lazy val name
@@ -201,6 +202,9 @@ lazy val `cancellation-sf-cases` = all(project in file("handlers/cancellation-sf
.enablePlugins(RiffRaffArtifact)
.dependsOn(salesforce, handler, effectsDepIncludingTestFolder, testDep)

lazy val `sf-datalake-export` = all(project in file("handlers/sf-datalake-export"))
.enablePlugins(RiffRaffArtifact)
.dependsOn(salesforce, handler, effectsDepIncludingTestFolder, testDep)

// ==== END handlers ====

4 changes: 4 additions & 0 deletions handlers/sf-datalake-export/README.md
@@ -0,0 +1,4 @@
# sf-datalake-export
This state machine generates CSV reports from Salesforce using the [Bulk API](https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_code_curl_walkthrough.htm).
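
For orientation, the lifecycle the state machine drives looks roughly like the sketch below: create a `query` job, add a batch holding the SOQL, poll until the batches complete, fetch the CSV results, and close the job. This is a minimal sketch against the XML Bulk API, not the code in this PR; the instance URL, session id, object and query are placeholders (it assumes `scala-xml`, which this module already depends on).

```scala
import java.net.{HttpURLConnection, URL}
import scala.io.Source

object BulkApiSketch extends App {
  // Placeholders: a real run needs a session id from a Salesforce login
  // and the instance URL returned alongside it.
  val instanceUrl = "https://yourInstance.salesforce.com"
  val sessionId = "REPLACE_ME"

  def bulkPost(path: String, contentType: String, payload: String): String = {
    val conn = new URL(s"$instanceUrl/services/async/44.0/$path")
      .openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("POST")
    conn.setRequestProperty("X-SFDC-Session", sessionId)
    conn.setRequestProperty("Content-Type", contentType)
    conn.setDoOutput(true)
    conn.getOutputStream.write(payload.getBytes("UTF-8"))
    Source.fromInputStream(conn.getInputStream, "UTF-8").mkString
  }

  // 1. create a CSV query job for the object being exported
  val jobInfo = bulkPost("job", "application/xml",
    """<jobInfo xmlns="http://www.force.com/2009/06/asyncapi/dataload">
      |  <operation>query</operation>
      |  <object>Contact</object>
      |  <contentType>CSV</contentType>
      |</jobInfo>""".stripMargin)
  val jobId = (scala.xml.XML.loadString(jobInfo) \ "id").text

  // 2. add a batch holding the SOQL query for the report
  bulkPost(s"job/$jobId/batch", "text/csv", "SELECT Id, Name FROM Contact")

  // 3. poll job/<jobId>/batch until every batch is Completed, then GET each
  //    CSV from job/<jobId>/batch/<batchId>/result/<resultId> (elided here)

  // 4. close the job so Salesforce knows no more batches are coming
  bulkPost(s"job/$jobId", "application/xml",
    """<jobInfo xmlns="http://www.force.com/2009/06/asyncapi/dataload">
      |  <state>Closed</state>
      |</jobInfo>""".stripMargin)
}
```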


16 changes: 16 additions & 0 deletions handlers/sf-datalake-export/build.sbt
@@ -0,0 +1,16 @@
// "Any .sbt files in foo, say foo/build.sbt, will be merged with the build definition for the entire build, but scoped to the hello-foo project."
// https://www.scala-sbt.org/0.13/docs/Multi-Project.html
name := "sf-datalake-export"
description := "Export Salesforce data to the data lake"

scalacOptions += "-Ypartial-unification"

assemblyJarName := "sf-datalake-export.jar"
riffRaffPackageType := assembly.value
riffRaffUploadArtifactBucket := Option("riffraff-artifact")
riffRaffUploadManifestBucket := Option("riffraff-builds")
riffRaffManifestProjectName := "MemSub::Membership Admin::SF Data Lake Export"
riffRaffArtifactResources += (file("handlers/sf-datalake-export/cfn.yaml"), "cfn/cfn.yaml")

libraryDependencies += "org.scala-lang.modules" %% "scala-xml" % "1.1.0"
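
The assembled jar is what the CloudFormation template below points its `Handler:` entries at, e.g. `com.gu.sf_datalake_export.handlers.StartJobHandler::apply`. A Scala `object` compiles to a class with static forwarder methods, so the Java 8 Lambda runtime can invoke `apply` directly with the stream-based handler signature. A shape sketch only, not this module's real implementation (the actual handlers live under `src`; `aws-lambda-java-core` is assumed on the classpath):

```scala
import java.io.{InputStream, OutputStream}
import com.amazonaws.services.lambda.runtime.Context

object StartJobHandler {
  // The runtime resolves StartJobHandler::apply to the static forwarder
  // scalac generates for this method and hands it the raw request and
  // response streams plus the invocation context.
  def apply(input: InputStream, output: OutputStream, context: Context): Unit = {
    val request = scala.io.Source.fromInputStream(input, "UTF-8").mkString
    context.getLogger.log(s"received: $request")
    // the real handler would start the Bulk API job here and write the
    // job id to output for the next state machine step to pick up
    output.write("""{"jobId":"placeholder"}""".getBytes("UTF-8"))
  }
}
```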

336 changes: 336 additions & 0 deletions handlers/sf-datalake-export/cfn.yaml
@@ -0,0 +1,336 @@
AWSTemplateFormatVersion: "2010-09-09"
Description: Export Salesforce data to the data lake

Parameters:
Stage:
Description: Stage name
Type: String
AllowedValues:
- CODE
- PROD
Default: CODE
Mappings:
StageMap:
CODE: # we should really write into a bucket in the Ophan account
destBucketArn: "arn:aws:s3:::gu-salesforce-export-test/*"
PROD:
destBucketArn: "arn:aws:s3:::gu-salesforce-export-test/*"
Resources:
StartJobRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: LambdaPolicy
PolicyDocument:
Statement:
- Effect: Allow
Action:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- lambda:InvokeFunction
Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sf-start-export-job-${Stage}:log-stream:*"
- PolicyName: ReadPrivateCredentials
PolicyDocument:
Statement:
- Effect: Allow
Action: s3:GetObject
Resource: !Sub "arn:aws:s3:::gu-reader-revenue-private/membership/support-service-lambdas/${Stage}/sfExportAuth-${Stage}*.json"
StartJob:
Type: AWS::Lambda::Function
Properties:
Description: start a Salesforce export job
FunctionName:
!Sub sf-start-export-job-${Stage}
Code:
S3Bucket: zuora-auto-cancel-dist
S3Key: !Sub membership/${Stage}/sf-datalake-export/sf-datalake-export.jar
Handler: com.gu.sf_datalake_export.handlers.StartJobHandler::apply
Environment:
Variables:
Stage: !Ref Stage
Role:
!GetAtt StartJobRole.Arn
MemorySize: 1536
Runtime: java8
Timeout: 300
DependsOn:
- StartJobRole
BatchStateRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: LambdaPolicy
PolicyDocument:
Statement:
- Effect: Allow
Action:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- lambda:InvokeFunction
Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sf-get-batch-state-${Stage}:log-stream:*"
- PolicyName: ReadPrivateCredentials
PolicyDocument:
Statement:
- Effect: Allow
Action: s3:GetObject
Resource: !Sub "arn:aws:s3:::gu-reader-revenue-private/membership/support-service-lambdas/${Stage}/sfExportAuth-${Stage}*.json"
GetBatchesLambda:
Type: AWS::Lambda::Function
Properties:
Description: get the state of batches belonging to a Salesforce Bulk API query
FunctionName:
!Sub sf-get-batch-state-${Stage}
Code:
S3Bucket: zuora-auto-cancel-dist
S3Key: !Sub membership/${Stage}/sf-datalake-export/sf-datalake-export.jar
Handler: com.gu.sf_datalake_export.handlers.GetBatchesHandler::apply
Environment:
Variables:
Stage: !Ref Stage
Role:
!GetAtt BatchStateRole.Arn
MemorySize: 1536
Runtime: java8
Timeout: 300
DependsOn:
- BatchStateRole
DownloadBatchRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: LambdaPolicy
PolicyDocument:
Statement:
- Effect: Allow
Action:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- lambda:InvokeFunction
Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sf-get-batch-state-${Stage}:log-stream:*"
- PolicyName: ReadPrivateCredentials
PolicyDocument:
Statement:
- Effect: Allow
Action: s3:GetObject
Resource: !Sub "arn:aws:s3:::gu-reader-revenue-private/membership/support-service-lambdas/${Stage}/sfExportAuth-${Stage}*.json"
- PolicyName: DestBucket
PolicyDocument:
Statement:
- Effect: Allow
Action:
- s3:AbortMultipartUpload
- s3:DeleteObject
- s3:GetObject
- s3:GetObjectAcl
- s3:GetBucketAcl
- s3:ListBucket
- s3:PutObject
- s3:GetObjectVersion
- s3:DeleteObjectVersion
Resource: !FindInMap [StageMap, !Ref Stage, destBucketArn]
DownloadBatch:
Type: AWS::Lambda::Function
Properties:
Description: download the results of completed Bulk API batches into S3
FunctionName:
!Sub sf-download-batch-${Stage}
Code:
S3Bucket: zuora-auto-cancel-dist
S3Key: !Sub membership/${Stage}/sf-datalake-export/sf-datalake-export.jar
Handler: com.gu.sf_datalake_export.handlers.DownloadBatchHandler::apply
Environment:
Variables:
Stage: !Ref Stage
Role:
!GetAtt DownloadBatchRole.Arn
MemorySize: 1536
Runtime: java8
Timeout: 900
DependsOn:
- DownloadBatchRole
EndJobRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: LambdaPolicy
PolicyDocument:
Statement:
- Effect: Allow
Action:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- lambda:InvokeFunction
Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sf-end-export-job-${Stage}:log-stream:*"
- PolicyName: ReadPrivateCredentials
PolicyDocument:
Statement:
- Effect: Allow
Action: s3:GetObject
Resource: !Sub "arn:aws:s3:::gu-reader-revenue-private/membership/support-service-lambdas/${Stage}/sfExportAuth-${Stage}*.json"
EndJob:
Type: AWS::Lambda::Function
Properties:
Description: close a Salesforce export job
FunctionName:
!Sub sf-end-export-job-${Stage}
Code:
S3Bucket: zuora-auto-cancel-dist
S3Key: !Sub membership/${Stage}/sf-datalake-export/sf-datalake-export.jar
Handler: com.gu.sf_datalake_export.handlers.EndJobHandler::apply
Environment:
Variables:
Stage: !Ref Stage
Role:
!GetAtt EndJobRole.Arn
MemorySize: 1536
Runtime: java8
Timeout: 300
DependsOn:
- EndJobRole
StatesExecutionRole:
Type: 'AWS::IAM::Role'
Properties:
AssumeRolePolicyDocument:
Version: 2012-10-17
Statement:
Effect: Allow
Principal:
Service: !Sub 'states.${AWS::Region}.amazonaws.com'
Action: 'sts:AssumeRole'
Path: /
Policies:
- PolicyName: StatesExecutionPolicy
PolicyDocument:
Version: 2012-10-17
Statement:
- Effect: Allow
Action:
- 'lambda:InvokeFunction'
Resource: '*'
StateMachine:
Type: 'AWS::StepFunctions::StateMachine'
Properties:
StateMachineName: !Sub 'salesforce-export-${Stage}'
RoleArn: !GetAtt
- StatesExecutionRole
- Arn
DefinitionString: !Sub
- |-
{
"StartAt": "StartJob",
"States": {
"StartJob": {
"Type": "Task",
"Resource": "${startJobArn}",
"Next": "WaitSomeTime",
"Retry": [{
"ErrorEquals": ["States.ALL"],
"IntervalSeconds": 30,
"MaxAttempts": 3
}]
},
"WaitSomeTime": {
"Type": "Wait",
"Seconds": 5,
"Next": "getBatches"
},
"getBatches": {
"Type": "Task",
"Resource": "${getBatchesArn}",
"Next": "checkPendingBatches",
"Retry": [{
"ErrorEquals": ["States.ALL"],
"IntervalSeconds": 30,
"MaxAttempts": 3
}]
},
"checkPendingBatches": {
"Type": "Choice",
"Choices": [{
"Variable": "$.jobStatus",
"StringEquals": "Completed",
"Next": "downloadBatch"
}],
"Default": "WaitSomeTime"
},
"downloadBatch": {
"Type": "Task",
"Resource": "${downloadBatchArn}",
"Next": "CheckPendingDownloads",
"Retry": [{
"ErrorEquals": ["States.ALL"],
"IntervalSeconds": 30,
"MaxAttempts": 3
}]
},
"CheckPendingDownloads": {
"Type": "Choice",
"Choices": [{
"Variable": "$.done",
"BooleanEquals": true,
"Next": "endJob"
}],
"Default": "downloadBatch"
},
"endJob": {
"Type": "Task",
"Resource": "${endJobArn}",
"Next": "done",
"Retry": [{
"ErrorEquals": ["States.ALL"],
"IntervalSeconds": 30,
"MaxAttempts": 3
}]
},
"done": {
"Type": "Pass",
"End": true
}
}
}
- {
startJobArn: !GetAtt [ StartJob, Arn ],
getBatchesArn: !GetAtt [ GetBatchesLambda, Arn ],
downloadBatchArn: !GetAtt [ DownloadBatch, Arn ],
endJobArn: !GetAtt [ EndJob, Arn ]
}
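
Read as ordinary code, the definition above is two polling loops around the Bulk API job: `StartJob` kicks off the export; the `WaitSomeTime` → `getBatches` → `checkPendingBatches` cycle repeats until Salesforce reports the job `Completed`; `downloadBatch` re-enters itself via `CheckPendingDownloads` until `$.done` is true; then `endJob` closes the job. A minimal Scala sketch of that control flow with every step stubbed (names and payload shapes are illustrative, not the handlers' real contracts, and the per-task `Retry` blocks are dropped):

```scala
object ExportFlowSketch extends App {
  case class JobState(jobId: String, jobStatus: String)
  case class DownloadState(jobId: String, done: Boolean)

  // Stubs standing in for the four Lambdas defined above.
  def startJob(): JobState = JobState("stub-job-id", "Processing")
  def getBatches(job: JobState): JobState = job.copy(jobStatus = "Completed")
  def downloadBatch(d: DownloadState): DownloadState = d.copy(done = true)
  def endJob(jobId: String): Unit = println(s"closed $jobId")

  var job = startJob()
  do {
    Thread.sleep(5000) // WaitSomeTime: 5 seconds
    job = getBatches(job)
  } while (job.jobStatus != "Completed") // checkPendingBatches

  var download = DownloadState(job.jobId, done = false)
  do {
    download = downloadBatch(download)
  } while (!download.done) // CheckPendingDownloads

  endJob(job.jobId) // endJob, then the Pass state "done" ends the execution
}
```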