# Captured from a GitHub file view (branch: master; 73 lines, 73 sloc, 2.59 KB).
# CloudFormation template header: format version and a human-readable summary
# of what the stack creates.
AWSTemplateFormatVersion: "2010-09-09"
Description: >-
  This template deploys a Glue Database, (2) Crawlers,
  and Crawler IAM Role to AWS.
# Stack inputs supplied at deploy time.
Parameters:
  # Name of the S3 bucket holding the demo files. It has no default, so a
  # value must be provided. The crawlers build their s3:// target paths from it.
  ZeppelinDemoBucket:
    Type: String
    Description: 'S3 bucket for Zeppelin EMR demo files'

  # Name given to the Glue Data Catalog database created by this stack.
  GlueDatabase:
    Type: String
    Description: 'Glue database used for Zeppelin EMR demo'
    Default: 'zeppelin_demo'
# Resources created by the stack: one Glue database, one IAM role assumed by
# Glue, and two Glue crawlers that write into the database.
Resources:
  # Glue Data Catalog database, registered in the deploying account's catalog
  # and named by the GlueDatabase parameter.
  ZeppelinDemoGlueDatabase:
    Type: AWS::Glue::Database
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Description: 'Zeppelin EMR demo Data Catalog database'
        Name: !Ref GlueDatabase

  # IAM role the crawlers run as. Only the Glue service may assume it.
  ZeppelinDemoCrawlerRole:
    Type: AWS::IAM::Role
    Properties:
      # NOTE(review): a fixed role name means the stack must be deployed with
      # CAPABILITY_NAMED_IAM and will presumably collide with a second copy of
      # this stack in the same account. Kept as-is so the physical role name
      # does not change.
      RoleName: ZeppelinDemoCrawlerRole
      AssumeRolePolicyDocument:
        # Quoted so YAML does not parse the policy version as a date.
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: /
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
      Policies:
        - PolicyName: ZeppelinDemoCrawlerPolicy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                  - s3:DeleteObject
                # Object access is limited to the demo bucket rather than
                # every bucket ('*').
                # NOTE(review): bucket-level actions such as s3:ListBucket are
                # expected to come from the AWSGlueServiceRole managed policy
                # attached above - confirm before deploying.
                Resource: !Sub 'arn:aws:s3:::${ZeppelinDemoBucket}/*'

  # Crawler over the three bakery data prefixes in the demo bucket.
  # The !GetAtt on the role and the !Ref on the database already make
  # CloudFormation create both before this crawler, so no DependsOn is needed.
  CrawlerBakery:
    Type: AWS::Glue::Crawler
    Properties:
      Name: bakery-transactions-crawler
      Role: !GetAtt ZeppelinDemoCrawlerRole.Arn
      DatabaseName: !Ref ZeppelinDemoGlueDatabase
      Targets:
        S3Targets:
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/bakery-csv'
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/bakery-parquet'
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/bakery-summary-parquet'

  # Crawler over the four movie and ratings data prefixes in the demo bucket.
  # Ordering relative to the role and database comes from the !GetAtt and
  # !Ref below, as with the bakery crawler.
  CrawlerRatings:
    Type: AWS::Glue::Crawler
    Properties:
      Name: movie-ratings-crawler
      Role: !GetAtt ZeppelinDemoCrawlerRole.Arn
      DatabaseName: !Ref ZeppelinDemoGlueDatabase
      Targets:
        S3Targets:
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/ratings-parquet-100k'
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/movies-parquet-100k'
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/ratings-parquet-27MM'
          - Path: !Sub 's3://${ZeppelinDemoBucket}/data/movies-parquet-27MM'
# You can’t perform that action at this time.