From a3f3955f67a343e08c37d5baa1c1034887bfb88d Mon Sep 17 00:00:00 2001 From: igorborgest Date: Sat, 9 May 2020 12:40:00 -0300 Subject: [PATCH 1/3] Checking multithreading deletes. --- awswrangler/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awswrangler/s3.py b/awswrangler/s3.py index 9ba8a0db6..a6de341b4 100644 --- a/awswrangler/s3.py +++ b/awswrangler/s3.py @@ -271,7 +271,7 @@ def delete_objects( else: cpus: int = _utils.ensure_cpu_count(use_threads=use_threads) with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor: - executor.map(_delete_objects, repeat(bucket), chunks, repeat(client_s3)) + list(executor.map(_delete_objects, repeat(bucket), chunks, repeat(client_s3))) def _split_paths_by_bucket(paths: List[str]) -> Dict[str, List[str]]: From 7378d46c52dd1e62011e75238f6a4b1ac90cd789 Mon Sep 17 00:00:00 2001 From: Igor Tavares Date: Mon, 11 May 2020 08:31:17 -0300 Subject: [PATCH 2/3] Update issue templates --- .github/ISSUE_TEMPLATE/question.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/question.md diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 000000000..b53ffceb8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: Question +about: Ask with as many useful details as possible +title: '' +labels: question +assignees: '' + +--- + + From 022f5acd44f64dbc69a4952613817e66ec6b08b8 Mon Sep 17 00:00:00 2001 From: igorborgest Date: Mon, 11 May 2020 22:32:51 -0300 Subject: [PATCH 3/3] Add VPC and Subnets on CloudFormation template --- ...oy-cloudformation.sh => cloudformation.sh} | 10 +- testing/cloudformation.yaml | 147 +++++++++++++----- testing/parameters.properties | 6 - testing/test_awswrangler/test_cloudwatch.py | 2 +- testing/test_awswrangler/test_data_lake.py | 10 +- testing/test_awswrangler/test_db.py | 2 +- testing/test_awswrangler/test_emr.py | 2 +- testing/{run-tests.sh => tests.sh} | 2 +- .../{run-validations.sh => validations.sh} | 0 9 files changed, 126 insertions(+), 55 deletions(-) rename testing/{deploy-cloudformation.sh => cloudformation.sh} (66%) delete mode 100644 testing/parameters.properties rename testing/{run-tests.sh => tests.sh} (93%) rename testing/{run-validations.sh => validations.sh} (100%) diff --git a/testing/deploy-cloudformation.sh b/testing/cloudformation.sh similarity index 66% rename from testing/deploy-cloudformation.sh rename to testing/cloudformation.sh index 280a0b405..e10912aed 100755 --- a/testing/deploy-cloudformation.sh +++ b/testing/cloudformation.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -set -ex +set -e cfn-lint -t cloudformation.yaml rm -rf temp.yaml @@ -7,12 +7,14 @@ cfn-flip -c -l -n cloudformation.yaml temp.yaml cfn-lint -t temp.yaml mv temp.yaml cloudformation.yaml +read -rp "Databases password (e.g. 123456Ab): " password + aws cloudformation deploy \ --template-file cloudformation.yaml \ - --stack-name aws-data-wrangler-test \ + --stack-name aws-data-wrangler \ --capabilities CAPABILITY_IAM \ - --parameter-overrides $(cat parameters.properties) + --parameter-overrides DatabasesPassword="$password" aws cloudformation update-termination-protection \ --enable-termination-protection \ - --stack-name aws-data-wrangler-test + --stack-name aws-data-wrangler diff --git a/testing/cloudformation.yaml b/testing/cloudformation.yaml index 1b4b90f08..b62062112 100644 --- a/testing/cloudformation.yaml +++ b/testing/cloudformation.yaml @@ -1,31 +1,97 @@ AWSTemplateFormatVersion: 2010-09-09 Description: | - AWS Data Wrangler Test Infrastructure + AWS Data Wrangler Development Infrastructure Parameters: - VpcId: - Type: String - Description: Redshift VPC ID - SubnetId: - Type: String - Description: Redshift Subnet ID - SubnetId2: - Type: String - Description: Redshift Subnet ID - SubnetAz: - Type: String - Description: Subnet AZ DatabasesPassword: Type: String Description: Password for all databases NoEcho: true - AWSUserForTests: - Type: String - Description: AWS User that will running the tests on the CLI Resources: + VPC: + Type: AWS::EC2::VPC + Properties: + CidrBlock: 10.19.224.0/19 + EnableDnsSupport: true + EnableDnsHostnames: true + Tags: + - Key: Name + Value: aws-data-wrangler + InternetGateway: + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: aws-data-wrangler + InternetGatewayAttachment: + Type: AWS::EC2::VPCGatewayAttachment + Properties: + InternetGatewayId: + Ref: InternetGateway + VpcId: + Ref: VPC + PublicSubnet1: + Type: AWS::EC2::Subnet + Properties: + VpcId: + Ref: VPC + AvailabilityZone: + Fn::Select: + - 0 + - Fn::GetAZs: '' + CidrBlock: 10.19.229.0/24 + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: aws-data-wrangler Public Subnet (AZ1) + PublicSubnet2: + Type: AWS::EC2::Subnet + Properties: + VpcId: + Ref: VPC + AvailabilityZone: + Fn::Select: + - 1 + - Fn::GetAZs: '' + CidrBlock: 10.19.230.0/24 + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: aws-data-wrangler Public Subnet (AZ2) + PublicRouteTable: + Type: AWS::EC2::RouteTable + Properties: + VpcId: + Ref: VPC + Tags: + - Key: Name + Value: aws-data-wrangler Public Routes + DefaultPublicRoute: + Type: AWS::EC2::Route + DependsOn: InternetGatewayAttachment + Properties: + RouteTableId: + Ref: PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: + Ref: InternetGateway + PublicSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: + Ref: PublicRouteTable + SubnetId: + Ref: PublicSubnet1 + PublicSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: + Ref: PublicRouteTable + SubnetId: + Ref: PublicSubnet2 KmsKeyAlias: Type: AWS::KMS::Alias Properties: - AliasName: alias/aws-data-wrangler-test-key + AliasName: alias/aws-data-wrangler-key TargetKeyId: Ref: KmsKey KmsKey: @@ -34,7 +100,7 @@ Resources: Description: Aws Data Wrangler Test Key. KeyPolicy: Version: '2012-10-17' - Id: aws-data-wrangler-test-key + Id: aws-data-wrangler-key Statement: - Sid: Enable IAM User Permissions Effect: Allow @@ -46,8 +112,7 @@ Resources: - Sid: Allow administration of the key Effect: Allow Principal: - AWS: - Fn::Sub: arn:aws:iam::${AWS::AccountId}:user/${AWSUserForTests} + AWS: '*' Action: - kms:Create* - kms:Describe* @@ -142,12 +207,12 @@ Resources: Properties: Description: AWS Data Wrangler Test Arena - Redshift Subnet Group SubnetIds: - - Ref: SubnetId + - Ref: PublicSubnet1 DatabaseSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: VpcId: - Ref: VpcId + Ref: VPC GroupDescription: AWS Data Wrangler Test Arena - Redshift security group DatabaseSecurityGroupIngress: Type: AWS::EC2::SecurityGroupIngress @@ -185,7 +250,7 @@ Resources: CatalogId: Ref: AWS::AccountId DatabaseInput: - Name: awswrangler_test + Name: aws_data_wrangler Description: AWS Data Wrangler Test Arena - Glue Database LogGroup: Type: AWS::Logs::LogGroup @@ -201,8 +266,8 @@ Resources: Properties: DBSubnetGroupDescription: RDS Database Subnet Group SubnetIds: - - Ref: SubnetId - - Ref: SubnetId2 + - Ref: PublicSubnet1 + - Ref: PublicSubnet2 AuroraRole: Type: AWS::IAM::Role Properties: @@ -232,14 +297,15 @@ Resources: PostgresqlParameterGroup: Type: AWS::RDS::DBClusterParameterGroup Properties: - Description: Postgres 10 - Family: aurora-postgresql10 + Description: Postgres 11 + Family: aurora-postgresql11 Parameters: apg_plan_mgmt.capture_plan_baselines: 'off' AuroraClusterPostgresql: Type: AWS::RDS::DBCluster Properties: Engine: aurora-postgresql + EngineVersion: '11.6' DBClusterIdentifier: postgresql-cluster-wrangler MasterUsername: test MasterUserPassword: @@ -261,6 +327,7 @@ Resources: Type: AWS::RDS::DBInstance Properties: Engine: aurora-postgresql + EngineVersion: '11.6' DBInstanceIdentifier: postgresql-instance-wrangler DBClusterIdentifier: Ref: AuroraClusterPostgresql @@ -290,6 +357,7 @@ Resources: Type: AWS::RDS::DBCluster Properties: Engine: aurora-mysql + EngineVersion: '5.7' DBClusterIdentifier: mysql-cluster-wrangler MasterUsername: test MasterUserPassword: @@ -311,6 +379,7 @@ Resources: Type: AWS::RDS::DBInstance Properties: Engine: aurora-mysql + EngineVersion: '5.7' DBInstanceIdentifier: mysql-instance-wrangler DBClusterIdentifier: Ref: AuroraClusterMysql @@ -328,11 +397,13 @@ Resources: ConnectionType: JDBC PhysicalConnectionRequirements: AvailabilityZone: - Ref: SubnetAz + Fn::Select: + - 0 + - Fn::GetAZs: '' SecurityGroupIdList: - Ref: DatabaseSecurityGroup SubnetId: - Ref: SubnetId + Ref: PublicSubnet1 ConnectionProperties: JDBC_CONNECTION_URL: Fn::Sub: jdbc:redshift://${Redshift.Endpoint.Address}:${Redshift.Endpoint.Port}/test @@ -350,11 +421,13 @@ Resources: ConnectionType: JDBC PhysicalConnectionRequirements: AvailabilityZone: - Ref: SubnetAz + Fn::Select: + - 0 + - Fn::GetAZs: '' SecurityGroupIdList: - Ref: DatabaseSecurityGroup SubnetId: - Ref: SubnetId + Ref: PublicSubnet1 ConnectionProperties: JDBC_CONNECTION_URL: Fn::Sub: jdbc:postgresql://${AuroraInstancePostgresql.Endpoint.Address}:${AuroraInstancePostgresql.Endpoint.Port}/postgres @@ -372,11 +445,13 @@ Resources: ConnectionType: JDBC PhysicalConnectionRequirements: AvailabilityZone: - Ref: SubnetAz + Fn::Select: + - 0 + - Fn::GetAZs: '' SecurityGroupIdList: - Ref: DatabaseSecurityGroup SubnetId: - Ref: SubnetId + Ref: PublicSubnet1 ConnectionProperties: JDBC_CONNECTION_URL: Fn::Sub: jdbc:mysql://${AuroraInstanceMysql.Endpoint.Address}:${AuroraInstanceMysql.Endpoint.Port}/test @@ -398,7 +473,7 @@ Resources: - AttributeName: attr_range KeyType: RANGE BillingMode: PAY_PER_REQUEST - TableName: aws-data-wrangler-test + TableName: aws-data-wrangler Outputs: BucketName: Value: @@ -449,11 +524,11 @@ Outputs: Description: LogStream name. SubnetId: Value: - Ref: SubnetId + Ref: PublicSubnet1 Description: Subnet ID SubnetId2: Value: - Ref: SubnetId2 + Ref: PublicSubnet2 Description: Subnet ID 2 PostgresqlAddress: Value: diff --git a/testing/parameters.properties b/testing/parameters.properties deleted file mode 100644 index 8e2ba42f2..000000000 --- a/testing/parameters.properties +++ /dev/null @@ -1,6 +0,0 @@ -VpcId=VPC_ID -SubnetId=SUBNET_ID -SubnetId2=SUBNET_ID2 -SubnetAz=AVAILABILITY_ZONE -DatabasesPassword=REDSHIFT_PASSWORD -AWSUserForTests=AWS_USER_THAT_WILL_RUN_THE_TESTS_ON_CLI \ No newline at end of file diff --git a/testing/test_awswrangler/test_cloudwatch.py b/testing/test_awswrangler/test_cloudwatch.py index eced7a754..6ae4cf673 100644 --- a/testing/test_awswrangler/test_cloudwatch.py +++ b/testing/test_awswrangler/test_cloudwatch.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler-test") + response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") outputs = {} for output in response.get("Stacks")[0].get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") diff --git a/testing/test_awswrangler/test_data_lake.py b/testing/test_awswrangler/test_data_lake.py index 497fa344f..dc7ee3868 100644 --- a/testing/test_awswrangler/test_data_lake.py +++ b/testing/test_awswrangler/test_data_lake.py @@ -22,7 +22,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler-test") + response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") outputs = {} for output in response.get("Stacks")[0].get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") @@ -66,7 +66,7 @@ def external_schema(cloudformation_outputs, database): @pytest.fixture(scope="module") def workgroup0(bucket): - wkg_name = "awswrangler_test_0" + wkg_name = "aws_data_wrangler_0" client = boto3.client("athena") wkgs = client.list_work_groups() wkgs = [x["Name"] for x in wkgs["WorkGroups"]] @@ -87,7 +87,7 @@ def workgroup0(bucket): @pytest.fixture(scope="module") def workgroup1(bucket): - wkg_name = "awswrangler_test_1" + wkg_name = "aws_data_wrangler_1" client = boto3.client("athena") wkgs = client.list_work_groups() wkgs = [x["Name"] for x in wkgs["WorkGroups"]] @@ -111,7 +111,7 @@ def workgroup1(bucket): @pytest.fixture(scope="module") def workgroup2(bucket, kms_key): - wkg_name = "awswrangler_test_2" + wkg_name = "aws_data_wrangler_2" client = boto3.client("athena") wkgs = client.list_work_groups() wkgs = [x["Name"] for x in wkgs["WorkGroups"]] @@ -135,7 +135,7 @@ def workgroup2(bucket, kms_key): @pytest.fixture(scope="module") def workgroup3(bucket, kms_key): - wkg_name = "awswrangler_test_3" + wkg_name = "aws_data_wrangler_3" client = boto3.client("athena") wkgs = client.list_work_groups() wkgs = [x["Name"] for x in wkgs["WorkGroups"]] diff --git a/testing/test_awswrangler/test_db.py b/testing/test_awswrangler/test_db.py index 86a57a74d..6e5bbd9ce 100644 --- a/testing/test_awswrangler/test_db.py +++ b/testing/test_awswrangler/test_db.py @@ -18,7 +18,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler-test") + response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") outputs = {} for output in response.get("Stacks")[0].get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") diff --git a/testing/test_awswrangler/test_emr.py b/testing/test_awswrangler/test_emr.py index 0c0112bf8..38c725694 100644 --- a/testing/test_awswrangler/test_emr.py +++ b/testing/test_awswrangler/test_emr.py @@ -13,7 +13,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler-test") + response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") outputs = {} for output in response.get("Stacks")[0].get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") diff --git a/testing/run-tests.sh b/testing/tests.sh similarity index 93% rename from testing/run-tests.sh rename to testing/tests.sh index 3fb87ea78..01f013032 100755 --- a/testing/run-tests.sh +++ b/testing/tests.sh @@ -7,7 +7,7 @@ microtime() { START=$(microtime) -./run-validations.sh +./validations.sh pushd .. tox --recreate --develop -e py36 coverage html --directory testing/coverage diff --git a/testing/run-validations.sh b/testing/validations.sh similarity index 100% rename from testing/run-validations.sh rename to testing/validations.sh