Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ __pycache__/
*/.DS_Store
/venv/
.idea
logs/*.log
logs/*.log
backup/jars
backup/tars
20 changes: 15 additions & 5 deletions bin/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,27 @@ function check_python_version {

function install_env {
check_python_version
if [[ ! -d $TOOL_HOME/venv ]]; then
if [[ ! -f $TOOL_HOME/venv/.installed ]]; then
# If not installed `venv` successfully, but `venv` already exists, remove it first.
if [[ -d $TOOL_HOME/venv ]]; then
logging warn "Deleting remnant useless venv ..."
rm -rf $TOOL_HOME/venv
logging warn "Deleted remnant useless venv successfully."
fi
logging info "Initializing venv ..."
python3 -m venv $TOOL_HOME/venv
source $TOOL_HOME/venv/bin/activate
logging info "Install dependencies ..."
logging info "Installing dependencies ..."
pip3 install -r $TOOL_HOME/requirements.txt
logging info "Installed dependencies successfully."
touch $TOOL_HOME/venv/.installed
logging info "Init $TOOL_HOME/.venv successfully."
else
logging warn "$TOOL_HOME/.venv already existing, skip install again."
fi
logging info "Please use 'source venv/bin/activate' to activate venv and execute commands."
logging info "Please use 'python deploy.py --help' to check the usage."
logging info "Enjoy it and have fun."
logging warn "Please use 'source venv/bin/activate' to activate venv and execute commands."
logging warn "Please use 'python deploy.py --help' to check the usage."
logging info "Please enjoy it and have fun."
}

function main {
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-kylin4-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.2xlarge
Default: m5.xlarge
AllowedValues:
- m5.2xlarge
- m5.xlarge
Ec2VolumnTypeForKylin4Node:
Type: String
Default: gp2
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-kylin4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.2xlarge
Default: m5.xlarge
AllowedValues:
- m5.2xlarge
- m5.xlarge
Ec2VolumnTypeForKylin4Node:
Type: String
Default: gp2
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-spark-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.2xlarge
Default: m5.xlarge
AllowedValues:
- m5.2xlarge
- m5.xlarge
Ec2VolumnTypeForSparkMasterNode:
Type: String
Default: gp2
Expand Down
13 changes: 12 additions & 1 deletion cloudformation_templates/ec2-cluster-spark-slave-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ Parameters:
- m5.xlarge
- m5.2xlarge
- m5.4xlarge
InstanceTypeForTest:
Description: EC2 instance type
Type: String
ConstraintDescription: Must be a valid EC2 instance type.
Default: m5.xlarge
AllowedValues:
- m5.xlarge
Ec2VolumnTypeForSlaveNode:
Type: String
Default: gp2
Expand Down Expand Up @@ -175,7 +182,11 @@ Resources:
- - Slave Scale
- !Ref WorkerNum
- Node for kylin4
InstanceType: !Ref InstanceType
InstanceType:
!If
- IsProductMode
- !Ref InstanceType
- !Ref InstanceTypeForTest
IamInstanceProfile: !Ref InstanceProfileId
NetworkInterfaces:
- DeviceIndex: 0
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-spark-slave.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.2xlarge
Default: m5.xlarge
AllowedValues:
- m5.2xlarge
- m5.xlarge
Ec2VolumnTypeForSlaveNode:
Type: String
Default: gp2
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-static-services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.xlarge
Default: m5.large
AllowedValues:
- m5.xlarge
- m5.large
Ec2VolumeSizeForStaticServicesNode:
Type: Number
Default: 20
Expand Down
4 changes: 2 additions & 2 deletions cloudformation_templates/ec2-cluster-zk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ Parameters:
Description: EC2 instance type
Type: String
ConstraintDescription: must be a valid EC2 instance type.
Default: m5.xlarge
Default: m5.large
AllowedValues:
- m5.xlarge
- m5.large
Ec2VolumeSizeForZookeeperNode:
Type: Number
Default: 10
Expand Down
14 changes: 13 additions & 1 deletion clouds/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ def is_instances_terminated(self) -> bool:
def kylin_stack_name(self) -> str:
return self.cloud_instance.kylin_stack_name

@property
def static_service_stack_name(self) -> str:
return self.cloud_instance.static_service_stack_name

@property
def rds_stack_name(self) -> str:
return self.cloud_instance.rds_stack_name

@property
def vpc_stack_name(self) -> str:
return self.cloud_instance.vpc_stack_name

@property
def is_associated_public_ip(self) -> bool:
return self.config[Params.ASSOSICATED_PUBLIC_IP.value] == 'true'
Expand Down Expand Up @@ -152,7 +164,7 @@ def terminate_ec2_cluster(self) -> Optional[Dict]:
self.cloud_instance.terminate_zk_stack()
logger.info('Cluster terminated useless nodes.')

def destroy_rds_and_vpc(self) -> None:
def destroy_monitor_and_rds_and_vpc(self) -> None:
if not self.is_destroy_all:
return
logger.info('Prepare to destroy RDS and VPC and monitor node.')
Expand Down
2 changes: 1 addition & 1 deletion engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def destroy_default_cluster(self):
logger.info('Destroy default Kylin Cluster successfully.')

def destroy_rds_and_vpc(self) -> None:
self.engine_utils.destroy_rds_and_vpc()
self.engine_utils.destroy_monitor_and_rds_and_vpc()

def list_alive_nodes(self) -> None:
logger.info('Ec2: list alive nodes.')
Expand Down
10 changes: 2 additions & 8 deletions engine_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ def destroy_cluster(self, cluster_num: int) -> None:
self.aws.destroy_clusters(cluster_nums=[cluster_num])
self.aws.restart_prometheus_server()

def destroy_rds_and_vpc(self) -> None:
self.aws.destroy_rds_and_vpc()
def destroy_monitor_and_rds_and_vpc(self) -> None:
self.aws.destroy_monitor_and_rds_and_vpc()

def is_cluster_ready(self) -> bool:
if self.cloud_address:
Expand Down Expand Up @@ -202,12 +202,6 @@ def download_jars(self) -> None:
jars = self.needed_jars()
for jar in jars:
Utils.download_jar(jar)
if self.config[Config.ENABLE_SOFT_AFFINITY.value] == 'true':
assert Utils.files_in_jars() == 4, f"Needed jars must be 4, not {Utils.files_in_jars()}, " \
f"which contains {jars}."
else:
assert Utils.files_in_jars() == 2, f"Needed jars must be 2, not {Utils.files_in_jars()}, " \
f"which contains {jars}."

def upload_needed_files(self) -> None:
logger.info("Start to uploading tars.")
Expand Down
39 changes: 24 additions & 15 deletions instances/aws_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,7 @@ def is_rds_exists(self) -> bool:
try:
self.rds_client.describe_db_instances(DBInstanceIdentifier=self.db_identifier)
except self.rds_client.exceptions.DBInstanceNotFoundFault as ex:
logger.warning(ex.response['Error']['Message'])
logger.info(f'Now creating {self.db_identifier}.')
logger.warning(f'DB {self.db_identifier} is not found.')
return False
return True

Expand All @@ -409,14 +408,12 @@ def create_rds_stack(self) -> Optional[Dict]:
# update needed params
params[Params.SUBNET_GROUP_NAME.value] = self.get_subnet_group()
params[Params.SECURITY_GROUP.value] = self.get_security_group_id()
Params[Params.ZONE.value] = self.region + 'b'
params[Params.ZONE.value] = self.region + 'b'
resp = self.create_stack(
stack_name=self.rds_stack_name,
file_path=self.path_of_rds_stack,
params=params,
)
# Make sure that rds create successfully.
assert self.is_stack_complete(self.rds_stack_name), f'Rds {self.db_identifier} create failed, please check.'
return resp

def terminate_rds_stack(self) -> Optional[Dict]:
Expand Down Expand Up @@ -528,7 +525,6 @@ def create_zk_stack_by_cluster(self, cluster_num: int = None) -> Optional[Dict]:
file_path=self.path_of_zk_stack,
params=params
)
assert self.is_stack_complete(zk_stack_name), f'{zk_stack_name} create failed, please check.'
return resp

def terminate_zk_stack(self) -> Optional[Dict]:
Expand Down Expand Up @@ -677,7 +673,6 @@ def create_kylin_stack_by_cluster(self, cluster_num: int = None) -> Optional[Dic
file_path=self.path_of_kylin_stack,
params=params
)
assert self.is_stack_complete(kylin_stack_name), f"Create scaled kylin stack not complete, please check."
return resp

def terminate_kylin_stack(self) -> Optional[Dict]:
Expand Down Expand Up @@ -811,7 +806,6 @@ def scale_up_kylin(self, kylin_num: int, cluster_num: int = None) -> Optional[Di
file_path=self.path_of_kylin_scale_stack,
params=params
)
assert self.is_stack_complete(stack_name)
return resp

def scale_down_kylin(self, kylin_num: int, cluster_num: int = None) -> Optional[Dict]:
Expand Down Expand Up @@ -889,9 +883,6 @@ def create_spark_master_stack_by_cluster(self, cluster_num: int = None) -> Optio
file_path=self.path_of_spark_master_stack,
params=params
)

assert self.is_stack_complete(spark_master_stack_name), \
f'Current {spark_master_stack_name} stack not create complete, please check.'
return resp

def terminate_spark_master_stack(self) -> Optional[Dict]:
Expand Down Expand Up @@ -1135,7 +1126,6 @@ def scale_up_worker(self, worker_num: int, cluster_num: int = None) -> Optional[
file_path=self.path_of_spark_slave_scaled_stack,
params=params
)
assert self.is_stack_complete(stack_name)
return resp

def scale_down_worker(self, worker_num: int, cluster_num: int = None) -> Optional[Dict]:
Expand Down Expand Up @@ -1453,6 +1443,7 @@ def terminate_stack_by_name(self, stack_name: str) -> Optional[Dict]:
return resp

def create_stack(self, stack_name: str, file_path: str, params: Dict, is_capability: bool = False) -> Dict:
logger.info(f'Now creating stack: {stack_name}.')
try:
if is_capability:
resp = self.cf_client.create_stack(
Expand All @@ -1470,8 +1461,8 @@ def create_stack(self, stack_name: str, file_path: str, params: Dict, is_capabil
return resp
except ParamValidationError as ex:
logger.error(ex)
assert self.is_stack_complete(stack_name=stack_name), \
f"Stack {stack_name} not create complete, please check."
assert self.is_stack_complete(stack_name=stack_name), f"Stack: {stack_name} not create complete, please check."
logger.info(f'Create stack: {stack_name} complete.')

def delete_stack(self, stack_name: str) -> Dict:
logger.info(f'Current terminating stack: {stack_name}.')
Expand Down Expand Up @@ -2006,10 +1997,14 @@ def is_valid_cidr_ip(self) -> bool:

def is_object_exists_on_s3(self, filename: str, bucket: str, bucket_dir: str) -> bool:
try:
self.s3_client.head_object(Bucket=bucket, Key=bucket_dir + filename)
response = self.s3_client.head_object(Bucket=bucket, Key=bucket_dir + filename)
Utils.is_uploaded_success(filename=filename, size_in_bytes=response['ContentLength'])
except botocore.exceptions.ClientError as ex:
assert ex.response['Error']['Code'] == '404'
return False
except AssertionError as ex:
logger.error(ex)
return False
return True

def is_s3_directory_exists(self, bucket: str, bucket_dir: str) -> bool:
Expand Down Expand Up @@ -2085,9 +2080,23 @@ def is_stack_create_complete(self, stack_name: str) -> bool:
def is_stack_delete_complete(self, stack_name: str) -> bool:
return self._stack_status_check(name_or_id=stack_name, status='DELETE_COMPLETE')

def is_stack_rollback_complete(self, stack_name: str) -> bool:
return self._stack_status_check(name_or_id=stack_name, status='ROLLBACK_COMPLETE')

def is_stack_rollback_failed(self, stack_name: str) -> bool:
return self._stack_status_check(name_or_id=stack_name, status='ROLLBACK_FAILED')

def is_stack_rollback_in_progress(self, stack_name: str) -> bool:
return self._stack_status_check(name_or_id=stack_name, status='ROLLBACK_IN_PROGRESS')

def is_stack_complete(self, stack_name: str) -> bool:
if self._stack_complete(stack_name):
return True
# before return false, check stack whether is created failed or other failed status.
if self.is_stack_rollback_failed(stack_name=stack_name) \
or self.is_stack_rollback_complete(stack_name=stack_name) \
or self.is_stack_rollback_in_progress(stack_name=stack_name):
raise Exception(f'Current stack: {stack_name} is create failed, please check.')
return False

def _validate_spark_worker_scale(self, stack_name: str) -> None:
Expand Down
7 changes: 3 additions & 4 deletions kylin_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ EC2_RDS_PARAMS:
Ec2Mode: test
RDSEngine: mysql
RDSEngineVersion: 5.7.25
Zone: cn-northwest-1b

EC2_STATIC_SERVICES_PARAMS:
# Note: params details check in related yaml file
Expand All @@ -172,10 +171,10 @@ EC2_STATIC_SERVICES_PARAMS:
DbPassword: *DbPassword

StaticServicesScriptFileName: prepare-ec2-env-for-static-services.sh
Ec2Mode: product
Ec2Mode: test
# followed params is invalid if Ec2Mode(which set in the yaml) is 'test'
Ec2InstanceTypeForStaticServices: m5.4xlarge
Ec2VolumeSizeForStaticServicesNode: '50'
Ec2InstanceTypeForStaticServices: m5.2xlarge
Ec2VolumeSizeForStaticServicesNode: '20'
Ec2VolumnTypeForStaticServicesNode: standard

EC2_ZOOKEEPERS_PARAMS:
Expand Down
Loading