Skip to content

Commit

Permalink
resource/alicloud_cs_managed_kubernetes: output error message when fa…
Browse files Browse the repository at this point in the history
…iled to upgrade cluster;resource/alicloud_cs_kubernetes: output error message when failed to upgrade cluster;resource/alicloud_cs_edge_kubernetes: output error message when failed to upgrade cluster;resource/alicloud_cs_serverless_kubernetes: output error message when failed to upgrade cluster
  • Loading branch information
sanyangji committed May 16, 2024
1 parent 7431a30 commit ebb9e8a
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 171 deletions.
2 changes: 1 addition & 1 deletion alicloud/resource_alicloud_cs_edge_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ func resourceAlicloudCSEdgeKubernetesUpdate(d *schema.ResourceData, meta interfa
// upgrade cluster version
err := UpgradeAlicloudKubernetesCluster(d, meta)
if err != nil {
return WrapErrorf(err, DefaultErrorMsg, d.Id(), "UpgradeClusterVersion", DenverdinoAliyungo)
return WrapError(err)
}
d.Partial(false)
return resourceAlicloudCSKubernetesRead(d, meta)
Expand Down
16 changes: 8 additions & 8 deletions alicloud/resource_alicloud_cs_edge_kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ func TestAccAliCloudEdgeKubernetes(t *testing.T) {
"name": name,
"worker_vswitch_ids": []string{"${local.vswitch_id}"},
"worker_instance_types": []string{"${data.alicloud_instance_types.default.instance_types.0.id}"},
"version": "1.22.15-aliyunedge.1",
"worker_number": "1",
"version": "1.20.11-aliyunedge.1",
"worker_number": "2",
"password": "Test12345",
"pod_cidr": "10.99.0.0/16",
"service_cidr": "172.16.0.0/16",
Expand All @@ -138,8 +138,8 @@ func TestAccAliCloudEdgeKubernetes(t *testing.T) {
Check: resource.ComposeTestCheckFunc(
testAccCheck(map[string]string{
"name": name,
"version": "1.22.15-aliyunedge.1",
"worker_number": "1",
"version": "1.20.11-aliyunedge.1",
"worker_number": "2",
"password": "Test12345",
"pod_cidr": "10.99.0.0/16",
"service_cidr": "172.16.0.0/16",
Expand Down Expand Up @@ -186,13 +186,13 @@ func TestAccAliCloudEdgeKubernetes(t *testing.T) {
},
{
Config: testAccConfig(map[string]interface{}{
//"version": "1.24.6-aliyunedge.1",
"name": "modified-edge-cluster-again",
"version": "1.22.15-aliyunedge.1",
"name": "modified-edge-cluster-again",
}),
Check: resource.ComposeTestCheckFunc(
testAccCheck(map[string]string{
//"version": "1.24.6-aliyunedge.1",
"name": "modified-edge-cluster-again",
"version": "1.22.15-aliyunedge.1",
"name": "modified-edge-cluster-again",
}),
),
},
Expand Down
6 changes: 5 additions & 1 deletion alicloud/resource_alicloud_cs_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,11 @@ func resourceAlicloudCSKubernetesUpdate(d *schema.ResourceData, meta interface{}

d.SetPartial("tags")

UpgradeAlicloudKubernetesCluster(d, meta)
err := UpgradeAlicloudKubernetesCluster(d, meta)
if err != nil {
return WrapError(err)
}

d.Partial(false)
return resourceAlicloudCSKubernetesRead(d, meta)
}
Expand Down
24 changes: 12 additions & 12 deletions alicloud/resource_alicloud_cs_kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ func TestAccAliCloudCSKubernetes_prepaid(t *testing.T) {
{
Config: testAccConfig(map[string]interface{}{
"name_prefix": "tf-testAccKubernetes_prepaid",
"version": "1.24.6-aliyun.1",
"version": "1.26.15-aliyun.1",
"master_vswitch_ids": []string{"${local.vswitch_id}", "${local.vswitch_id}", "${local.vswitch_id}"},
"master_instance_types": []string{"${data.alicloud_instance_types.default.instance_types.0.id}", "${data.alicloud_instance_types.default.instance_types.0.id}", "${data.alicloud_instance_types.default.instance_types.0.id}"},
"master_disk_category": "cloud_ssd",
Expand Down Expand Up @@ -449,16 +449,16 @@ func TestAccAliCloudCSKubernetes_prepaid(t *testing.T) {
}),
),
},
//{
// Config: testAccConfig(map[string]interface{}{
// "version": "1.26.15-aliyun.1",
// }),
// Check: resource.ComposeTestCheckFunc(
// testAccCheck(map[string]string{
// "version": "1.26.15-aliyun.1",
// }),
// ),
//},
{
Config: testAccConfig(map[string]interface{}{
"version": "1.28.9-aliyun.1",
}),
Check: resource.ComposeTestCheckFunc(
testAccCheck(map[string]string{
"version": "1.28.9-aliyun.1",
}),
),
},
{
ResourceName: resourceId,
ImportState: true,
Expand Down Expand Up @@ -608,7 +608,7 @@ resource "alicloud_cs_kubernetes_node_pool" "default" {
system_disk_size = 50
system_disk_category = "cloud_essd"
system_disk_performance_level = "PL0"
desired_size = 1
desired_size = 2
}
`, name)
}
Expand Down
71 changes: 60 additions & 11 deletions alicloud/resource_alicloud_cs_managed_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package alicloud

import (
"fmt"
"log"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -789,22 +790,70 @@ func resourceAlicloudCSManagedKubernetesCreate(d *schema.ResourceData, meta inte
}

func UpgradeAlicloudKubernetesCluster(d *schema.ResourceData, meta interface{}) error {
client := meta.(*connectivity.AliyunClient)
if d.HasChange("version") {
nextVersion := d.Get("version").(string)
args := &cs.UpgradeClusterArgs{
Version: nextVersion,
}

csService := CsService{client}
err := csService.UpgradeCluster(d.Id(), args)
if !d.HasChange("version") {
return nil
}

clusterId := d.Id()
version := d.Get("version").(string)
action := "UpgradeCluster"
c := meta.(*connectivity.AliyunClient)
rosCsClient, err := c.NewRoaCsClient()
if err != nil {
return err
}
args := &roacs.UpgradeClusterRequest{
NextVersion: tea.String(version),
}
// upgrade cluster
var resp *roacs.UpgradeClusterResponse
err = resource.Retry(UpgradeClusterTimeout, func() *resource.RetryError {
resp, err = rosCsClient.UpgradeCluster(tea.String(clusterId), args)
if NeedRetry(err) || resp == nil {
return resource.RetryableError(err)
}
if err != nil {
return WrapError(err)
return resource.NonRetryableError(err)
}
return nil
})

if err != nil {
return WrapErrorf(err, ResponseCodeMsg, d.Id(), action, err)
}

taskId := tea.StringValue(resp.Body.TaskId)
if taskId == "" {
return WrapErrorf(err, ResponseCodeMsg, d.Id(), action, resp)
}

d.SetPartial("version")
csClient := CsClient{client: rosCsClient}
stateConf := BuildStateConf([]string{}, []string{"success"}, d.Timeout(schema.TimeoutUpdate), 5*time.Second, csClient.DescribeTaskRefreshFunc(d, taskId, []string{"fail", "failed"}))
if jobDetail, err := stateConf.WaitForState(); err != nil {
// try to cancel task
wait := incrementalWait(3*time.Second, 3*time.Second)
_ = resource.Retry(5*time.Minute, func() *resource.RetryError {
_, _err := rosCsClient.CancelTask(tea.String(taskId))
if _err != nil {
if NeedRetry(_err) {
wait()
return resource.RetryableError(_err)
}
log.Printf("[WARN] %s ACK Cluster cancel upgrade error: %#v", clusterId, err)
}
return nil
})
// output error message
return WrapErrorf(err, ResponseCodeMsg, d.Id(), action, jobDetail)
}
// ensure cluster state is running
csService := CsService{client: c}
stateConf = BuildStateConf([]string{}, []string{"running"}, UpgradeClusterTimeout, 10*time.Second, csService.CsKubernetesInstanceStateRefreshFunc(clusterId, []string{"deleting", "failed"}))
if _, err := stateConf.WaitForState(); err != nil {
return WrapError(err)
}

d.SetPartial("version")
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion alicloud/resource_alicloud_cs_serverless_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ func resourceAlicloudCSServerlessKubernetesUpdate(d *schema.ResourceData, meta i
// upgrade cluster version
err := UpgradeAlicloudKubernetesCluster(d, meta)
if err != nil {
return WrapErrorf(err, DefaultErrorMsg, d.Id(), "UpgradeClusterVersion", DenverdinoAliyungo)
return WrapError(err)
}

if err := modifyKubernetesCluster(d, meta); err != nil {
Expand Down
154 changes: 28 additions & 126 deletions alicloud/service_alicloud_cs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1130,84 +1130,42 @@ func (s *CsService) GetUserData(clusterId string, labels string, taints string)
return base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(ATTACH_SCRIPT_WITH_VERSION+extra_options_in_line, region, region, version, token))), nil
}

func (s *CsService) UpgradeCluster(clusterId string, args *cs.UpgradeClusterArgs) error {
invoker := NewInvoker()
err := invoker.Run(func() error {
_, e := s.client.WithCsClient(func(csClient *cs.Client) (interface{}, error) {
return nil, csClient.UpgradeCluster(clusterId, args)
})
if e != nil {
return e
}
return nil
})

if err != nil {
return WrapError(err)
}

state, upgradeError := s.WaitForUpgradeCluster(clusterId, "Upgrade")
if state == cs.Task_Status_Success && upgradeError == nil {
// ensure upgrading finished, target running
stateConf := BuildStateConf([]string{"upgrading"}, []string{"running"}, UpgradeClusterTimeout, 10*time.Second, s.CsKubernetesInstanceStateRefreshFunc(clusterId, []string{"deleting", "failed"}))
if _, err := stateConf.WaitForState(); err != nil {
return WrapError(err)
}
return nil
}

// if upgrade failed cancel the task
err = invoker.Run(func() error {
_, e := s.client.WithCsClient(func(csClient *cs.Client) (interface{}, error) {
return nil, csClient.CancelUpgradeCluster(clusterId)
func (s *CsClient) DescribeTaskRefreshFunc(d *schema.ResourceData, taskId string, failStates []string) resource.StateRefreshFunc {
return func() (interface{}, string, error) {
var err error
var taskInfo *client.DescribeTaskInfoResponse
wait := incrementalWait(3*time.Second, 3*time.Second)
err = resource.Retry(5*time.Minute, func() *resource.RetryError {
taskInfo, err = s.client.DescribeTaskInfo(tea.String(taskId))
if err != nil {
if NeedRetry(err) {
wait()
return resource.RetryableError(err)
}
return resource.NonRetryableError(err)
}
return nil
})
if e != nil {
return e
}
return nil
})
if err != nil {
return WrapError(upgradeError)
}

if state, err := s.WaitForUpgradeCluster(clusterId, "CancelUpgrade"); err != nil || state != cs.Task_Status_Success {
log.Printf("[WARN] %s ACK Cluster cancel upgrade error: %#v", clusterId, err)
}

return WrapError(upgradeError)
}

func (s *CsService) WaitForUpgradeCluster(clusterId string, action string) (string, error) {
err := resource.Retry(UpgradeClusterTimeout, func() *resource.RetryError {
resp, err := s.client.WithCsClient(func(csClient *cs.Client) (interface{}, error) {
return csClient.QueryUpgradeClusterResult(clusterId)
})
if err != nil || resp == nil {
return resource.RetryableError(err)
if err != nil {
if NotFoundError(err) {
return taskInfo, "", nil
}
return nil, "", WrapError(err)
}

upgradeResult := resp.(*cs.UpgradeClusterResult)
if upgradeResult.UpgradeStep == cs.UpgradeStep_Success {
return nil
}
currentState := tea.StringValue(taskInfo.Body.State)
for _, failState := range failStates {
if currentState == failState {
if taskInfo.Body.Error != nil {
return taskInfo.Body.Error, currentState, WrapError(Error(FailedToReachTargetStatus, currentState))
}

if upgradeResult.UpgradeStep == cs.UpgradeStep_Pause && upgradeResult.UpgradeStatus.Failed == "true" {
msg := ""
events := upgradeResult.UpgradeStatus.Events
if len(events) > 0 {
msg = events[len(events)-1].Message
return taskInfo, currentState, WrapError(Error(FailedToReachTargetStatus, currentState))
}
return resource.NonRetryableError(fmt.Errorf("faild to %s cluster, error: %s", action, msg))
}
return resource.RetryableError(fmt.Errorf("%s cluster state not matched", action))
})

if err == nil {
log.Printf("[INFO] %s ACK Cluster %s successed", action, clusterId)
return cs.Task_Status_Success, nil
return taskInfo, currentState, nil
}

return cs.Task_Status_Failed, WrapError(err)
}

func GetKubernetesNetworkName(cluster *cs.KubernetesClusterDetail) (network string, err error) {
Expand All @@ -1233,59 +1191,3 @@ func (s *CsClient) DescribeUserPermission(uid string) ([]*client.DescribeUserPer

return body.Body, err
}

// DescribeCsAutoscalingConfig returns the autoscaling configuration for a
// cluster. Note that the id parameter is currently unused: the returned
// values are static defaults, not fetched from the API — TODO(review)
// confirm whether a real lookup was intended here.
func (s *CsClient) DescribeCsAutoscalingConfig(id string) (*client.CreateAutoscalingConfigRequest, error) {
	defaults := &client.CreateAutoscalingConfigRequest{}
	defaults.CoolDownDuration = tea.String("10m")
	defaults.UnneededDuration = tea.String("10m")
	defaults.UtilizationThreshold = tea.String("0.5")
	defaults.GpuUtilizationThreshold = tea.String("0.5")
	defaults.ScanInterval = tea.String("30s")
	return defaults, nil
}

// DescribeTaskInfo polls the CS API for the details of the given task and
// renders them as a human-readable failure message. Transient API errors are
// retried for up to five minutes with incremental backoff. An empty string is
// returned when taskId is empty or the details could not be fetched; this is
// a deliberate best-effort lookup used only to enrich error output, so a
// failure here is swallowed rather than propagated.
func (s *CsClient) DescribeTaskInfo(taskId string) string {
	if taskId == "" {
		return ""
	}

	var taskResp *client.DescribeTaskInfoResponse
	wait := incrementalWait(3*time.Second, 3*time.Second)
	retryErr := resource.Retry(5*time.Minute, func() *resource.RetryError {
		r, e := s.client.DescribeTaskInfo(tea.String(taskId))
		if e == nil {
			taskResp = r
			return nil
		}
		if NeedRetry(e) {
			wait()
			return resource.RetryableError(e)
		}
		return resource.NonRetryableError(e)
	})
	if retryErr != nil {
		return ""
	}

	return fmt.Sprintf("[TASK FAILED!!!]\nDetails: %++v", taskResp.Body.GoString())
}

// ModifyNodePoolNodeConfig updates the node configuration (e.g. kubelet
// parameters) of the given node pool and returns the raw API response.
// When debug mode is enabled, the request parameters and response are
// recorded via addDebug for troubleshooting.
func (s *CsClient) ModifyNodePoolNodeConfig(clusterId, nodepoolId string, request *client.ModifyNodePoolNodeConfigRequest) (interface{}, error) {
	log.Printf("[DEBUG] modifyNodePoolKubeletRequest %++v", *request)

	response, err := s.client.ModifyNodePoolNodeConfig(tea.String(clusterId), tea.String(nodepoolId), request)
	if err != nil {
		return nil, WrapError(err)
	}
	if debugOn() {
		addDebug("ModifyNodePoolKubeletConfig", response, map[string]interface{}{
			"ClusterId":  clusterId,
			"NodePoolId": nodepoolId,
			"Args":       request,
		})
	}
	return response, err
}
Loading

0 comments on commit ebb9e8a

Please sign in to comment.