forked from vmware/cluster-api-provider-cloud-director
/
vcdmachine_controller.go
1442 lines (1305 loc) · 63.9 KB
/
vcdmachine_controller.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright 2021 VMware, Inc.
SPDX-License-Identifier: Apache-2.0
*/
package controllers
import (
"bytes"
"context"
_ "embed" // this needs go 1.16+
b64 "encoding/base64"
"fmt"
"math"
"reflect"
"strconv"
"strings"
"text/template"
"time"
"github.com/pkg/errors"
"github.com/vmware/cloud-provider-for-cloud-director/pkg/vcdsdk"
infrav1 "github.com/vmware/cluster-api-provider-cloud-director/api/v1beta1"
"github.com/vmware/cluster-api-provider-cloud-director/pkg/capisdk"
"github.com/vmware/cluster-api-provider-cloud-director/release"
"github.com/vmware/go-vcloud-director/v2/govcd"
"github.com/vmware/go-vcloud-director/v2/types/v56"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/source"
)
// CloudInitScriptInput holds the values handed to the cloud-init script template when the cloud-init script
// for a node is generated. reconcileNormal fills it in from the Machine, VCDMachine, VCDCluster and Cluster objects.
type CloudInitScriptInput struct {
ControlPlane bool // true when the control plane variant of the script is used: a control plane machine whose bootstrap data has no `kubeadm join`
NvidiaGPU bool // configure containerd for NVIDIA libraries; copied from VCDMachine.Spec.EnableNvidiaGPU
BootstrapRunCmd string // bootstrap run command
HTTPProxy string // httpProxy endpoint, from the VCDCluster proxy configuration
HTTPSProxy string // httpsProxy endpoint, from the VCDCluster proxy configuration
NoProxy string // no proxy values, from the VCDCluster proxy configuration
MachineName string // vm host name; set to the name of the owner Machine
ResizedControlPlane bool // true for a control plane machine whose bootstrap data contains `kubeadm join`, i.e. one that joins an existing control plane
VcdHostFormatted string // vcd host: VCDCluster.Spec.Site with every "/" escaped as "\/"
TKGVersion string // tkgVersion; needed for both worker and control plane machines for metering
ClusterID string // cluster id (VCDCluster.Status.InfraId); needed for both worker and control plane machines for metering
}
// String constants shared by the controller.
// NOTE(review): presumably the two ReclaimPolicy values name the supported reclaim policies and VcdResourceTypeVM
// names the VCD resource type of a virtual machine; none of them is referenced in the part of the file reviewed
// here, so confirm their usage before changing the values.
const (
ReclaimPolicyDelete = "Delete"
ReclaimPolicyRetain = "Retain"
VcdResourceTypeVM = "virtual-machine"
)
// Mebibyte is the number of bytes in one mebibyte (1024 * 1024). reconcileNormal divides the requested disk size
// in bytes by this value to obtain the size in MB expected by go-vcd.
const Mebibyte = 1048576
// The following `embed` directive reads the file at the given path and copies its content into the declared
// variable. Variables populated this way must be declared at package scope.
//
// cloudInitScriptTemplate holds the content of cluster_scripts/cloud_init.tmpl.
//go:embed cluster_scripts/cloud_init.tmpl
var cloudInitScriptTemplate string
// VCDMachineReconciler reconciles a VCDMachine object.
// It embeds a controller-runtime client.Client, which the reconcile methods use to fetch the Kubernetes objects
// they work on and to build the patch helper for the VCDMachine.
type VCDMachineReconciler struct {
client.Client
}
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=vcdmachines,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=vcdmachines/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=vcdmachines/finalizers,verbs=update

// Reconcile processes one reconcile request for a VCDMachine.
//
// It loads the VCDMachine, its owner Machine, the owning Cluster and the VCDCluster, and returns without doing
// any work when one of them is not available yet or when reconciliation is paused. After the patch helper has
// been created, the VCDMachine is patched on every exit path by a deferred call; a patch failure is returned
// only when no other error is being returned. The actual work is delegated to reconcileDelete when the deletion
// timestamp is set and to reconcileNormal otherwise.
func (r *VCDMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) {
	log := ctrl.LoggerFrom(ctx)

	// Load the VCDMachine named by the request; an object that no longer exists needs no work.
	vcdMachine := &infrav1.VCDMachine{}
	fetchErr := r.Client.Get(ctx, req.NamespacedName, vcdMachine)
	if apierrors.IsNotFound(fetchErr) {
		return ctrl.Result{}, nil
	}
	if fetchErr != nil {
		return ctrl.Result{}, fetchErr
	}

	// Resolve the Machine that owns this VCDMachine.
	ownerMachine, err := util.GetOwnerMachine(ctx, r.Client, vcdMachine.ObjectMeta)
	switch {
	case err != nil:
		return ctrl.Result{}, err
	case ownerMachine == nil:
		log.Info("Waiting for Machine Controller to set OwnerRef on VCDMachine")
		return ctrl.Result{}, nil
	}
	log = log.WithValues("machine", ownerMachine.Name)

	// Resolve the Cluster the owner Machine belongs to.
	ownerCluster, err := util.GetClusterFromMetadata(ctx, r.Client, ownerMachine.ObjectMeta)
	switch {
	case err != nil:
		log.Info("VCDMachine owner Machine is missing cluster label or cluster does not exist")
		return ctrl.Result{}, err
	case ownerCluster == nil:
		log.Info("Please associate this machine with a cluster using the label", "label", clusterv1.ClusterLabelName)
		return ctrl.Result{}, nil
	}
	log = log.WithValues("cluster", ownerCluster.Name)

	// Nothing to do while the Cluster or the VCDMachine itself is paused.
	if annotations.IsPaused(ownerCluster, vcdMachine) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	deleting := !vcdMachine.ObjectMeta.DeletionTimestamp.IsZero()

	// Load the VCDCluster referenced by the Cluster. A failed lookup only stops reconciliation when the
	// machine is not being deleted; deletion carries on regardless.
	vcdCluster := &infrav1.VCDCluster{}
	vcdClusterKey := client.ObjectKey{
		Namespace: vcdMachine.Namespace,
		Name:      ownerCluster.Spec.InfrastructureRef.Name,
	}
	if clusterErr := r.Client.Get(ctx, vcdClusterKey, vcdCluster); clusterErr != nil {
		log.Info("VCDCluster is not available yet")
		if !deleting {
			return ctrl.Result{}, nil
		}
		log.Info("Continuing to delete the VCDMachine, since deletion timestamp is set")
	}

	helper, err := patch.NewHelper(vcdMachine, r)
	if err != nil {
		return ctrl.Result{}, err
	}

	// From here on, every return path patches the VCDMachine object and its status.
	defer func() {
		patchErr := patchVCDMachine(ctx, helper, vcdMachine)
		if patchErr == nil {
			return
		}
		log.Error(patchErr, "Failed to patch VCDMachine")
		if rerr == nil {
			rerr = patchErr
		}
	}()

	// Register the finalizer before doing anything else, to avoid a race between init and delete.
	if hasFinalizer := controllerutil.ContainsFinalizer(vcdMachine, infrav1.MachineFinalizer); !hasFinalizer {
		controllerutil.AddFinalizer(vcdMachine, infrav1.MachineFinalizer)
		return ctrl.Result{}, nil
	}

	switch {
	case deleting:
		// Deletion does not wait for the cluster infrastructure to be ready.
		return r.reconcileDelete(ctx, ownerCluster, ownerMachine, vcdMachine, vcdCluster)
	case !ownerCluster.Status.InfrastructureReady:
		// Wait for the cluster object to be updated once its infrastructure exists.
		log.Info("Waiting for VCDCluster Controller to create cluster infrastructure")
		conditions.MarkFalse(vcdMachine, ContainerProvisionedCondition,
			WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
		return ctrl.Result{}, nil
	default:
		return r.reconcileNormal(ctx, ownerCluster, ownerMachine, vcdMachine, vcdCluster)
	}
}
// patchVCDMachine refreshes the summary condition of vcdMachine from ContainerProvisionedCondition and
// BootstrapExecSucceededCondition, then persists the object through patchHelper. The Ready condition and the two
// conditions above are passed to the patch as conditions owned by this controller. It returns the error reported
// by the patch helper, if any.
func patchVCDMachine(ctx context.Context, patchHelper *patch.Helper, vcdMachine *infrav1.VCDMachine) error {
	// The step counter is only part of the summary while the object is not being deleted.
	notDeleting := vcdMachine.ObjectMeta.DeletionTimestamp.IsZero()
	summarized := conditions.WithConditions(
		ContainerProvisionedCondition,
		BootstrapExecSucceededCondition,
	)
	conditions.SetSummary(vcdMachine, summarized, conditions.WithStepCounterIf(notDeleting))

	owned := patch.WithOwnedConditions{
		Conditions: []clusterv1.ConditionType{
			clusterv1.ReadyCondition,
			ContainerProvisionedCondition,
			BootstrapExecSucceededCondition,
		},
	}
	return patchHelper.Patch(ctx, vcdMachine, owned)
}
// Keys of VM extra config entries that report the progress of the post-customization script.
// waitForPostCustomizationPhase reads such a key from the VM and accepts the values "", "in_progress" and
// "successful" for a phase key.
// NOTE(review): presumably the first six keys are the ones passed as the phase argument — the callers are not
// part of the code reviewed here.
const (
NetworkConfiguration = "guestinfo.postcustomization.networkconfiguration.status"
ProxyConfiguration = "guestinfo.postcustomization.proxy.setting.status"
MeteringConfiguration = "guestinfo.metering.status"
KubeadmInit = "guestinfo.postcustomization.kubeinit.status"
KubeadmNodeJoin = "guestinfo.postcustomization.kubeadm.node.join.status"
NvidiaRuntimeInstall = "guestinfo.postcustomization.nvidia.runtime.install.status"
// PostCustomizationScriptExecutionStatus holds an integer; waitForPostCustomizationPhase treats any non-zero
// value as a script failure.
PostCustomizationScriptExecutionStatus = "guestinfo.post_customization_script_execution_status"
// PostCustomizationScriptFailureReason is read after a non-zero execution status and included in the returned error.
PostCustomizationScriptFailureReason = "guestinfo.post_customization_script_execution_failure_reason"
)
// controlPlanePostCustPhases lists post-customization phase keys ending with KubeadmInit.
// NOTE(review): presumably the phases awaited, in this order, for a machine that runs `kubeadm init` — the code
// that consumes this slice is not part of the section reviewed here.
var controlPlanePostCustPhases = []string{
NetworkConfiguration,
MeteringConfiguration,
ProxyConfiguration,
KubeadmInit,
}
// joinPostCustPhases lists post-customization phase keys ending with KubeadmNodeJoin.
// NOTE(review): presumably the phases awaited, in this order, for a machine that runs `kubeadm join`; unlike
// controlPlanePostCustPhases it does not contain ProxyConfiguration — confirm that this is intentional.
var joinPostCustPhases = []string{
NetworkConfiguration,
MeteringConfiguration,
KubeadmNodeJoin,
}
// removeFromSlice returns arr without the first element equal to remove.
//
// When a matching element is found, the result is a newly allocated slice, so neither arr nor its backing array
// is modified (appending onto arr[:ind] would shift the remaining elements inside the caller's slice). When no
// element matches, arr itself is returned unchanged.
func removeFromSlice(remove string, arr []string) []string {
	for ind, str := range arr {
		if str != remove {
			continue
		}
		trimmed := make([]string, 0, len(arr)-1)
		trimmed = append(trimmed, arr[:ind]...)
		return append(trimmed, arr[ind+1:]...)
	}
	return arr
}
// strInSlice reports whether findStr is equal to at least one element of arr.
// An empty or nil arr yields false.
func strInSlice(findStr string, arr []string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == findStr {
			return true
		}
	}
	return false
}
// phaseSecondTimeout is the number of seconds waitForPostCustomizationPhase keeps polling a single phase before
// it gives up and returns a timeout error.
const phaseSecondTimeout = 600
// waitForPostCustomizationPhase blocks until the VM reports the status "successful" for the extra config key
// given by phase, polling every 10 seconds.
//
// On each iteration the VM is refreshed and the value stored under phase is read. The accepted values are "",
// "in_progress" and "successful"; once the status has moved on, the previous status is no longer accepted.
//
// It returns nil when the phase is successful, and an error when
//   - the VDC manager cannot be created, the VM cannot be refreshed or an extra config value cannot be read,
//   - the phase reports a status that is not (or no longer) accepted,
//   - PostCustomizationScriptExecutionStatus holds a non-integer or non-zero value (the failure reason is read
//     from PostCustomizationScriptFailureReason),
//   - more than phaseSecondTimeout seconds have passed since the call started, or
//   - ctx is done while waiting for the next poll (ctx.Err() is returned).
func (r *VCDMachineReconciler) waitForPostCustomizationPhase(ctx context.Context,
	workloadVCDClient *vcdsdk.Client, vm *govcd.VM, phase string) error {
	log := ctrl.LoggerFrom(ctx)

	startTime := time.Now()
	possibleStatuses := []string{"", "in_progress", "successful"}
	currentStatus := possibleStatuses[0]
	vdcManager, err := vcdsdk.NewVDCManager(workloadVCDClient, workloadVCDClient.ClusterOrgName,
		workloadVCDClient.ClusterOVDCName)
	if err != nil {
		return errors.Wrapf(err, "failed to create a vdc manager object when waiting for post customization phase of VM")
	}

	for {
		if err := vm.Refresh(); err != nil {
			return errors.Wrapf(err, "unable to refresh vm [%s]: [%v]", vm.VM.Name, err)
		}
		newStatus, err := vdcManager.GetExtraConfigValue(vm, phase)
		if err != nil {
			return errors.Wrapf(err, "unable to get extra config value for key [%s] for vm: [%s]: [%v]",
				phase, vm.VM.Name, err)
		}
		log.Info("Obtained machine status ", "phase", phase, "status", newStatus)

		if !strInSlice(newStatus, possibleStatuses) {
			// err is always nil at this point. Wrapping a nil error yields nil, which would report an
			// invalid status as success, so a fresh error is created instead.
			return fmt.Errorf("invalid postcustomiation phase: [%s] for key [%s] for vm [%s]",
				newStatus, phase, vm.VM.Name)
		}
		if newStatus != currentStatus {
			// The status has advanced; drop the previous status so that it is rejected from now on.
			possibleStatuses = removeFromSlice(currentStatus, possibleStatuses)
			currentStatus = newStatus
		}
		if newStatus == possibleStatuses[len(possibleStatuses)-1] { // successful status
			return nil
		}

		// catch intermediate script execution failure
		scriptExecutionStatus, err := vdcManager.GetExtraConfigValue(vm, PostCustomizationScriptExecutionStatus)
		if err != nil {
			return errors.Wrapf(err, "unable to get extra config value for key [%s] for vm: [%s]: [%v]",
				PostCustomizationScriptExecutionStatus, vm.VM.Name, err)
		}
		if scriptExecutionStatus != "" {
			execStatus, err := strconv.Atoi(scriptExecutionStatus)
			if err != nil {
				return errors.Wrapf(err, "unable to convert script execution status [%s] to int: [%v]",
					scriptExecutionStatus, err)
			}
			if execStatus != 0 {
				scriptExecutionFailureReason, err := vdcManager.GetExtraConfigValue(vm, PostCustomizationScriptFailureReason)
				if err != nil {
					return errors.Wrapf(err, "unable to get extra config value for key [%s] for vm, "+
						"(script execution status [%d]): [%s]: [%v]",
						PostCustomizationScriptFailureReason, execStatus, vm.VM.Name, err)
				}
				return fmt.Errorf("script failed with status [%d] and reason [%s]", execStatus, scriptExecutionFailureReason)
			}
		}

		if seconds := int(time.Since(startTime) / time.Second); seconds > phaseSecondTimeout {
			return fmt.Errorf("time for postcustomization status [%s] exceeded timeout [%d]",
				phase, phaseSecondTimeout)
		}

		// Wait before the next poll, but stop early when the context is cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(10 * time.Second):
		}
	}
}
func (r *VCDMachineReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster,
machine *clusterv1.Machine, vcdMachine *infrav1.VCDMachine, vcdCluster *infrav1.VCDCluster) (res ctrl.Result, retErr error) {
log := ctrl.LoggerFrom(ctx, "machine", machine.Name, "cluster", vcdCluster.Name)
// To avoid spamming RDEs with updates, only update the RDE with events when machine creation is ongoing
skipRDEEventUpdates := machine.Status.BootstrapReady
userCreds, err := getUserCredentialsForCluster(ctx, r.Client, vcdCluster.Spec.UserCredentialsContext)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "Error getting client credentials to reconcile Cluster [%s] infrastructure", vcdCluster.Name)
}
workloadVCDClient, err := vcdsdk.NewVCDClientFromSecrets(vcdCluster.Spec.Site, vcdCluster.Spec.Org,
vcdCluster.Spec.Ovdc, vcdCluster.Spec.Org, userCreds.Username, userCreds.Password, userCreds.RefreshToken, true, true)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "Unable to create VCD client to reconcile infrastructure for the Machine [%s]", machine.Name)
}
capvcdRdeManager := capisdk.NewCapvcdRdeManager(workloadVCDClient, vcdCluster.Status.InfraId)
if vcdMachine.Spec.ProviderID != nil && vcdMachine.Status.ProviderID != nil {
vcdMachine.Status.Ready = true
conditions.MarkTrue(vcdMachine, ContainerProvisionedCondition)
err = capvcdRdeManager.AddToEventSet(ctx, capisdk.InfraVmBootstrapped, "", machine.Name, "", skipRDEEventUpdates)
if err != nil {
log.Error(err, "failed to add InfraVmBootstrapped event into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, nil
}
if machine.Spec.Bootstrap.DataSecretName == nil {
if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster,
clusterv1.ControlPlaneInitializedCondition) {
log.Info("Waiting for the control plane to be initialized")
conditions.MarkFalse(vcdMachine, ContainerProvisionedCondition,
clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}
log.Info("Waiting for the Bootstrap provider controller to set bootstrap data")
conditions.MarkFalse(vcdMachine, ContainerProvisionedCondition,
WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}
patchHelper, err := patch.NewHelper(vcdMachine, r.Client)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.CAPVCDObjectPatchError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add CAPVCDObjectPatchError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error patching VCDMachine [%s] of cluster [%s]", vcdMachine.Name, vcdCluster.Name)
}
conditions.MarkTrue(vcdMachine, ContainerProvisionedCondition)
if !conditions.Has(vcdMachine, BootstrapExecSucceededCondition) {
conditions.MarkFalse(vcdMachine, BootstrapExecSucceededCondition,
BootstrappingReason, clusterv1.ConditionSeverityInfo, "")
if err := patchVCDMachine(ctx, patchHelper, vcdMachine); err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.CAPVCDObjectPatchError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add CAPVCDObjectPatchError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error patching VCDMachine [%s] of cluster [%s]", vcdMachine.Name, vcdCluster.Name)
}
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.CAPVCDObjectPatchError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove CAPVCDObjectPatchError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
vdcManager, err := vcdsdk.NewVDCManager(workloadVCDClient, workloadVCDClient.ClusterOrgName,
workloadVCDClient.ClusterOVDCName)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "failed to create a vdc manager object when reconciling machine [%s]", vcdMachine.Name)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove VCDMachineError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
// The vApp should have already been created, so this is more of a Get of the vApp
vAppName := cluster.Name
vApp, err := vdcManager.Vdc.GetVAppByName(vAppName, true)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDClusterVappCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDClusterVappCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err,
"Error provisioning infrastructure for the machine [%s] of the cluster [%s]",
machine.Name, vcdCluster.Name)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDClusterVappCreationError, "", "")
if err != nil {
log.Error(err, "failed to remove VCDClusterVappCreationError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
bootstrapJinjaScript, err := r.getBootstrapData(ctx, machine)
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineScriptGenerationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineScriptGenerationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error retrieving bootstrap data for machine [%s] of the cluster [%s]",
machine.Name, vcdCluster.Name)
}
// In a multimaster cluster, the initial control plane node runs `kubeadm init`; additional control plane nodes
// run `kubeadm join`. The joining control planes run `kubeadm join`, so these nodes use the join script.
// Although it is sufficient to just check if `kubeadm join` is in the bootstrap script, using the
// isControlPlaneMachine function is a simpler operation, so this function is called first.
useControlPlaneScript := util.IsControlPlaneMachine(machine) &&
!strings.Contains(bootstrapJinjaScript, "kubeadm join")
// Scaling up Control Plane initially creates the nodes as worker, which eventually joins the original control plane
// Hence we are checking if it contains the control plane label and has kubeadm join in the script
isResizedControlPlane := util.IsControlPlaneMachine(machine) && strings.Contains(bootstrapJinjaScript, "kubeadm join")
// Construct a CloudInitScriptInput struct to pass into template.Execute() function to generate the necessary
// cloud init script for the relevant node type, i.e. control plane or worker node
cloudInitInput := CloudInitScriptInput{}
if !vcdMachine.Spec.Bootstrapped {
if useControlPlaneScript {
cloudInitInput = CloudInitScriptInput{
ControlPlane: true,
}
}
cloudInitInput.HTTPProxy = vcdCluster.Spec.ProxyConfigSpec.HTTPProxy
cloudInitInput.HTTPSProxy = vcdCluster.Spec.ProxyConfigSpec.HTTPSProxy
cloudInitInput.NoProxy = vcdCluster.Spec.ProxyConfigSpec.NoProxy
cloudInitInput.MachineName = machine.Name
// TODO: After tenants has access to siteId, populate siteId to cloudInitInput as opposed to the site
cloudInitInput.VcdHostFormatted = strings.ReplaceAll(vcdCluster.Spec.Site, "/", "\\/")
cloudInitInput.NvidiaGPU = vcdMachine.Spec.EnableNvidiaGPU
cloudInitInput.TKGVersion = getTKGVersion(cluster) // needed for both worker & control plane machines for metering
cloudInitInput.ClusterID = vcdCluster.Status.InfraId // needed for both worker & control plane machines for metering
cloudInitInput.ResizedControlPlane = isResizedControlPlane
}
mergedCloudInitBytes, err := MergeJinjaToCloudInitScript(cloudInitInput, bootstrapJinjaScript)
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineScriptGenerationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineScriptGenerationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err,
"Error merging bootstrap jinja script with the cloudInit script for [%s/%s] [%s]",
vAppName, machine.Name, bootstrapJinjaScript)
}
cloudInit := string(mergedCloudInitBytes)
// nothing is redacted in the cloud init script - please ensure no secrets are present
log.Info(fmt.Sprintf("Cloud init Script: [%s]", cloudInit))
err = capvcdRdeManager.AddToEventSet(ctx, capisdk.CloudInitScriptGenerated, "", machine.Name, "", skipRDEEventUpdates)
if err != nil {
log.Error(err, "failed to add CloudInitScriptGenerated event into RDE", "rdeID", vcdCluster.Status.InfraId)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineScriptGenerationError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove VCDMachineScriptGenerationError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
vmExists := true
vm, err := vApp.GetVMByName(machine.Name, true)
if err != nil && err != govcd.ErrorEntityNotFound {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error provisioning infrastructure for the machine; unable to query for VM [%s] in vApp [%s]",
machine.Name, vAppName)
} else if err == govcd.ErrorEntityNotFound {
vmExists = false
}
if !vmExists {
log.Info("Adding infra VM for the machine")
// vcda-4391 fixed
err = vdcManager.AddNewVM(machine.Name, vcdCluster.Name, 1,
vcdMachine.Spec.Catalog, vcdMachine.Spec.Template, vcdMachine.Spec.PlacementPolicy,
vcdMachine.Spec.SizingPolicy, vcdMachine.Spec.StorageProfile, "", false)
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error provisioning infrastructure for the machine; unable to create VM [%s] in vApp [%s]",
machine.Name, vApp.VApp.Name)
}
vm, err = vApp.GetVMByName(machine.Name, true)
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error provisioning infrastructure for the machine; unable to find newly created VM [%s] in vApp [%s]",
vm.VM.Name, vAppName)
}
if vm == nil || vm.VM == nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Obtained nil VM after creating VM [%s]", machine.Name)
}
// NOTE: VMs are not added to VCDResourceSet intentionally as the VMs can be obtained from the VApp and
// VCDResourceSet can get bloated with VMs if the cluster contains a large number of worker nodes
}
desiredNetworks := append([]string{vcdCluster.Spec.OvdcNetwork}, vcdMachine.Spec.ExtraOvdcNetworks...)
if err = r.reconcileVMNetworks(vdcManager, vApp, vm, desiredNetworks); err != nil {
log.Error(err, fmt.Sprintf("Error while attaching networks to vApp and VMs"))
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
// checks before setting address in machine status
if vm.VM == nil {
log.Error(nil, fmt.Sprintf("Requeuing...; vm.VM should not be nil: [%#v]", vm))
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
if vm.VM.NetworkConnectionSection == nil || len(vm.VM.NetworkConnectionSection.NetworkConnection) == 0 {
log.Error(nil, fmt.Sprintf("Requeuing...; network connection section was not found for vm [%s(%s)]: [%#v]", vm.VM.Name, vm.VM.ID, vm.VM))
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
primaryNetwork := getPrimaryNetwork(vm.VM)
if primaryNetwork == nil {
log.Error(nil, fmt.Sprintf("Requeuing...; failed to get existing network connection information for vm [%s(%s)]: [%#v]. NetworkConnection[0] should not be nil",
vm.VM.Name, vm.VM.ID, vm.VM.NetworkConnectionSection))
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
if primaryNetwork.IPAddress == "" {
log.Error(nil, fmt.Sprintf("Requeuing...; NetworkConnection[0] IP Address should not be empty for vm [%s(%s)]: [%#v]",
vm.VM.Name, vm.VM.ID, *vm.VM.NetworkConnectionSection.NetworkConnection[0]))
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
// set address in machine status
machineAddress := primaryNetwork.IPAddress
vcdMachine.Status.Addresses = []clusterv1.MachineAddress{
{
Type: clusterv1.MachineHostName,
Address: vm.VM.Name,
},
{
Type: clusterv1.MachineInternalIP,
Address: machineAddress,
},
{
Type: clusterv1.MachineExternalIP,
Address: machineAddress,
},
}
//Arvind Bhoj - Commeted LB section as we are not using this.
//gateway, err := vcdsdk.NewGatewayManager(ctx, workloadVCDClient, vcdCluster.Spec.OvdcNetwork, vcdCluster.Spec.LoadBalancerConfigSpec.VipSubnet)
//if err != nil {
// updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
// if updatedErr != nil {
// log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
// }
// return ctrl.Result{}, errors.Wrapf(err, "failed to create gateway manager object while reconciling machine [%s]", vcdMachine.Name)
//}
// Update loadbalancer pool with the IP of the control plane node as a new member.
// Note that this must be done before booting on the VM!
/*if util.IsControlPlaneMachine(machine) {
virtualServiceName := capisdk.GetVirtualServiceNameUsingPrefix(
capisdk.GetVirtualServiceNamePrefix(vcdCluster.Name, vcdCluster.Status.InfraId), "tcp")
lbPoolName := capisdk.GetLoadBalancerPoolNameUsingPrefix(
capisdk.GetLoadBalancerPoolNamePrefix(vcdCluster.Name, vcdCluster.Status.InfraId), "tcp")
lbPoolRef, err := gateway.GetLoadBalancerPool(ctx, lbPoolName)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.LoadBalancerError, "", machine.Name, fmt.Sprintf("Error retrieving/updating load balancer pool [%s]: %v", lbPoolName, err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add LoadBalancerError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error retrieving/updating load balancer pool [%s] for the "+
"control plane machine [%s] of the cluster [%s]", lbPoolName, machine.Name, vcdCluster.Name)
}
controlPlaneIPs, err := gateway.GetLoadBalancerPoolMemberIPs(ctx, lbPoolRef)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.LoadBalancerError, "", machine.Name, fmt.Sprintf("Error retrieving/updating lpool members [%s]: %v", lbPoolName, err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add LoadBalancerError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err,
"Error retrieving/updating load balancer pool members [%s] for the "+
"control plane machine [%s] of the cluster [%s]", lbPoolName, machine.Name, vcdCluster.Name)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.LoadBalancerError, "", "")
if err != nil {
log.Error(err, "failed to remove LoadBalancerError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
updatedIPs := append(controlPlaneIPs, machineAddress)
updatedUniqueIPs := cpiutil.NewSet(updatedIPs).GetElements()
resourcesAllocated := &cpiutil.AllocatedResourcesMap{}
var oneArm *vcdsdk.OneArm = nil
if vcdCluster.Spec.LoadBalancerConfigSpec.UseOneArm {
oneArm = &OneArmDefault
}
// At this point the vcdCluster.Spec.ControlPlaneEndpoint should have been set correctly.
_, err = gateway.UpdateLoadBalancer(ctx, lbPoolName, virtualServiceName, updatedUniqueIPs,
int32(vcdCluster.Spec.ControlPlaneEndpoint.Port), int32(vcdCluster.Spec.ControlPlaneEndpoint.Port),
oneArm, !vcdCluster.Spec.LoadBalancerConfigSpec.UseOneArm, "TCP", resourcesAllocated)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err,
"Error updating the load balancer pool [%s] for the "+
"control plane machine [%s] of the cluster [%s]", lbPoolName, machine.Name, vcdCluster.Name)
}
log.Info("Updated the load balancer pool with the control plane machine IP",
"lbpool", lbPoolName)
}*/
// only resize hard disk if the user has requested so by specifying such in the VCDMachineTemplate spec
// check isn't strictly required as we ensure that specified number is larger than what's in the template and left
// empty this will just be 0. However, this makes it clear from a standpoint of inspecting the code what we are doing
if !vcdMachine.Spec.DiskSize.IsZero() {
// go-vcd expects value in MB (2^10 = 1024 * 1024 bytes), so we scale it as such
diskSize, ok := vcdMachine.Spec.DiskSize.AsInt64()
if !ok {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{},
fmt.Errorf("error while provisioning the infrastructure VM for the machine [%s] of the cluster [%s]; failed to parse disk size quantity [%s]", vm.VM.Name, vApp.VApp.Name, vcdMachine.Spec.DiskSize.String())
}
diskSize = int64(math.Floor(float64(diskSize) / float64(Mebibyte)))
diskSettings := vm.VM.VmSpecSection.DiskSection.DiskSettings
// if the specified disk size is less than what is defined in the template, then we ignore the field
if len(diskSettings) != 0 && diskSettings[0].SizeMb < diskSize {
log.Info(
fmt.Sprintf("resizing hard disk on VM for machine [%s] of cluster [%s]; resizing from [%dMB] to [%dMB]",
vm.VM.Name, vApp.VApp.Name, diskSettings[0].SizeMb, diskSize))
diskSettings[0].SizeMb = diskSize
vm.VM.VmSpecSection.DiskSection.DiskSettings = diskSettings
if _, err = vm.UpdateInternalDisks(vm.VM.VmSpecSection); err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{},
errors.Wrapf(err, "Error while provisioning the infrastructure VM for the machine [%s] of the cluster [%s]; failed to resize hard disk", vm.VM.Name, vApp.VApp.Name)
}
}
if err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineCreationError, "", ""); err != nil {
log.Error(err, "failed to remove VCDMachineCreationError from RDE")
}
}
vmStatus, err := vm.GetStatus()
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{},
errors.Wrapf(err, "Error w```hile provisioning the infrastructure VM for the machine [%s] of the cluster [%s]; failed to get status of vm", vm.VM.Name, vApp.VApp.Name)
}
if err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineCreationError, "", ""); err != nil {
log.Error(err, "failed to remove VCDMachineCreationError from RDE")
}
if vmStatus != "POWERED_ON" {
// try to power on the VM
b64CloudInitScript := b64.StdEncoding.EncodeToString(mergedCloudInitBytes)
keyVals := map[string]string{
"guestinfo.userdata": b64CloudInitScript,
"guestinfo.userdata.encoding": "base64",
"disk.enableUUID": "1",
}
for key, val := range keyVals {
err = vdcManager.SetVmExtraConfigKeyValue(vm, key, val, true)
if err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while enabling cloudinit on the machine [%s/%s]; unable to set vm extra config key [%s] for vm ",
vcdCluster.Name, vm.VM.Name, key)
}
if err = vm.Refresh(); err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("Unable to refresh vm: %v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while enabling cloudinit on the machine [%s/%s]; unable to refresh vm", vcdCluster.Name, vm.VM.Name)
}
if err = vApp.Refresh(); err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("Unable to refresh vm: %v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while enabling cloudinit on the machine [%s/%s]; unable to refresh vapp", vAppName, vm.VM.Name)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineCreationError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove VCDMachineCreationError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
log.Info(fmt.Sprintf("Configured the infra machine with variable [%s] to enable cloud-init", key))
}
task, err := vm.PowerOn()
if err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while deploying infra for the machine [%s/%s]; unable to power on VM", vcdCluster.Name, vm.VM.Name)
}
if err = task.WaitTaskCompletion(); err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while deploying infra for the machine [%s/%s]; error waiting for VM power-on task completion", vcdCluster.Name, vm.VM.Name)
}
if err = vApp.Refresh(); err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while deploying infra for the machine [%s/%s]; unable to refresh vapp after VM power-on", vAppName, vm.VM.Name)
}
}
if hasCloudInitFailedBefore, err := r.hasCloudInitExecutionFailedBefore(ctx, workloadVCDClient, vm); hasCloudInitFailedBefore {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineScriptExecutionError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineScriptExecutionError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error bootstrapping the machine [%s/%s]; machine is probably in unreconciliable state", vAppName, vm.VM.Name)
}
err = capvcdRdeManager.AddToEventSet(ctx, capisdk.InfraVmPoweredOn, "", machine.Name, "", skipRDEEventUpdates)
if err != nil {
log.Error(err, "failed to add InfraVmPoweredOn event into RDE", "rdeID", vcdCluster.Status.InfraId)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineCreationError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove VCDMachineCreationError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
//Todo: add remove here
// wait for each vm phase
phases := controlPlanePostCustPhases
if !useControlPlaneScript {
if vcdMachine.Spec.EnableNvidiaGPU {
phases = []string{joinPostCustPhases[0], joinPostCustPhases[1], NvidiaRuntimeInstall}
} else {
phases = joinPostCustPhases
}
}
for _, phase := range phases {
if err = vApp.Refresh(); err != nil {
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineScriptExecutionError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineScriptExecutionError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{},
errors.Wrapf(err, "Error while bootstrapping the machine [%s/%s]; unable to refresh vapp",
vAppName, vm.VM.Name)
}
log.Info(fmt.Sprintf("Start: waiting for the bootstrapping phase [%s] to complete", phase))
if err = r.waitForPostCustomizationPhase(ctx, workloadVCDClient, vm, phase); err != nil {
log.Error(err, fmt.Sprintf("Error waiting for the bootstrapping phase [%s] to complete", phase))
err1 := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineScriptExecutionError, "", machine.Name, fmt.Sprintf("%v", err))
if err1 != nil {
log.Error(err1, "failed to add VCDMachineScriptExecutionError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Error while bootstrapping the machine [%s/%s]; unable to wait for post customization phase [%s]",
vAppName, vm.VM.Name, phase)
}
log.Info(fmt.Sprintf("End: waiting for the bootstrapping phase [%s] to complete", phase))
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineScriptExecutionError, "", "")
if err != nil {
log.Error(err, "failed to remove VCDMachineScriptExecutionError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
log.Info("Successfully bootstrapped the machine")
err = capvcdRdeManager.AddToEventSet(ctx, capisdk.InfraVmBootstrapped, "", machine.Name, "", skipRDEEventUpdates)
if err != nil {
log.Error(err, "failed to add InfraVmBootstrapped event into RDE", "rdeID", vcdCluster.Status.InfraId)
}
if err = vm.Refresh(); err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("Unable to refresh vm: %v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Unexpected error after the machine [%s/%s] is bootstrapped; unable to refresh vm", vAppName, vm.VM.Name)
}
if err = vApp.Refresh(); err != nil {
updatedErr := capvcdRdeManager.AddToErrorSet(ctx, capisdk.VCDMachineCreationError, "", machine.Name, fmt.Sprintf("Unable to refresh vApp: %v", err))
if updatedErr != nil {
log.Error(updatedErr, "failed to add VCDMachineCreationError into RDE", "rdeID", vcdCluster.Status.InfraId)
}
return ctrl.Result{}, errors.Wrapf(err, "Unexpected error after the machine [%s/%s] is bootstrapped; unable to refresh vapp", vAppName, vm.VM.Name)
}
err = capvcdRdeManager.RdeManager.RemoveErrorByNameOrIdFromErrorSet(ctx, vcdsdk.ComponentCAPVCD, capisdk.VCDMachineCreationError, "", machine.Name)
if err != nil {
log.Error(err, "failed to remove VCDMachineCreationError from RDE", "rdeID", vcdCluster.Status.InfraId)
}
vcdMachine.Spec.Bootstrapped = true
conditions.MarkTrue(vcdMachine, BootstrapExecSucceededCondition)
// Set ProviderID so the Cluster API Machine Controller can pull it
providerID := fmt.Sprintf("%s://%s", infrav1.VCDProviderID, vm.VM.ID)
vcdMachine.Spec.ProviderID = &providerID
vcdMachine.Status.Ready = true
vcdMachine.Status.Template = vcdMachine.Spec.Template
vcdMachine.Status.ProviderID = vcdMachine.Spec.ProviderID
vcdMachine.Status.SizingPolicy = vcdMachine.Spec.SizingPolicy
vcdMachine.Status.PlacementPolicy = vcdMachine.Spec.PlacementPolicy
vcdMachine.Status.NvidiaGPUEnabled = vcdMachine.Spec.EnableNvidiaGPU
conditions.MarkTrue(vcdMachine, ContainerProvisionedCondition)
return ctrl.Result{}, nil
}
// getPrimaryNetwork returns the network connection of vm whose NetworkConnectionIndex equals
// vm.NetworkConnectionSection.PrimaryNetworkConnectionIndex.
// It is not possible to assume vm.NetworkConnectionSection.NetworkConnection[0] is the primary network when there are
// multiple networks attached to the VM.
//
// It returns nil when vm or its NetworkConnectionSection is nil, or when no connection carries the primary index.
func getPrimaryNetwork(vm *types.Vm) *types.NetworkConnection {
	// Without a network section there is nothing to search; report "not found" the same way
	// the loop below does instead of panicking on a nil dereference.
	if vm == nil || vm.NetworkConnectionSection == nil {
		return nil
	}
	section := vm.NetworkConnectionSection
	for _, network := range section.NetworkConnection {
		// Skip nil entries so a sparse slice cannot cause a panic.
		if network != nil && network.NetworkConnectionIndex == section.PrimaryNetworkConnectionIndex {
			return network
		}
	}
	return nil
}
// reconcileVMNetworks ensures that the desired networks are attached to the VM.
// networks[0] refers to the primary network.
//
// For every entry in networks the network is first attached to the vApp if it is missing, then the VM's
// existing connection to that network is reused or a new one is described. If the resulting set of
// connections differs from what the VM currently has, the VM's network connection section is replaced
// with the desired list, the connection indexes are renumbered to match the order of networks and the
// primary index is set to 0.
//
// Any failure is returned wrapped around the underlying error so the root cause is not lost.
func (r *VCDMachineReconciler) reconcileVMNetworks(vdcManager *vcdsdk.VdcManager, vApp *govcd.VApp, vm *govcd.VM, networks []string) error {
	connections, err := vm.GetNetworkConnectionSection()
	if err != nil {
		return errors.Wrapf(err, "Failed to get attached networks to VM")
	}

	desiredConnectionArray := make([]*types.NetworkConnection, len(networks))
	for index, ovdcNetwork := range networks {
		if err = ensureNetworkIsAttachedToVApp(vdcManager, vApp, ovdcNetwork); err != nil {
			return errors.Wrapf(err, "Error ensuring network [%s] is attached to vApp", ovdcNetwork)
		}
		desiredConnectionArray[index] = getNetworkConnection(connections, ovdcNetwork)
	}

	// Nothing to do when the VM already has exactly the desired connections.
	if containsTheSameElements(connections.NetworkConnection, desiredConnectionArray) {
		return nil
	}

	connections.NetworkConnection = desiredConnectionArray
	// update connection indexes for deterministic reconciliation
	connections.PrimaryNetworkConnectionIndex = 0
	for index, connection := range connections.NetworkConnection {
		connection.NetworkConnectionIndex = index
	}
	if err = vm.UpdateNetworkConnectionSection(connections); err != nil {
		return errors.Wrapf(err, "failed to update networks of VM")
	}
	return nil
}
// containsTheSameElements reports whether the two slices hold the same elements, with the same
// multiplicity, regardless of order. Elements are compared with reflect.DeepEqual.
//
// Each element of array2 can be matched by at most one element of array1, so slices such as
// [a, a] and [a, b] are correctly reported as different even though they have equal length and
// every element of the first occurs in the second.
func containsTheSameElements(array1 []*types.NetworkConnection, array2 []*types.NetworkConnection) bool {
	if len(array1) != len(array2) {
		return false
	}
	// matched[j] records that array2[j] has already been paired with an element of array1.
	matched := make([]bool, len(array2))
	for _, element1 := range array1 {
		found := false
		for j, element2 := range array2 {
			if !matched[j] && reflect.DeepEqual(element1, element2) {
				matched[j] = true
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}
// getNetworkConnection looks up the connection for ovdcNetwork among the existing connections and
// returns the first one whose Network field matches. When none matches, it returns a freshly built
// connection for that network: connected, no customization needed, "POOL" IP allocation mode and a
// "VMXNET3" adapter.
func getNetworkConnection(connections *types.NetworkConnectionSection, ovdcNetwork string) *types.NetworkConnection {
	existing := connections.NetworkConnection
	for i := range existing {
		if candidate := existing[i]; candidate.Network == ovdcNetwork {
			return candidate
		}
	}

	// No existing connection on this network: describe a new one.
	fresh := new(types.NetworkConnection)
	fresh.Network = ovdcNetwork
	fresh.NeedsCustomization = false
	fresh.IsConnected = true
	fresh.IPAddressAllocationMode = "POOL"
	fresh.NetworkAdapterType = "VMXNET3"
	return fresh
}
// ensureNetworkIsAttachedToVApp makes sure the org VDC network named ovdcNetworkName is attached to vApp.
//
// If the vApp's network config section already lists a network with that name, nothing is done.
// Otherwise the network is looked up by name in the VDC held by vdcManager and added to the vApp as an
// org network. Lookup and attach failures are returned wrapped, with the network name in the message.
func ensureNetworkIsAttachedToVApp(vdcManager *vcdsdk.VdcManager, vApp *govcd.VApp, ovdcNetworkName string) error {
	for _, vAppNetwork := range vApp.VApp.NetworkConfigSection.NetworkNames() {
		if vAppNetwork == ovdcNetworkName {
			return nil
		}
	}

	ovdcNetwork, err := vdcManager.Vdc.GetOrgVdcNetworkByName(ovdcNetworkName, true)
	if err != nil {
		return fmt.Errorf("unable to get ovdc network [%s]: [%w]", ovdcNetworkName, err)
	}

	if _, err = vApp.AddOrgNetwork(&govcd.VappNetworkSettings{}, ovdcNetwork.OrgVDCNetwork, false); err != nil {
		// Report the network by name; formatting the network object itself makes the message unreadable.
		return fmt.Errorf("unable to add ovdc network [%s] to vApp [%s]: [%w]",
			ovdcNetworkName, vApp.VApp.Name, err)
	}
	return nil
}
// getBootstrapData returns the bootstrap script for machine, read from the "value" key of the secret
// named by machine.Spec.Bootstrap.DataSecretName in the machine's namespace.
//
// It returns an error when the secret name is not set, when the secret cannot be fetched, or when the
// secret has no "value" key.
//
// The script content is deliberately not logged: it comes from a Secret, so only the secret's identity
// and the payload size are recorded.
func (r *VCDMachineReconciler) getBootstrapData(ctx context.Context, machine *clusterv1.Machine) (string, error) {
	log := ctrl.LoggerFrom(ctx)
	if machine.Spec.Bootstrap.DataSecretName == nil {
		return "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil")
	}

	s := &corev1.Secret{}
	key := client.ObjectKey{Namespace: machine.GetNamespace(), Name: *machine.Spec.Bootstrap.DataSecretName}
	if err := r.Client.Get(ctx, key, s); err != nil {
		return "", errors.Wrapf(err,
			"failed to retrieve bootstrap data secret for VCDMachine %s/%s",
			machine.GetNamespace(), machine.GetName())
	}

	value, ok := s.Data["value"]
	if !ok {
		return "", errors.New("error retrieving bootstrap data: secret value key is missing")
	}

	log.Info("Retrieved auto-generated bootstrap script",
		"secretNamespace", key.Namespace, "secretName", key.Name, "sizeBytes", len(value))
	return string(value), nil
}
func (r *VCDMachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine,
vcdMachine *infrav1.VCDMachine, vcdCluster *infrav1.VCDCluster) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx, "machine", machine.Name, "cluster", vcdCluster.Name)
patchHelper, err := patch.NewHelper(vcdMachine, r.Client)
if err != nil {
return ctrl.Result{}, err
}
conditions.MarkFalse(vcdMachine, ContainerProvisionedCondition,
clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
if err := patchVCDMachine(ctx, patchHelper, vcdMachine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "Failed to patch VCDMachine [%s/%s]", vcdCluster.Name, vcdMachine.Name)
}
if vcdCluster.Spec.Site == "" {
controllerutil.RemoveFinalizer(vcdMachine, infrav1.MachineFinalizer)
return ctrl.Result{}, nil
}
userCreds, err := getUserCredentialsForCluster(ctx, r.Client, vcdCluster.Spec.UserCredentialsContext)
if err != nil {