-
Notifications
You must be signed in to change notification settings - Fork 800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the issue that io delay injected does not take effect #121
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
2442712
Fix the issue that io delay injected does not take effect
cwen0 56c0e96
update iochaos document
cwen0 89189f9
update link
cwen0 7ab71cc
update iochaos document
cwen0 a9e2259
Merge branch 'master' into fix_iochaos
zhouqiang-cl 11de512
address comments
cwen0 52d49c2
Merge branch 'master' into fix_iochaos
zhouqiang-cl 1286047
Merge branch 'fix_iochaos' of github.com:cwen0/chaos-mesh into fix_io…
cwen0 9e436b2
fix logs
cwen0 File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ import ( | |
"golang.org/x/sync/errgroup" | ||
|
||
"github.com/go-logr/logr" | ||
"github.com/golang/protobuf/ptypes/empty" | ||
|
||
"github.com/pingcap/chaos-mesh/api/v1alpha1" | ||
"github.com/pingcap/chaos-mesh/controllers/twophase" | ||
|
@@ -161,29 +162,31 @@ func (r *Reconciler) cleanFinalizersAndRecover(ctx context.Context, iochaos *v1a | |
func (r *Reconciler) recoverPod(ctx context.Context, pod *v1.Pod, iochaos *v1alpha1.IoChaos) error { | ||
r.Log.Info("Recovering", "namespace", pod.Namespace, "name", pod.Name) | ||
|
||
var ns v1.Namespace | ||
if err := r.Get(ctx, types.NamespacedName{Name: pod.Namespace}, &ns); err != nil { | ||
return err | ||
} | ||
cctx, cancel := context.WithTimeout(ctx, 2*time.Minute) | ||
defer cancel() | ||
err := wait.PollUntil(2*time.Second, func() (bool, error) { | ||
if err := r.recoverInjectAction(ctx, pod, iochaos); err != nil { | ||
if utils.IsCaredNetError(err) { | ||
r.Log.Info("Recover I/O chaos action, network is not ok, retrying...", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return false, nil | ||
} | ||
|
||
annotations := ns.GetAnnotations() | ||
if annotations == nil { | ||
annotations = make(map[string]string) | ||
} | ||
return false, err | ||
} | ||
|
||
if _, ok := annotations[v1alpha1.WebhookInitPodAnnotationKey]; ok { | ||
return r.recoverInjectAction(ctx, pod, iochaos) | ||
} | ||
r.Log.Info("Recover I/O chaos action successfully") | ||
|
||
if err := utils.UnsetIoInjection(ctx, r.Client, pod, iochaos); err != nil { | ||
r.Log.Error(err, "failed to unset I/O injection", | ||
return true, nil | ||
}, cctx.Done()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When network is not ok, Poll loop and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. timeout err |
||
|
||
if err != nil { | ||
r.Log.Error(err, "failed to recover I/O chaos action", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return err | ||
} | ||
|
||
return r.Delete(ctx, pod, &client.DeleteOptions{ | ||
GracePeriodSeconds: new(int64), | ||
}) | ||
return nil | ||
} | ||
|
||
func (r *Reconciler) injectAllPods(ctx context.Context, pods []v1.Pod, iochaos *v1alpha1.IoChaos) error { | ||
|
@@ -210,67 +213,29 @@ func (r *Reconciler) injectAllPods(ctx context.Context, pods []v1.Pod, iochaos * | |
func (r *Reconciler) injectPod(ctx context.Context, pod *v1.Pod, iochaos *v1alpha1.IoChaos) error { | ||
r.Log.Info("Inject I/O chaos action", "namespace", pod.Namespace, "name", pod.Name) | ||
|
||
if err := utils.SetIoInjection(ctx, r.Client, pod, iochaos); err != nil { | ||
r.Log.Error(err, "failed to set I/O injection", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return err | ||
} | ||
|
||
var ns v1.Namespace | ||
if err := r.Get(ctx, types.NamespacedName{Name: pod.Namespace}, &ns); err != nil { | ||
return err | ||
} | ||
|
||
annotations := ns.GetAnnotations() | ||
if annotations == nil { | ||
annotations = make(map[string]string) | ||
} | ||
|
||
if _, ok := annotations[v1alpha1.WebhookInitPodAnnotationKey]; !ok { | ||
// need to recreate pod when to inject sidecar | ||
time.Sleep(1 * time.Second) | ||
err := r.Delete(ctx, pod, &client.DeleteOptions{ | ||
GracePeriodSeconds: new(int64), | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
// TODO: optimize inject action | ||
go func() { | ||
cctx, cancel := context.WithTimeout(ctx, 5*time.Minute) | ||
defer cancel() | ||
err := wait.PollUntil(2*time.Second, func() (bool, error) { | ||
var npod v1.Pod | ||
err := r.Client.Get(ctx, types.NamespacedName{ | ||
Namespace: pod.Namespace, | ||
Name: pod.Name, | ||
}, &npod) | ||
if err != nil { | ||
r.Log.Error(err, "failed to get pod", "namespace", pod.Namespace, "name", pod.Name) | ||
cctx, cancel := context.WithTimeout(ctx, 2*time.Minute) | ||
defer cancel() | ||
err := wait.PollUntil(2*time.Second, func() (bool, error) { | ||
if err := r.injectAction(ctx, pod, iochaos); err != nil { | ||
if utils.IsCaredNetError(err) { | ||
r.Log.Info("Inject I/O chaos action, network is not ok, retrying...", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return false, nil | ||
} | ||
|
||
if err := r.injectAction(ctx, &npod, iochaos); err != nil { | ||
if utils.IsCaredNetError(err) { | ||
r.Log.Info("Inject I/O chaos action, network is not ok, retrying...", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return false, nil | ||
} | ||
return false, err | ||
} | ||
|
||
return false, err | ||
} | ||
r.Log.Info("Inject I/O chaos action successfully") | ||
|
||
r.Log.Info("Inject I/O chaos action successfully") | ||
return true, nil | ||
}, cctx.Done()) | ||
|
||
return true, nil | ||
}, cctx.Done()) | ||
if err != nil { | ||
r.Log.Error(err, "failed to inject I/O chaos action", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
} | ||
}() | ||
if err != nil { | ||
r.Log.Error(err, "failed to inject I/O chaos action", | ||
"namespace", pod.Namespace, "name", pod.Name) | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
@@ -294,7 +259,13 @@ func (r *Reconciler) injectAction(ctx context.Context, pod *v1.Pod, iochaos *v1a | |
return err | ||
} | ||
|
||
_, err = cli.SetFault(ctx, req) | ||
if len(req.Methods) > 0 { | ||
_, err = cli.SetFault(ctx, req) | ||
return err | ||
} | ||
|
||
// inject fault to all methods if the the methods is empty. | ||
_, err = cli.SetFaultAll(ctx, req) | ||
return err | ||
} | ||
|
||
|
@@ -311,6 +282,6 @@ func (r *Reconciler) recoverInjectAction(ctx context.Context, pod *v1.Pod, iocha | |
return err | ||
} | ||
|
||
_, err = cli.RecoverAll(ctx, nil) | ||
_, err = cli.RecoverAll(ctx, &empty.Empty{}) | ||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why add the default branch? this may cause there exist multiple configWatchers that notify the eventsCh at the same time?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this
branch
just used to restartconfigWatcher.Watch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I misunderstood the stopCh before, I thought it was used by configWatcher to notify the outer goroutine that it had been finished.
If
configWatcher.Watch
oreventsCh
first received the msg from stopCh before the outer goroutine, theconfigWatcher.Watch
will be restarted forever because case <- stopCh at line 187 won't receive msg again? how to solve this problem?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stopCh
will be closed, so all receiver will receive this event.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got, that's ok
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Restarting is ok for ErrWatchChannelClosed case, is that ok for other err?