-
Notifications
You must be signed in to change notification settings - Fork 24.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML][Data Frame] add support for `wait_for_checkpoint` flag on `_stop` API
#45469
Changes from 2 commits
195d0f3
c8c56f3
1356bfd
c08deb5
ea4cd47
d87f3ac
5482584
a8dcefb
b467f22
8ef6160
885aed2
0acffb3
10bd717
ffd8acb
33a4096
0ef6e34
e8db75c
4a83849
59f509c
58d1336
ba99096
e3e1c50
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,9 @@ public class DataFrameTransformState implements Task.Status, PersistentTaskState | |
@Nullable | ||
private NodeAttributes node; | ||
|
||
// TODO: 8.x this needs to be deprecated and we move towards a STOPPING TASK_STATE | ||
private boolean shouldStopAtCheckpoint; | ||
|
||
public static final ParseField TASK_STATE = new ParseField("task_state"); | ||
public static final ParseField INDEXER_STATE = new ParseField("indexer_state"); | ||
|
||
|
@@ -54,6 +57,7 @@ public class DataFrameTransformState implements Task.Status, PersistentTaskState | |
public static final ParseField PROGRESS = new ParseField("progress"); | ||
public static final ParseField NODE = new ParseField("node"); | ||
|
||
|
||
@SuppressWarnings("unchecked") | ||
public static final ConstructingObjectParser<DataFrameTransformState, Void> PARSER = new ConstructingObjectParser<>(NAME, | ||
true, | ||
|
@@ -93,14 +97,26 @@ public DataFrameTransformState(DataFrameTransformTaskState taskState, | |
long checkpoint, | ||
@Nullable String reason, | ||
@Nullable DataFrameTransformProgress progress, | ||
@Nullable NodeAttributes node) { | ||
@Nullable NodeAttributes node, | ||
boolean shouldStopAtCheckpoint) { | ||
this.taskState = taskState; | ||
this.indexerState = indexerState; | ||
this.position = position; | ||
this.checkpoint = checkpoint; | ||
this.reason = reason; | ||
this.progress = progress; | ||
this.node = node; | ||
this.shouldStopAtCheckpoint = shouldStopAtCheckpoint; | ||
} | ||
|
||
public DataFrameTransformState(DataFrameTransformTaskState taskState, | ||
IndexerState indexerState, | ||
@Nullable DataFrameIndexerPosition position, | ||
long checkpoint, | ||
@Nullable String reason, | ||
@Nullable DataFrameTransformProgress progress, | ||
@Nullable NodeAttributes node) { | ||
this(taskState, indexerState, position, checkpoint, reason, progress, node, false); | ||
} | ||
|
||
public DataFrameTransformState(DataFrameTransformTaskState taskState, | ||
|
@@ -129,6 +145,9 @@ public DataFrameTransformState(StreamInput in) throws IOException { | |
} else { | ||
node = null; | ||
} | ||
if (in.getVersion().onOrAfter(Version.V_8_0_0)) { | ||
shouldStopAtCheckpoint = in.readBoolean(); | ||
} | ||
} | ||
|
||
public DataFrameTransformTaskState getTaskState() { | ||
|
@@ -164,6 +183,14 @@ public DataFrameTransformState setNode(NodeAttributes node) { | |
return this; | ||
} | ||
|
||
public boolean shouldStopAtCheckpoint() { | ||
return shouldStopAtCheckpoint; | ||
} | ||
|
||
public void setShouldStopAtCheckoint(boolean shouldStopAtCheckpoint) { | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Objects that are stored in the cluster state are supposed to be immutable. This class was already breaching that rule for `node`. It doesn't cause a problem given the way it's used because […]. The reason it's dangerous to have a mutable object in the cluster state is this:
The problem arises if the reference in step 1 referred to the actual object in the local cluster state. If it did then the check for changes in step 4 won't find any changes because when you updated your object that was of a type that's stored in the cluster state it actually did update the local cluster state. This then leads to the cluster state of the current node being different to the cluster state of all the other nodes, and you'll never find out from testing in a single node cluster. |
||
this.shouldStopAtCheckpoint = shouldStopAtCheckpoint; | ||
} | ||
|
||
public static DataFrameTransformState fromXContent(XContentParser parser) { | ||
try { | ||
return PARSER.parse(parser, null); | ||
|
@@ -214,6 +241,9 @@ public void writeTo(StreamOutput out) throws IOException { | |
if (out.getVersion().onOrAfter(Version.V_7_3_0)) { | ||
out.writeOptionalWriteable(node); | ||
} | ||
if (out.getVersion().onOrAfter(Version.V_8_0_0)) { | ||
out.writeBoolean(shouldStopAtCheckpoint); | ||
} | ||
} | ||
|
||
@Override | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not included in the X-Content representation. Is there a good reason for that?
I guess it's because we don't want this to go into the version of this object that gets persisted to the index as part of a
DataFrameTransformStoredDoc
? But omitting it from the X-Content representation also means it won't survive in cluster state if there's a full cluster restart. There are other comments saying that in 8.x we want to replace this with a
STOPPING
enum value. But that would be persisted both in the DataFrameTransformStoredDoc
and in the on-disk version of the cluster state. So there's an inconsistency here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@droberts195
If the cluster state stores itself as XContent, how does it know how to deserialize the objects?
Also, if we are going to store this in the index, we may just want to bite the bullet and figure out how to ONLY store it in the index.