1
- use crate :: state_tool_helper:: StateToolHelper ;
1
+ use crate :: {
2
+ layout:: Layout ,
3
+ state_tool_helper:: StateToolHelper ,
4
+ target_subnet:: TargetSubnet ,
5
+ utils:: { find_expected_state_hash_for_subnet_id, get_state_hash} ,
6
+ } ;
2
7
3
8
use ic_base_types:: SubnetId ;
4
9
use ic_metrics:: MetricsRegistry ;
5
10
use ic_recovery:: {
6
11
error:: { RecoveryError , RecoveryResult } ,
7
12
file_sync_helper:: rsync,
8
13
steps:: Step ,
9
- Recovery , CHECKPOINTS , CUPS_DIR , IC_REGISTRY_LOCAL_STORE , IC_STATE_DIR ,
14
+ Recovery , CUPS_DIR , IC_REGISTRY_LOCAL_STORE ,
10
15
} ;
11
16
use ic_registry_routing_table:: CanisterIdRange ;
12
17
use ic_state_manager:: split:: resolve_ranges_and_split;
13
- use slog:: Logger ;
14
-
15
- use std:: path:: PathBuf ;
16
-
17
- const MANIFEST_FILE_NAME : & str = "manifest.data" ;
18
- const EXPECTED_MANIFESTS_FILE_NAME : & str = "expected_manifests.data" ;
18
+ use slog:: { info, Logger } ;
19
19
20
20
pub ( crate ) struct CopyWorkDirStep {
21
- pub ( crate ) from : PathBuf ,
22
- pub ( crate ) to : PathBuf ,
21
+ pub ( crate ) layout : Layout ,
23
22
pub ( crate ) logger : Logger ,
24
23
}
25
24
26
25
impl Step for CopyWorkDirStep {
27
26
fn descr ( & self ) -> String {
28
27
format ! (
29
28
"Copying {} to {}. Excluding cups and registry local store" ,
30
- self . from . display( ) ,
31
- self . to . display( )
29
+ self . layout . work_dir ( TargetSubnet :: Source ) . display( ) ,
30
+ self . layout . work_dir ( TargetSubnet :: Destination ) . display( ) ,
32
31
)
33
32
}
34
33
35
34
fn exec ( & self ) -> RecoveryResult < ( ) > {
36
35
rsync (
37
36
& self . logger ,
38
37
vec ! [ CUPS_DIR , IC_REGISTRY_LOCAL_STORE ] ,
39
- & format ! ( "{}/" , self . from. display( ) ) ,
40
- & self . to . display ( ) . to_string ( ) ,
38
+ & format ! ( "{}/" , self . layout. work_dir( TargetSubnet :: Source ) . display( ) ) ,
39
+ & self
40
+ . layout
41
+ . work_dir ( TargetSubnet :: Destination )
42
+ . display ( )
43
+ . to_string ( ) ,
41
44
/*require_confirmation=*/ false ,
42
45
/*key_file=*/ None ,
43
46
)
@@ -74,7 +77,8 @@ pub(crate) struct SplitStateStep {
74
77
pub ( crate ) subnet_id : SubnetId ,
75
78
pub ( crate ) state_split_strategy : StateSplitStrategy ,
76
79
pub ( crate ) state_tool_helper : StateToolHelper ,
77
- pub ( crate ) work_dir : PathBuf ,
80
+ pub ( crate ) layout : Layout ,
81
+ pub ( crate ) target_subnet : TargetSubnet ,
78
82
pub ( crate ) logger : Logger ,
79
83
}
80
84
@@ -86,24 +90,23 @@ impl Step for SplitStateStep {
86
90
and removing all but the highest checkpoints. Work dir: {}",
87
91
retained_canister_id_ranges,
88
92
self . subnet_id,
89
- self . work_dir. display( ) ,
93
+ self . layout . work_dir( self . target_subnet ) . display( ) ,
90
94
) ,
91
95
StateSplitStrategy :: Drop ( dropped_canister_id_ranges) => format ! (
92
96
"Dropping the canister id ranges {:#?} from state for the subnet {}. \
93
97
and removing all but the highest checkpoints. Work dir: {}",
94
98
dropped_canister_id_ranges,
95
99
self . subnet_id,
96
- self . work_dir. display( ) ,
100
+ self . layout . work_dir( self . target_subnet ) . display( ) ,
97
101
) ,
98
102
}
99
103
}
100
104
101
105
fn exec ( & self ) -> RecoveryResult < ( ) > {
102
- let state_dir = self . work_dir . join ( IC_STATE_DIR ) ;
103
- let checkpoints_dir = state_dir. join ( CHECKPOINTS ) ;
104
-
106
+ // 1. Split the state.
107
+ info ! ( self . logger, "Splitting the state" ) ;
105
108
resolve_ranges_and_split (
106
- state_dir ,
109
+ self . layout . ic_state_dir ( self . target_subnet ) ,
107
110
self . subnet_id . get ( ) ,
108
111
self . state_split_strategy . retained_canister_id_ranges ( ) ,
109
112
self . state_split_strategy . dropped_canister_id_ranges ( ) ,
@@ -112,61 +115,79 @@ impl Step for SplitStateStep {
112
115
)
113
116
. map_err ( RecoveryError :: OutputError ) ?;
114
117
115
- let ( max_name, _) = Recovery :: get_latest_checkpoint_name_and_height ( & checkpoints_dir) ?;
116
- let max_checkpoint = checkpoints_dir. join ( max_name) ;
117
- let manifest_path = max_checkpoint. join ( MANIFEST_FILE_NAME ) ;
118
+ // 2. Compute the manifest
119
+ info ! ( self . logger, "Computing the state manifest" ) ;
120
+ let latest_checkpoint_dir = self . layout . latest_checkpoint_dir ( self . target_subnet ) ?;
121
+ let manifest_path = self . layout . actual_manifest_file ( self . subnet_id ) ;
118
122
119
123
self . state_tool_helper
120
- . compute_manifest ( & max_checkpoint, & manifest_path) ?;
121
- self . state_tool_helper . verify_manifest ( & manifest_path) ?;
124
+ . compute_manifest ( & latest_checkpoint_dir, & manifest_path) ?;
125
+
126
+ // 3. Validate the manifest
127
+ info ! ( self . logger, "Validating the manifest" ) ;
128
+ self . state_tool_helper
129
+ . verify_manifest ( & manifest_path)
130
+ . map_err ( |err| {
131
+ RecoveryError :: ValidationFailed ( format ! ( "Manifest verification failed: {}" , err) )
132
+ } ) ?;
133
+
134
+ let expected_state_hash = find_expected_state_hash_for_subnet_id (
135
+ self . layout . expected_manifests_file ( ) ,
136
+ self . subnet_id ,
137
+ ) ?;
138
+ let actual_state_hash = get_state_hash ( & latest_checkpoint_dir) ?;
139
+
140
+ if actual_state_hash != expected_state_hash {
141
+ return Err ( RecoveryError :: ValidationFailed ( format ! (
142
+ "State hash after split {} doesn't match the expected state hash {}" ,
143
+ actual_state_hash, expected_state_hash,
144
+ ) ) ) ;
145
+ }
146
+
147
+ info ! ( self . logger, "Validation passed!" ) ;
148
+ // 4. Remove all the other checkpoints
149
+ info ! ( self . logger, "Removing past checkpoints" ) ;
122
150
123
- Recovery :: remove_all_but_highest_checkpoints ( & checkpoints_dir, & self . logger ) . map ( |_| ( ) )
151
+ Recovery :: remove_all_but_highest_checkpoints (
152
+ & self . layout . checkpoints_dir ( self . target_subnet ) ,
153
+ & self . logger ,
154
+ )
155
+ . map ( |_| ( ) )
124
156
}
125
157
}
126
158
127
159
pub ( crate ) struct ComputeExpectedManifestsStep {
128
- pub ( crate ) recovery_dir : PathBuf ,
129
160
pub ( crate ) state_tool_helper : StateToolHelper ,
130
161
pub ( crate ) source_subnet_id : SubnetId ,
131
162
pub ( crate ) destination_subnet_id : SubnetId ,
132
163
pub ( crate ) canister_id_ranges_to_move : Vec < CanisterIdRange > ,
133
- }
134
-
135
- impl ComputeExpectedManifestsStep {
136
- fn checkpoints ( & self ) -> PathBuf {
137
- self . recovery_dir
138
- . join ( "working_dir" )
139
- . join ( IC_STATE_DIR )
140
- . join ( CHECKPOINTS )
141
- }
164
+ pub ( crate ) layout : Layout ,
142
165
}
143
166
144
167
impl Step for ComputeExpectedManifestsStep {
145
168
fn descr ( & self ) -> String {
146
169
format ! (
147
170
"Compute the expected manifests of the states resulting from splitting the manifest \
148
171
at {} between {} (hosting all canisters in {:?}) and {} (all remaining canisters)",
149
- self . checkpoints ( ) . display( ) ,
172
+ self . layout . checkpoints_dir ( TargetSubnet :: Source ) . display( ) ,
150
173
self . destination_subnet_id,
151
174
self . canister_id_ranges_to_move,
152
175
self . source_subnet_id,
153
176
)
154
177
}
155
178
156
179
fn exec ( & self ) -> RecoveryResult < ( ) > {
157
- let checkpoints_dir = self . checkpoints ( ) ;
158
- let ( max_name , _ ) = Recovery :: get_latest_checkpoint_name_and_height ( & checkpoints_dir ) ? ;
159
- let max_checkpoint = checkpoints_dir . join ( max_name ) ;
160
- let manifest_path = self . recovery_dir . join ( MANIFEST_FILE_NAME ) ;
180
+ self . state_tool_helper . compute_manifest (
181
+ & self . layout . latest_checkpoint_dir ( TargetSubnet :: Source ) ? ,
182
+ self . layout . original_state_manifest_file ( ) ,
183
+ ) ? ;
161
184
162
- self . state_tool_helper
163
- . compute_manifest ( & max_checkpoint, & manifest_path) ?;
164
185
self . state_tool_helper . split_manifest (
165
- & manifest_path ,
186
+ self . layout . original_state_manifest_file ( ) ,
166
187
self . source_subnet_id ,
167
188
self . destination_subnet_id ,
168
189
& self . canister_id_ranges_to_move ,
169
- & self . recovery_dir . join ( EXPECTED_MANIFESTS_FILE_NAME ) ,
190
+ self . layout . expected_manifests_file ( ) ,
170
191
)
171
192
}
172
193
}
0 commit comments