diff --git a/test/train-sets/ref/dictionary_test.stderr b/test/train-sets/ref/dictionary_test.stderr
index 117ba4ae123..5cc7f7e9990 100644
--- a/test/train-sets/ref/dictionary_test.stderr
+++ b/test/train-sets/ref/dictionary_test.stderr
@@ -1,7 +1,4 @@
 ignoring namespaces beginning with: w
-scanned dictionary 'dictionary_test.dict' from 'train-sets/dictionary_test.dict', hash=3226e82e3d58b6b2
-dictionary dictionary_test.dict contains 4 items
-scanned dictionary 'dictionary_test.dict.gz' from 'train-sets/dictionary_test.dict.gz', hash=3226e82e3d58b6b2
 Num weight bits = 18
 learning rate = 0.5
 initial_t = 0
@@ -10,6 +7,9 @@ decay_learning_rate = 1
 creating cache_file = train-sets/dictionary_test.dat.cache
 Reading datafile = train-sets/dictionary_test.dat
 num sources = 1
+scanned dictionary 'dictionary_test.dict' from 'train-sets/dictionary_test.dict', hash=3226e82e3d58b6b2
+dictionary dictionary_test.dict contains 4 items
+scanned dictionary 'dictionary_test.dict.gz' from 'train-sets/dictionary_test.dict.gz', hash=3226e82e3d58b6b2
 average  since         example        example  current  current  current
 loss     last          counter         weight    label  predict features
 1.000000 1.000000            1            1.0   1.0000  -1.0000        3
diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc
index de316f00693..231875d067e 100644
--- a/vowpalwabbit/cb_adf.cc
+++ b/vowpalwabbit/cb_adf.cc
@@ -39,7 +39,7 @@ struct cb_adf
   action_scores a_s;              // temporary storage for mtr and sm
   action_scores prob_s;           // temporary storage for sm; stores softmax values
   v_array<uint32_t> backup_nf;    // temporary storage for sm; backup for numFeatures in examples
-  v_array<float> backup_weights;  // temporary storage for sm; backup for weights in examples 
+  v_array<float> backup_weights;  // temporary storage for sm; backup for weights in examples
 
   uint64_t offset;
   bool no_predict;
@@ -152,7 +152,7 @@ void learn_SM(cb_adf& mydata, multi_learner& base, multi_ex& examples)
   {
     mydata.backup_nf.push_back(examples[current_action]->num_features);
     if (current_action == chosen_action)
-      examples[current_action]->weight = example_weight * (1.0 - mydata.prob_s[i].score);
+      examples[current_action]->weight = example_weight * (1.0f - mydata.prob_s[i].score);
     else
       examples[current_action]->weight = example_weight * mydata.prob_s[i].score;
 
diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h
index 45a09b94fac..bd71349fc13 100644
--- a/vowpalwabbit/io_buf.h
+++ b/vowpalwabbit/io_buf.h
@@ -379,4 +379,4 @@ inline size_t bin_text_read_write_fixed_validated(
         msg,    \
         text);  \
   }             \
-  while (0);
\ No newline at end of file
+  while (0);
diff --git a/vowpalwabbit/memory_tree.cc b/vowpalwabbit/memory_tree.cc
index 4e5bc8d701c..9729b51e640 100644
--- a/vowpalwabbit/memory_tree.cc
+++ b/vowpalwabbit/memory_tree.cc
@@ -20,7 +20,7 @@ namespace memory_tree_ns
 {
     ///////////////////////Helper//////////////////////////////
     //////////////////////////////////////////////////////////
-    template<typename T> 
+    template<typename T>
     void remove_at_index(v_array<T>& array, uint32_t index)
     {
         if (index >= array.size()){
@@ -38,8 +38,8 @@ namespace memory_tree_ns
         return;
     }
 
-    void copy_example_data(example* dst, example* src, int oas = false) //copy example data. 
-    { 
+    void copy_example_data(example* dst, example* src, int oas = false) //copy example data.
+    {
         if (oas == false){
             dst->l = src->l;
             dst->l.multi.label = src->l.multi.label;
@@ -51,7 +51,7 @@ namespace memory_tree_ns
         VW::copy_example_data(false, dst, src);
     }
 
-    inline void free_example(example* ec) 
+    inline void free_example(example* ec)
     {
         VW::dealloc_example(nullptr, *ec);
         free(ec);
@@ -66,7 +66,7 @@ namespace memory_tree_ns
         prod_f.delete_v();
         if (f2.indicies.size() == 0)
             return;
-        
+
         float denominator = pow(norm_sq1*norm_sq2,0.5f);
         size_t idx1 = 0;
         size_t idx2 = 0;
@@ -75,7 +75,7 @@
     {
         uint64_t ec1pos = f1.indicies[idx1];
         uint64_t ec2pos = f2.indicies[idx2];
-        
+
         if (ec1pos < ec2pos)
             idx1++;
         else if(ec1pos > ec2pos)
@@ -89,7 +89,7 @@
         }
     }
 
-    int cmpfunc( const void *a, const void *b) 
+    int cmpfunc( const void *a, const void *b)
     {
         return *(char*)a - *(char*)b;
     }
@@ -101,10 +101,10 @@
         copy_example_data(&ec, &ec1, oas);
         ec.total_sum_feat_sq = 0.0; //sort namespaces. pass indices array into sort...template (leave this to the end)
-        
+
         qsort(ec1.indices.begin(), ec1.indices.size(), sizeof(namespace_index), cmpfunc);
         qsort(ec2.indices.begin(), ec2.indices.size(), sizeof(namespace_index), cmpfunc);
-        
+
         size_t idx1 = 0;
         size_t idx2 = 0;
         while (idx1 < ec1.indices.size() && idx2 < ec2.indices.size())
@@ -144,20 +144,20 @@
         double nl; //number of examples routed to left.
         double nr; //number of examples routed to right.
-        
+
         v_array<uint32_t> examples_index;
 
         node () //construct:
         {
             parent = 0;
-            internal = 0; //0:not used, 1:internal, -1:leaf 
+            internal = 0; //0:not used, 1:internal, -1:leaf
             //internal = false;
             depth = 0;
             base_router = 0;
             left = 0;
             right = 0;
             nl = 0.001; //initilze to 1, as we need to do nl/nr.
-            nr = 0.001; 
+            nr = 0.001;
             examples_index = v_init<uint32_t>();
         }
     };
@@ -169,7 +169,7 @@
         v_array<node> nodes;  //array of nodes.
         v_array<example*> examples; //array of example points
-        
+
         size_t max_leaf_examples;
         size_t max_nodes;
         size_t leaf_example_multiplier;
@@ -180,13 +180,13 @@
         int iter;
         uint32_t dream_repeats; //number of dream operations per example.
-        uint32_t total_num_queries; 
+        uint32_t total_num_queries;
 
         size_t max_depth;
         size_t max_ex_in_leaf;
 
         float construct_time; //recording the time for constructing the memory tree
-        float test_time; //recording the test time 
+        float test_time; //recording the test time
 
         uint32_t num_mistakes;
         int learn_at_leaf; //indicator for turning on learning the scorer function at the leaf level
@@ -198,11 +198,11 @@
         int top_K;
 
         //commands:
         int oas; //indicator for multi-label classification (oas = 1)
-        int dream_at_update; 
+        int dream_at_update;
         int online; //indicator for running CMT in online fashion
 
-        float F1_score; 
+        float F1_score;
         float hamming_loss;
 
         example* kprod_ec;
@@ -225,23 +225,23 @@
     };
 
     float linear_kernel(const flat_example* fec1, const flat_example* fec2)
-    { 
+    {
        float dotprod = 0;
-        
+
        features& fs_1 = (features&)fec1->fs;
        features& fs_2 = (features&)fec2->fs;
        if (fs_2.indicies.size() == 0)
            return 0.f;
-        
+
        for (size_t idx1 = 0, idx2 = 0; idx1 < fs_1.size() && idx2 < fs_2.size() ; idx1++)
-        { 
+        {
            uint64_t ec1pos = fs_1.indicies[idx1];
            uint64_t ec2pos = fs_2.indicies[idx2];
            if(ec1pos < ec2pos)
                continue;
-            
+
            while(ec1pos > ec2pos && ++idx2 < fs_2.size())
                ec2pos = fs_2.indicies[idx2];
-            
+
            if(ec1pos == ec2pos)
            {
                dotprod += fs_1.values[idx1] * fs_2.values[idx2];
@@ -258,7 +258,7 @@
        flat_example* fec2 = flatten_sort_example(*b.all, ec2);
        float norm_sqrt = pow(fec1->total_sum_feat_sq*fec2->total_sum_feat_sq, 0.5f);
        float linear_prod = linear_kernel(fec1, fec2);
-        //fec1->fs.delete_v(); 
+        //fec1->fs.delete_v();
        //fec2->fs.delete_v();
        free_flatten_example(fec1);
        free_flatten_example(fec2);
@@ -303,7 +303,7 @@
 
    //rout based on the prediction
    inline uint32_t insert_descent(node& n, const float prediction)
-    { 
+    {
        //prediction <0 go left, otherwise go right
        if(prediction < 0){
            n.nl++; //increment the number of examples routed to the left.
@@ -311,7 +311,7 @@
        }
        else{ //otherwise go right.
            n.nr++; //increment the number of examples routed to the right.
-            return n.right; 
+            return n.right;
        }
    }
@@ -333,10 +333,10 @@
                cout<<"Error: nl = 0, and nr = 0, exit...";
                exit(0);
            }
-            
+
            if (pred < 0){
                b.nodes[cn].nl--;
-                cn = b.nodes[cn].left; 
+                cn = b.nodes[cn].left;
            }
            else{
                b.nodes[cn].nr--;
@@ -347,10 +347,10 @@
        if (b.nodes[cn].examples_index.size() >= 1){
            int loc_at_leaf = int(merand48(b.all->random_state)*b.nodes[cn].examples_index.size());
            uint32_t ec_id = b.nodes[cn].examples_index[loc_at_leaf];
-            remove_at_index(b.nodes[cn].examples_index, loc_at_leaf); 
+            remove_at_index(b.nodes[cn].examples_index, loc_at_leaf);
            return ec_id;
        }
-        else 
+        else
            return -1;
    }
@@ -375,18 +375,18 @@
        ec.l.simple = {1.f, 1.f, 0.};
        base.predict(ec, b.nodes[cn].base_router);
-        float prediction = ec.pred.scalar; 
+        float prediction = ec.pred.scalar;
        //float imp_weight = 1.f; //no importance weight.
-        
+
        float weighted_value = (1.-b.alpha)*log(b.nodes[cn].nl/(b.nodes[cn].nr+1e-1))/log(2.)+b.alpha*prediction;
        float route_label = weighted_value < 0.f ? -1.f : 1.f;
-        
-        //ec.l.simple = {route_label, imp_weight, 0.f}; 
+
+        //ec.l.simple = {route_label, imp_weight, 0.f};
        float ec_input_weight = ec.weight;
        ec.weight = 1.f;
        ec.l.simple = {route_label, 1., 0.f};
        base.learn(ec, b.nodes[cn].base_router); //update the router according to the new example.
-        
+
        base.predict(ec, b.nodes[cn].base_router);
        float save_binary_scalar = ec.pred.scalar;
@@ -403,7 +403,7 @@
        return save_binary_scalar;
    }
-    
+
    //turn a leaf into an internal node, and create two children
    //when the number of examples is too big
    void split_leaf(memory_tree& b, single_learner& base, const uint32_t cn)
@@ -415,9 +415,9 @@
        b.nodes[left_child].internal = -1; //left leaf
        b.nodes[left_child].base_router = (b.routers_used++);
        uint32_t right_child = (uint32_t)b.nodes.size();
-        b.nodes.push_back(node()); 
+        b.nodes.push_back(node());
        b.nodes[right_child].internal = -1; //right leaf
-        b.nodes[right_child].base_router = (b.routers_used++); 
+        b.nodes[right_child].base_router = (b.routers_used++);
 
        if (b.nodes[cn].depth + 1 > b.max_depth){
            b.max_depth = b.nodes[cn].depth + 1;
@@ -453,12 +453,12 @@
            b.examples[ec_pos]->l.simple = {1.f, 1.f, 0.f};
            base.predict(*b.examples[ec_pos], b.nodes[cn].base_router); //re-predict
-            float scalar = b.examples[ec_pos]->pred.scalar; //this is spliting the leaf. 
+            float scalar = b.examples[ec_pos]->pred.scalar; //this is spliting the leaf.
            if (scalar < 0)
            {
                b.nodes[left_child].examples_index.push_back(ec_pos);
                float leaf_pred = train_node(b, base, *b.examples[ec_pos], left_child);
-                insert_descent(b.nodes[left_child], leaf_pred); //fake descent, only for update nl and nr 
+                insert_descent(b.nodes[left_child], leaf_pred); //fake descent, only for update nl and nr
            }
            else
            {
@@ -488,16 +488,16 @@
    }
 
-    int compare_label( const void *a, const void *b) 
+    int compare_label( const void *a, const void *b)
    {
        return *(uint32_t*)a - *(uint32_t*)b;
    }
-    
-    
+
+
    inline uint32_t over_lap(v_array<uint32_t>& array_1, v_array<uint32_t>& array_2){
        uint32_t num_overlap = 0;
-        
+
        qsort(array_1.begin(), array_1.size(), sizeof(uint32_t), compare_label);
        qsort(array_2.begin(), array_2.size(), sizeof(uint32_t), compare_label);
@@ -529,13 +529,13 @@
    void collect_labels_from_leaf(memory_tree& b, const uint32_t cn, v_array<uint32_t>& leaf_labs){
        if (b.nodes[cn].internal != -1)
            cout<<"something is wrong, it should be a leaf node"<<endl;
 
        leaf_labs.clear();
        for (size_t i = 0; i < b.nodes[cn].examples_index.size(); i++){ //scan through each memory in the leaf:
            uint32_t loc = b.nodes[cn].examples_index[i];
            for (uint32_t lab : b.examples[loc]->l.multilabels.label_v){ //scan through each label:
                if (v_array_contains(leaf_labs, lab) == false)
-                    leaf_labs.push_back(lab); 
+                    leaf_labs.push_back(lab);
            }
        }
    }
@@ -556,7 +556,7 @@
        ec.l.multilabels = multilabels;
    }
 
-    inline uint32_t compute_hamming_loss_via_oas(memory_tree& b, single_learner& base, 
+    inline uint32_t compute_hamming_loss_via_oas(memory_tree& b, single_learner& base,
            const uint32_t cn, example& ec, v_array<uint32_t>& selected_labs)
    {
        selected_labs.delete_v();
@@ -590,9 +590,9 @@
            float score = 0.f;
            uint32_t loc = b.nodes[cn].examples_index[i];
 
-            //do not use reward to update memory tree during the very first pass 
+            //do not use reward to update memory tree during the very first pass
            //(which is for unsupervised training for memory tree)
-            if (b.learn_at_leaf == true && b.current_pass >= 1){ 
+            if (b.learn_at_leaf == true && b.current_pass >= 1){
                float tmp_s = normalized_linear_prod(b, &ec, b.examples[loc]);
                diag_kronecker_product_test(ec, *b.examples[loc], *b.kprod_ec, b.oas );
                b.kprod_ec->l.simple = {FLT_MAX, 0., tmp_s};
@@ -601,7 +601,7 @@
            }
            else
                score = normalized_linear_prod(b, &ec, b.examples[loc]);
-            
+
            if (score > max_score){
                max_score = score;
                max_pos = (int64_t)loc;
@@ -613,12 +613,12 @@
            return -1;
    }
 
-    //for any two examples, use number of overlap labels to indicate the similarity between these two examples. 
+    //for any two examples, use number of overlap labels to indicate the similarity between these two examples.
    float get_overlap_from_two_examples(example& ec1, example& ec2){
        return (float)over_lap(ec1.l.multilabels.label_v, ec2.l.multilabels.label_v);
    }
 
-    //we use F1 score as the reward signal 
+    //we use F1 score as the reward signal
    float F1_score_for_two_examples(example& ec1, example& ec2){
        float num_overlaps = get_overlap_from_two_examples(ec1, ec2);
        float v1 = num_overlaps/(1e-7+ec1.l.multilabels.label_v.size()*1.);
@@ -630,9 +630,9 @@
            return 2.*(v1*v2/(v1+v2));
    }
-    
+
    void predict(memory_tree& b, single_learner& base, example& ec)
-    { 
+    {
        MULTICLASS::label_t mc;
        uint32_t save_multi_pred = 0;
        MULTILABEL::labels multilabels;
@@ -654,13 +654,13 @@
            uint32_t newcn = ec.pred.scalar < 0 ? b.nodes[cn].left : b.nodes[cn].right; //do not need to increment nl and nr.
            cn = newcn;
        }
-        
+
        if(b.oas == false){
-            ec.l.multi = mc; 
+            ec.l.multi = mc;
            ec.pred.multiclass = save_multi_pred;
        }
        else{
-            ec.pred.multilabels = preds; 
+            ec.pred.multilabels = preds;
            ec.l.multilabels = multilabels;
        }
@@ -671,7 +671,7 @@
                ec.pred.multiclass = b.examples[closest_ec]->l.multi.label;
            else
                ec.pred.multiclass = 0;
-            
+
            if (ec.l.multi.label != ec.pred.multiclass){
                ec.loss = ec.weight;
                b.num_mistakes++;
@@ -679,7 +679,7 @@
        }
        else{
            float reward = 0.f;
-            closest_ec = pick_nearest(b, base, cn, ec); 
+            closest_ec = pick_nearest(b, base, cn, ec);
            if (closest_ec != -1){
                reward = F1_score_for_two_examples(ec, *b.examples[closest_ec]);
                b.F1_score += reward;
@@ -711,9 +711,9 @@
            float prediction = ec.pred.scalar;
            cn = prediction < 0 ? b.nodes[cn].left : b.nodes[cn].right;
        }
-        
+
        if(b.oas == false){
-            ec.l.multi = mc; 
+            ec.l.multi = mc;
            ec.pred.multiclass = save_multi_pred;
        }
        else{
@@ -759,7 +759,7 @@
        }
        if (ec_id != -1){
            if (b.examples[ec_id]->l.multi.label == ec.l.multi.label)
-                reward = 1.f; 
+                reward = 1.f;
            float score = normalized_linear_prod(b, &ec, b.examples[ec_id]);
            diag_kronecker_product_test(ec, *b.examples[ec_id], *b.kprod_ec, b.oas);
            b.kprod_ec->l.simple = {reward, 1.f, -score};
@@ -771,7 +771,7 @@
    void route_to_leaf(memory_tree& b, single_learner& base, const uint32_t & ec_array_index, uint32_t cn, v_array<uint32_t>& path, bool insertion){
        example& ec = *b.examples[ec_array_index];
-        
+
        MULTICLASS::label_t mc;
        uint32_t save_multi_pred = 0;
        MULTILABEL::labels multilabels;
@@ -796,10 +796,10 @@
            else
                cn = insert_descent(b.nodes[cn], prediction);
        }
-        path.push_back(cn); //push back the leaf 
-        
+        path.push_back(cn); //push back the leaf
+
        if (b.oas == false){
-            ec.l.multi = mc; 
+            ec.l.multi = mc;
            ec.pred.multiclass = save_multi_pred;
        }
        else{
@@ -824,8 +824,8 @@
        //uint32_t random_pos = merand48(b.all->random_state)*(path_to_leaf.size()-1);
        uint32_t random_pos = merand48(b.all->random_state)*(path_to_leaf.size()); //include leaf
        uint32_t cn = path_to_leaf[random_pos];
-        
-        if (b.nodes[cn].internal != -1){ //if it's an internal node:' 
+
+        if (b.nodes[cn].internal != -1){ //if it's an internal node:'
            float objective = 0.f;
            float prob_right = 0.5;
            float coin = merand48(b.all->random_state) < prob_right ? 1.f : -1.f;
@@ -838,9 +838,9 @@
            float reward_right_subtree= return_reward_from_node(b,base, b.nodes[cn].right, ec, weight);
            objective = (1.-b.alpha)*log(b.nodes[cn].nl/b.nodes[cn].nr) + b.alpha*(reward_right_subtree/prob_right)/2.;
        }
-        
+
        float ec_input_weight = ec.weight;
-        
+
        MULTICLASS::label_t mc;
        MULTILABEL::labels multilabels;
        MULTILABEL::labels preds;
@@ -850,7 +850,7 @@
            multilabels = ec.l.multilabels;
            preds = ec.pred.multilabels;
        }
-        
+
        ec.weight = fabs(objective);
        if (ec.weight >= 100.f) //crop the weight, otherwise sometimes cause NAN outputs.
            ec.weight = 100.f;
@@ -858,7 +858,7 @@
            ec.weight = 0.01f;
        ec.l.simple = {objective < 0. ? -1.f : 1.f, 1.f, 0.};
        base.learn(ec, b.nodes[cn].base_router);
-        
+
        if (b.oas == false)
            ec.l.multi = mc;
        else{
@@ -879,30 +879,30 @@
        path_to_leaf.delete_v();
    }
 
-    //using reward signals 
-    void update_rew(memory_tree& b, single_learner& base, const uint32_t& ec_array_index, example& ec) 
+    //using reward signals
+    void update_rew(memory_tree& b, single_learner& base, const uint32_t& ec_array_index, example& ec)
    {
        single_query_and_learn(b, base, ec_array_index, ec);
    }
 
-    //node here the ec is already stored in the b.examples, the task here is to rout it to the leaf, 
+    //node here the ec is already stored in the b.examples, the task here is to rout it to the leaf,
    //and insert the ec_array_index to the leaf.
    void insert_example(memory_tree& b, single_learner& base, const uint32_t& ec_array_index, bool fake_insert = false)
    {
        uint32_t cn = 0; //start from the root.
        while(b.nodes[cn].internal == 1) //if it's internal node:
-        { 
+        {
            //predict and train the node at cn.
-            float router_pred = train_node(b, base, *b.examples[ec_array_index], cn); 
+            float router_pred = train_node(b, base, *b.examples[ec_array_index], cn);
            uint32_t newcn = insert_descent(b.nodes[cn], router_pred); //updated nr or nl
-            cn = newcn; 
+            cn = newcn;
        }
-        
+
        if (b.oas == true) //if useing oas as inference procedure, we just train oas here, as it's independent of the memory unit anyway'
            train_one_against_some_at_leaf(b, base, cn, *b.examples[ec_array_index]);
 
        if((b.nodes[cn].internal == -1) && (fake_insert == false)) //get to leaf:
-        { 
+        {
            b.nodes[cn].examples_index.push_back(ec_array_index);
            if (b.nodes[cn].examples_index.size() > b.max_ex_in_leaf)
            {
@@ -913,14 +913,14 @@
        //if the number of examples exceeds the max_leaf_examples, and not reach the max_nodes - 2 yet, we split:
        if((b.nodes[cn].examples_index.size() >= b.max_leaf_examples) && (b.nodes.size() + 2 <= b.max_nodes)){
-            split_leaf(b, base, cn); 
+            split_leaf(b, base, cn);
        }
    }
 
    void experience_replay(memory_tree& b, single_learner& base)
    {
-        uint32_t cn = 0; //start from root, randomly descent down! 
+        uint32_t cn = 0; //start from root, randomly descent down!
        int ec_id = random_sample_example_pop(b,cn);
        if (ec_id >= 0){
            if (b.current_pass < 1)
@@ -941,11 +941,11 @@
    //learn: descent the example from the root while generating binary training
    //example for each node, including the leaf, and store the example at the leaf.
    void learn(memory_tree& b, single_learner& base, example& ec)
-    { 
+    {
        if (b.test_mode == false){
            b.iter++;
            predict(b, base, ec);
-            
+
            if (b.iter%5000 == 0){
                if (b.oas == false)
                    cout<<"at iter "<<b.iter<<", pred error: "<<b.num_mistakes*1./b.iter<<endl;
@@ ... @@
            if (b.current_pass < 1){
                example* new_ec = &calloc_or_throw<example>();
                copy_example_data(new_ec, &ec, b.oas);
-                b.examples.push_back(new_ec); 
+                b.examples.push_back(new_ec);
                if(b.online == true)
                    update_rew(b, base, b.examples.size() - 1,*b.examples[b.examples.size()-1]); //query and learn
-                
-                insert_example(b, base, b.examples.size() - 1); //unsupervised learning. 
+
+                insert_example(b, base, (uint32_t)(b.examples.size() - 1)); //unsupervised learning.
                for (uint32_t i = 0; i < b.dream_repeats; i++)
                    experience_replay(b, base);
            }
@@ -972,7 +972,7 @@
            for (uint32_t i = 0; i < b.dream_repeats; i++)
                experience_replay(b, base);
            }
-            b.construct_time += double(clock() - begin)/CLOCKS_PER_SEC; 
+            b.construct_time += double(clock() - begin)/CLOCKS_PER_SEC;
        }
        else if (b.test_mode == true){
            b.iter++;
@@ -987,7 +987,7 @@
            b.test_time += double(clock() - begin)/CLOCKS_PER_SEC;
        }
-    } 
+    }
 
    void end_pass(memory_tree& b){
@@ -1043,7 +1043,7 @@
        }
        for (uint32_t i = 0; i < tag_number; i++)
            writeit(ec->tag[i], "tag");
-        
+
        //deal with tag:
        writeitvar(ec->indices.size(), "namespaces", namespace_size);
        if (read){
@@ -1112,12 +1112,12 @@
            size_t ss = b.all->weights.stride_shift();
            writeit(ss, "stride_shift");
        }
-        
+
        writeit(b.max_nodes, "max_nodes");
        writeit(b.learn_at_leaf, "learn_at_leaf");
        writeit(b.oas, "oas");
        //writeit(b.leaf_example_multiplier, "leaf_example_multiplier")
-        writeitvar(b.nodes.size(), "nodes", n_nodes); 
+        writeitvar(b.nodes.size(), "nodes", n_nodes);
        writeit(b.max_num_labels, "max_number_of_labels");
 
        if (read){
@@ -1125,8 +1125,8 @@
            for (uint32_t i = 0; i < n_nodes; i++)
                b.nodes.push_back(node());
        }
-        
-        //node 
+
+        //node
        for(uint32_t i = 0; i < n_nodes; i++){
            save_load_node(b.nodes[i], model_file, read, text, msg);
        }
@@ -1142,8 +1142,8 @@
            for (uint32_t i = 0; i < n_examples; i++)
                save_load_example(b.examples[i], model_file, read, text, msg, b.oas);
            //cout<<"done loading...."<<endl;
@@ ... @@ if (!tree->max_nodes)
    tree->max_leaf_examples = tree->leaf_example_multiplier*(log(tree->max_nodes)/log(2));
-    
+
    init_tree(*tree);
 
    if (!all.quiet)
        all.trace_message << "memory_tree:" << " "
-                    <<"max_nodes = "<< tree->max_nodes << " " 
+                    <<"max_nodes = "<< tree->max_nodes << " "
                    <<"max_leaf_examples = "<<tree->max_leaf_examples<<" "
                    <<"alpha = "<<tree->alpha<<" "
                    <<"oas = "<<tree->oas<<" "
                    <<"online ="<<tree->online<<" "
                    <<endl;
 
    size_t num_learners = 0;
 
    //multi-class classification
    if (tree->oas == false){
        num_learners = tree->max_nodes + 1;
-        learner<memory_tree, example>& l = init_multiclass_learner (tree, 
+        learner<memory_tree, example>& l = init_multiclass_learner (tree,
                        as_singleline(setup_base (options,all)),
                        learn,
                        predict,
@@ -1208,11 +1208,11 @@ if (!tree->max_nodes)
    }
    //multi-label classification
    else{
        num_learners = tree->max_nodes + 1 + tree->max_num_labels;
-        learner<memory_tree, example>& l = 
-            init_learner(tree, 
-                as_singleline(setup_base(options,all)), 
-                learn, 
-                predict, 
+        learner<memory_tree, example>& l =
+            init_learner(tree,
+                as_singleline(setup_base(options,all)),
+                learn,
+                predict,
                num_learners,
                prediction_type::multilabels);
@@ -1233,7 +1233,3 @@ if (!tree->max_nodes)
 
    }
 }
-
-
-
-
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 9061679162c..73b122c37a4 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -586,7 +586,7 @@ string spoof_hex_encoded_namespaces(const string& arg)
   return res;
 }
 
-void parse_feature_tweaks(options_i& options, vw& all)
+void parse_feature_tweaks(options_i& options, vw& all, vector<string>& dictionary_nses)
 {
   string hash_function("strings");
   uint32_t new_bits;
@@ -598,7 +598,6 @@ void parse_feature_tweaks(options_i& options, vw& all)
   vector<string> ignore_linears;
   vector<string> keeps;
   vector<string> redefines;
-  vector<string> dictionary_nses;
 
   vector<string> dictionary_path;
 
@@ -990,7 +989,6 @@ void parse_feature_tweaks(options_i& options, vw& all)
       all.dictionary_path.push_back(PATH.substr(previous));
     }
 
-    for (size_t id = 0; id < dictionary_nses.size(); id++) parse_dictionary_argument(all, dictionary_nses[id]);
   }
 
   if (noconstant)
@@ -1501,14 +1499,14 @@ options_i& load_header_merge_options(options_i& options, vw& all, io_buf& model)
   return options;
 }
 
-void parse_modules(options_i& options, vw& all)
+void parse_modules(options_i& options, vw& all, vector<string>& dictionary_nses)
 {
   option_group_definition rand_options("Randomization options");
   rand_options.add(make_option("random_seed", all.random_seed).help("seed random number generator"));
   options.add_and_parse(rand_options);
   all.random_state = all.random_seed;
 
-  parse_feature_tweaks(options, all);  // feature tweaks
+  parse_feature_tweaks(options, all, dictionary_nses);  // feature tweaks
 
   parse_example_tweaks(options, all);  // example manipulation
@@ -1634,10 +1632,14 @@ vw* initialize(
     // Loads header of model files and loads the command line options into the options object.
     load_header_merge_options(options, all, *model);
 
-    parse_modules(options, all);
+    vector<string> dictionary_nses;
+    parse_modules(options, all, dictionary_nses);
 
     parse_sources(options, all, *model, skipModelLoad);
 
+    //we must delay so parse_mask is fully defined.
+    for (size_t id = 0; id < dictionary_nses.size(); id++) parse_dictionary_argument(all, dictionary_nses[id]);
+
     options.check_unregistered();
 
     // upon direct query for help -- spit it out to stdout;
diff --git a/vowpalwabbit/recall_tree.cc b/vowpalwabbit/recall_tree.cc
index a1b8f2d38e6..56574793eae 100644
--- a/vowpalwabbit/recall_tree.cc
+++ b/vowpalwabbit/recall_tree.cc
@@ -430,21 +430,6 @@ void finish(recall_tree& b)
   b.nodes.delete_v();
 }
 
-#define writeit(what, str)                                                                  \
-  do                                                                                        \
-  {                                                                                         \
-    msg << str << " = " << what << " ";                                                     \
-    bin_text_read_write_fixed(model_file, (char*)&what, sizeof(what), "", read, msg, text); \
-  } while (0);
-
-#define writeitvar(what, str, mywhat)                                                           \
-  auto mywhat = (what);                                                                         \
-  do                                                                                            \
-  {                                                                                             \
-    msg << str << " = " << mywhat << " ";                                                       \
-    bin_text_read_write_fixed(model_file, (char*)&mywhat, sizeof(mywhat), "", read, msg, text); \
-  } while (0);
-
 void save_load_tree(recall_tree& b, io_buf& model_file, bool read, bool text)
 {
   if (model_file.files.size() > 0)
diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc
index 5d386e9fc11..bc5dbbad52a 100644
--- a/vowpalwabbit/warm_cb.cc
+++ b/vowpalwabbit/warm_cb.cc
@@ -297,7 +297,7 @@ bool ind_update(warm_cb& data, int ec_type)
 float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type)
 {
   float weight_multiplier;
-  float ws_train_size = data.ws_train_size;
+  float ws_train_size = (float)data.ws_train_size;
   float inter_train_size = data.inter_period;
   float total_train_size = ws_train_size + inter_train_size;
   float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size;
@@ -531,7 +531,7 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec)
 }
 
-void init_adf_data(warm_cb& data, const size_t num_actions)
+void init_adf_data(warm_cb& data, const uint32_t num_actions)
 {
   data.num_actions = num_actions;
   if (data.sim_bandit)