A fix for sparse model parameters and unknown features (#1334)
* modify confidence (and active learning) to more correctly reflect the updates

* little bump

* another little bump

* remove some warnings

* fix memory leak with sparse weights and new features

* fix a windows build issue
JohnLangford committed Oct 5, 2017
1 parent 10364d6 commit 98b8038
Showing 8 changed files with 120 additions and 134 deletions.
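
The central change is the one named in "fix memory leak with sparse weights and new features": the diff gives sparse_parameters a shared, preinitialized default_value block and a const operator[] that returns it for unknown feature indices rather than inserting a fresh entry, so read-only prediction can no longer grow (or leak) the weight table. A minimal sketch of the pattern, with invented names (sparse_weights_sketch) and none of VW's stride or alignment details:

#include <cstdint>
#include <unordered_map>
#include <vector>

// sparse_weights_sketch: a toy stand-in for VW's sparse_parameters.
struct sparse_weights_sketch
{ std::unordered_map<uint64_t, std::vector<float>> map;  // feature index -> stride-sized block
  std::vector<float> default_value;                      // shared block for unseen indices
  uint64_t mask;

  sparse_weights_sketch(uint64_t weight_mask, size_t stride)
    : default_value(stride, 0.f), mask(weight_mask) {}

  // mutable access: inserting on a miss is fine, training then owns the entry
  float& operator[](uint64_t i)
  { return map.try_emplace(i & mask, default_value).first->second[0]; }

  // const access: never inserts, so predicting on unknown features cannot grow the table
  const float& operator[](uint64_t i) const
  { auto it = map.find(i & mask);
    return it == map.end() ? default_value[0] : it->second[0];
  }
};

Training keeps the mutable operator[], which inserts on a miss as before; only the read-only path changes behavior.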
15 changes: 7 additions & 8 deletions cs/cli/vowpalwabbit.cpp
@@ -1,4 +1,4 @@
/*
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
@@ -293,7 +293,7 @@ List<VowpalWabbitExample^>^ VowpalWabbit::ParseDecisionServiceJson(cli::array<By
int index = 0;
for (auto a : interaction.actions)
header->Actions[index++] = (int)a;

header->Probabilities = gcnew cli::array<float>((int)interaction.probabilities.size());
index = 0;
for (auto p : interaction.probabilities)
@@ -339,7 +339,7 @@ List<VowpalWabbitExample^>^ VowpalWabbit::ParseDecisionServiceJson(cli::array<By
examples.push_back(native_example);

interior_ptr<ParseJsonState^> state_ptr = &state;

if (m_vw->audit)
VW::read_line_json<true>(*m_vw, examples, reinterpret_cast<char*>(valueHandle.AddrOfPinnedObject().ToPointer()), get_example_from_pool, &state);
else
@@ -818,14 +818,14 @@ cli::array<cli::array<float>^>^ VowpalWabbit::FillTopicAllocation(T& weights)

for (auto iter = weights.begin(); iter != weights.end(); ++iter)
{ // over topics
auto v = iter.begin();
for (uint64_t k = 0; k < K; k++, ++v)
allocation[(int)k][(int)iter.index()] = *v + lda_rho;
weight* wp = &(*iter);
for (uint64_t k = 0; k < K; k++)
allocation[(int)k][(int)iter.index()] = wp[k] + lda_rho;
}

return allocation;
}

cli::array<cli::array<float>^>^ VowpalWabbit::GetTopicAllocation()
{
// over weights
@@ -835,4 +835,3 @@ cli::array<cli::array<float>^>^ VowpalWabbit::GetTopicAllocation()
return FillTopicAllocation(m_vw->weights.dense_weights);
}
}
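
With weight_iterator_iterator gone (see vowpalwabbit/array_parameters.h below), FillTopicAllocation reads the K topic values of each weight bucket through a plain pointer, wp[k], instead of a nested iterator. A minimal sketch of that access pattern (fill_row_sketch is an invented helper; the real code writes into allocation[topic][feature]):

#include <cstddef>

// fill_row_sketch: bucket == &(*iter), the first of K contiguous topic
// weights for one feature index.
void fill_row_sketch(float* out, const float* bucket, std::size_t K, float lda_rho)
{ for (std::size_t k = 0; k < K; ++k)
    out[k] = bucket[k] + lda_rho;   // same smoothing term as above
}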

107 changes: 40 additions & 67 deletions vowpalwabbit/array_parameters.h
@@ -17,36 +17,6 @@ class dense_parameters;
class sparse_parameters;
typedef std::unordered_map<uint64_t, weight*> weight_map;

class weight_iterator_iterator
{
private:
weight* _cur;
public:
weight_iterator_iterator(weight* cur)
: _cur(cur)
{ }

weight& operator*() { return *_cur; }

weight_iterator_iterator& operator++()
{
++_cur;
return *this;
}

weight_iterator_iterator operator+(size_t index) { return weight_iterator_iterator(_cur + index); }

weight_iterator_iterator& operator+=(size_t index)
{
_cur += index;
return *this;
}

bool operator==(const weight_iterator_iterator& rhs) const { return _cur == rhs._cur; }
bool operator!=(const weight_iterator_iterator& rhs) const { return _cur != rhs._cur; }

};

template <typename T>
class dense_iterator
{
@@ -62,8 +32,6 @@ class dense_iterator
typedef T* pointer;
typedef T& reference;

typedef weight_iterator_iterator w_iter;

dense_iterator(T* current, T* begin, uint32_t stride)
: _current(current), _begin(begin), _stride(stride)
{ }
@@ -80,11 +48,6 @@

bool operator==(const dense_iterator& rhs) const { return _current == rhs._current; }
bool operator!=(const dense_iterator& rhs) const { return _current != rhs._current; }

//to iterate within a bucket
w_iter begin() { return w_iter(_current); }
w_iter end() { return w_iter(_current + _stride); }
w_iter end(size_t offset) { return w_iter(_current + offset); }
};

class dense_parameters
@@ -141,14 +104,14 @@
{
iterator iter = begin();
for (size_t i = 0; iter != end(); ++iter, i += stride())
T::func(iter, info);
T::func(*iter, info, iter.index());
}

template<class T> void set_default()
{
iterator iter = begin();
for (size_t i = 0; iter != end(); ++iter, i += stride())
T::func(iter);
T::func(*iter, iter.index());
}

void set_zero(size_t offset)
@@ -157,13 +120,13 @@
(&(*iter))[offset] = 0;
}

uint64_t mask() { return _weight_mask; }
uint64_t mask() const { return _weight_mask; }

uint64_t seeded() { return _seeded; }
uint64_t seeded() const { return _seeded; }

uint32_t stride() { return 1 << _stride_shift; }
uint32_t stride() const { return 1 << _stride_shift; }

uint32_t stride_shift() { return _stride_shift; }
uint32_t stride_shift() const { return _stride_shift; }

void stride_shift(uint32_t stride_shift) { _stride_shift = stride_shift; }

@@ -202,8 +165,6 @@ class sparse_iterator
typedef T* pointer;
typedef T& reference;

typedef weight_iterator_iterator w_iter;

sparse_iterator(weight_map::iterator& iter, uint32_t stride)
: _iter(iter), _stride(stride)
{ }
@@ -227,11 +188,6 @@

bool operator==(const sparse_iterator& rhs) const { return _iter == rhs._iter; }
bool operator!=(const sparse_iterator& rhs) const { return _iter != rhs._iter; }

//to iterate within a bucket
w_iter begin() { return w_iter(_iter->second);}
w_iter end() { return w_iter(_iter->second + _stride); }
w_iter end(size_t offset) { return w_iter(_iter->second + offset);}
};


@@ -244,24 +200,25 @@ class sparse_parameters
bool _seeded; // whether the instance is sharing model state with others
bool _delete;
void* default_data;
float* default_value;
public:
typedef sparse_iterator<weight> iterator;
typedef sparse_iterator<const weight> const_iterator;
private:
void(*fun)(iterator&, void*);
void(*fun)(const weight*, void*);
public:

sparse_parameters(size_t length, uint32_t stride_shift = 0)
: _map(),
_weight_mask((length << stride_shift) - 1),
_stride_shift(stride_shift),
_seeded(false), _delete(false), default_data(nullptr),
fun(nullptr)
{}
fun(nullptr)
{ default_value = calloc_mergable_or_throw<weight>(stride());}

sparse_parameters()
: _map(), _weight_mask(0), _stride_shift(0), _seeded(false), _delete(false), default_data(nullptr), fun(nullptr)
{}
{ default_value = calloc_mergable_or_throw<weight>(stride());}

bool not_null() { return (_weight_mask > 0 && !_map.empty()); }

@@ -280,20 +237,24 @@

inline weight& operator[](size_t i)
{ uint64_t index = i & _weight_mask;
weight_map::iterator iter = _map.find(index);
weight_map::iterator iter = _map.find(index);
if (iter == _map.end())
{ _map.insert(std::make_pair(index, calloc_mergable_or_throw<weight>(stride())));
iter = _map.find(index);
if (fun != nullptr)
{
iterator j(iter,stride());
fun(j, default_data);
}
iter = _map.find(index);
fun(iter->second, default_data);
}
return *(iter->second);
}

inline const weight& operator[](size_t i) const
{ uint64_t index = i & _weight_mask;
weight_map::const_iterator iter = _map.find(index);
if (iter == _map.end())
return *default_value;
return *(iter->second);
}

inline weight& strided_index(size_t index) { return operator[](index << _stride_shift); }

void shallow_copy(const sparse_parameters& input)
@@ -307,6 +268,10 @@ class sparse_parameters
_map = input._map;
_weight_mask = input._weight_mask;
_stride_shift = input._stride_shift;
free(default_value);
default_value = calloc_mergable_or_throw<weight>(stride());
memcpy(default_value, input.default_value, stride());
default_data = input.default_data;
_seeded = true;
}

@@ -315,26 +280,33 @@
R& new_R = calloc_or_throw<R>();
new_R = info;
default_data = &new_R;
fun = (void(*)(iterator&, void*))T::func;
fun = (void(*)(const weight*, void*))T::func;
fun(default_value, default_data);
}

template<class T> void set_default() { fun = (void(*)(iterator&, void*))T::func; }
template<class T> void set_default() { fun = (void(*)(const weight*, void*))T::func; }

void set_zero(size_t offset)
{
for (weight_map::iterator iter = _map.begin(); iter != _map.end(); ++iter)
(&(*(iter->second)))[offset] = 0;
}

uint64_t mask() { return _weight_mask; }
uint64_t mask() const { return _weight_mask; }

uint64_t seeded() { return _seeded; }
uint64_t seeded() const { return _seeded; }

uint32_t stride() { return 1 << _stride_shift; }
uint32_t stride() const { return 1 << _stride_shift; }

uint32_t stride_shift() { return _stride_shift; }
uint32_t stride_shift() const { return _stride_shift; }

void stride_shift(uint32_t stride_shift) { _stride_shift = stride_shift; }
void stride_shift(uint32_t stride_shift) {
_stride_shift = stride_shift;
free(default_value);
default_value = calloc_mergable_or_throw<weight>(stride());
if (fun != nullptr)
fun(default_value, default_data);
}

#ifndef _WIN32
void share(size_t length)
@@ -352,6 +324,7 @@
}
if (default_data != nullptr)
free(default_data);
free(default_value);
}
};
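
The diff also keeps default_value coherent across the object's lifetime: both constructors allocate it, shallow_copy re-creates and copies it, stride_shift(uint32_t) rebuilds it at the new stride, and the destructor frees it. set_default additionally runs the stored initializer fun on the default block, so an unseen feature reads the same initial state a freshly inserted entry would get. A compressed sketch of that callback pattern, with invented names (table_sketch, init_all_ones):

#include <cstdlib>

typedef float weight;

// init_all_ones: an example per-entry initializer.
struct init_all_ones
{ static void func(weight* w, void* /*unused*/) { w[0] = 1.f; } };

struct table_sketch
{ void (*fun)(weight*, void*);
  void* default_data;
  weight* default_value;
  size_t stride;

  explicit table_sketch(size_t s)
    : fun(nullptr), default_data(nullptr), stride(s)
  { default_value = (weight*)calloc(stride, sizeof(weight)); }

  template <class T> void set_default()
  { fun = T::func;
    fun(default_value, default_data);   // keep the shared default in sync
  }

  ~table_sketch() { free(default_value); }
};
// usage: table_sketch t(4); t.set_default<init_all_ones>();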

10 changes: 5 additions & 5 deletions vowpalwabbit/gd.cc
@@ -851,11 +851,11 @@ void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text, g
template<class T> class set_initial_gd_wrapper
{
public:
static void func(typename T::iterator& iter, pair<float,float>& initial)
{
(&(*iter))[0] = initial.first;
(&(*iter))[1] = initial.second;
}
static void func(weight& w, pair<float,float>& initial, uint64_t index)
{
w = initial.first;
(&w)[1] = initial.second;
}
};

void save_load(gd& g, io_buf& model_file, bool read, bool text)
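
set_initial_gd_wrapper shows the new initializer signature: instead of a typename T::iterator&, the callback receives the first weight of a stride-sized block by reference, plus its global index. A sketch (set_initial_sketch is invented; what slot 1 holds is an assumption):

#include <cstdint>
#include <utility>

typedef float weight;

// The first slot arrives by reference; neighbouring slots are reached
// with pointer arithmetic on its address.
static void set_initial_sketch(weight& w, std::pair<float, float>& initial, uint64_t /*index*/)
{ weight* slots = &w;
  slots[0] = initial.first;    // the weight itself
  slots[1] = initial.second;   // adjacent per-weight state (e.g. an adaptive accumulator)
}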
26 changes: 20 additions & 6 deletions vowpalwabbit/gd.h
@@ -25,7 +25,7 @@ void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text);
void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text, GD::gd *g = nullptr);

template <class T>
struct multipredict_info { size_t count; size_t step; polyprediction* pred; T& weights; /* & for l1: */ float gravity; };
struct multipredict_info { size_t count; size_t step; polyprediction* pred; const T& weights; /* & for l1: */ float gravity; };

template<class T>
inline void vec_add_multipredict(multipredict_info<T>& mp, const float fx, uint64_t fi)
@@ -57,9 +57,20 @@ inline void foreach_feature(W& weights, features& fs, R& dat, uint64_t offset =
for (features::iterator& f : fs)
T(dat, mult*f.value(), weights[(f.index() + offset)]);
}

// iterate through one namespace (or its part), callback function T(some_data_R, feature_value_x, feature_weight)
template <class R, void (*T)(R&, const float, float&)>
template <class R, void (*T)(R&, const float, const float&), class W>
inline void foreach_feature(const W& weights, features& fs, R& dat, uint64_t offset = 0, float mult = 1.)
{
for (features::iterator& f : fs)
{
const weight& w = weights[(f.index() + offset)];
T(dat, mult*f.value(), w);
}
}

// iterate through one namespace (or its part), callback function T(some_data_R, feature_value_x, feature_weight)
template <class R, typename T>
inline void foreach_feature(vw& all, features& fs, R& dat, uint64_t offset = 0, float mult = 1.)
{
if (all.weights.sparse)
@@ -114,10 +125,13 @@ inline void foreach_feature(vw& all, example& ec, R& dat)
// iterate through all namespaces and quadratic&cubic features, callback function T(some_data_R, feature_value_x, feature_weight)
template <class R, void (*T)(R&, float, float&)>
inline void foreach_feature(vw& all, example& ec, R& dat)
{ foreach_feature<R,float&,T>(all, ec, dat);
}
{ foreach_feature<R,float&,T>(all, ec, dat);}

template <class R, void (*T)(R&, float, const float&)>
inline void foreach_feature(vw& all, example& ec, R& dat)
{ foreach_feature<R,const float&,T>(all, ec, dat);}

inline void vec_add(float& p, const float fx, float& fw) { p += fw * fx; }
inline void vec_add(float& p, const float fx, const float& fw) { p += fw * fx; }

inline float inline_predict(vw& all, example& ec)
{ float temp = ec.l.simple.initial;
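
These overloads give prediction a read-only route through the weights: multipredict_info now holds const T&, and foreach_feature gains variants whose callback takes const float&. Combined with the const operator[] in array_parameters.h, predicting over sparse weights touches no mutable state. A hedged usage sketch (predict_sketch and vec_add_sketch are invented names):

#include <cstdint>
#include <utility>
#include <vector>

inline void vec_add_sketch(float& p, const float fx, const float& fw) { p += fw * fx; }

template <class W>  // W: anything with a const operator[](uint64_t) returning const float&
float predict_sketch(const W& weights, const std::vector<std::pair<uint64_t, float>>& feats)
{ float p = 0.f;
  for (const auto& f : feats)
    vec_add_sketch(p, f.second, weights[f.first]);  // reads only; never inserts
  return p;
}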
12 changes: 6 additions & 6 deletions vowpalwabbit/gd_mf.cc
@@ -220,15 +220,15 @@ void mf_train(gdmf& d, example& ec)
mf_train(d, ec, d.all->weights.dense_weights);
}

template <class T> class set_rand_wrapper
template <class T> class set_rand_wrapper
{
public:
static void func(typename T::iterator& iter, uint32_t& stride)

static void func(weight& w, uint32_t& stride, uint64_t index)
{
uint64_t index = iter.index();
for (weight_iterator_iterator w = iter.begin(); w != iter.end(stride); ++w, ++index)
*w = (float)(0.1 * merand48(index));
weight* pw=&w;
for (size_t i =0 ; i != stride; ++i, ++index)
pw[i] = (float)(0.1 * merand48(index));
}
};

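
The same signature change lands here: set_rand_wrapper receives the weight reference and global index directly and walks the stride block through a raw pointer. A sketch with a stand-in generator (merand48_sketch; VW's real merand48 uses different constants, but the property relied on is the same: the value depends only on the slot's global index, so initialization is reproducible whenever a sparse entry is created):

#include <cstdint>

inline float merand48_sketch(uint64_t& x)
{ x = x * 0x5deece66dULL + 11;                      // toy 48-bit-style LCG step
  return float((x >> 25) & 0x7fffff) / float(1 << 23);  // map to [0, 1)
}

static void set_rand_sketch(float& w, uint32_t stride, uint64_t index)
{ float* pw = &w;                                   // walk the stride block by pointer, as above
  for (uint32_t i = 0; i != stride; ++i, ++index)
  { uint64_t seed = index;
    pw[i] = 0.1f * merand48_sketch(seed);
  }
}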
