Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 248 lines (211 sloc) 9.04 KB
import numpy as np
import math
import copy
"""
Notes:
20111110 -- Tried with different weights:
* Single positive and negative weight shuffled: the perceptron finds the
max easily. Not a good strategy for the Dater.
* All weight distributed evenly: tends to perform better than random
weights. This is not a good strategy for the Dater.
* Tried weights using Beta distribution and params
(.5, .5), (2., 5.), (2., 2.)
but nothing seems to perform better than Gauss. It seems the same, which
I suspect has to do with the very small precision offered by 100 dims and
2 decimal places of precision.
* Tried weights for Gamma with k = 2, theta = 2. Same results.
"""
def generate_weights(weight_gen, partition=None, dims=100, precision=2):
"""
Generate random weights in [-1, 1]. The positive weights are grouped at
the head of the vector followed by the negative weights.
"""
max_precision = math.pow(10, precision)
def gen_weights_with_precision(num_weights):
""" Generate weights with the requested precision. """
weights = np.array([np.abs(weight_gen()) for i in range(num_weights)])
weight_norm = weights / np.sum(weights)
weight_norm_trunc = np.asarray(weight_norm * max_precision, dtype=int)
sum_except_last = np.sum(weight_norm_trunc[:-1])
weight_norm_trunc[-1] = max_precision - sum_except_last
return weight_norm_trunc
# Partition the weights.
if partition is None:
num_pos = min(1 + int(np.random.uniform() * dims), dims - 1)
num_neg = (dims - num_pos)
partition = (num_pos, num_neg)
else:
assert(len(partition) is 2)
dims = sum(partition)
# Generate weights.
pos_weights = gen_weights_with_precision(partition[0])
neg_weights = -1 * gen_weights_with_precision(partition[1])
# Combine weights.
weights = np.hstack([pos_weights, neg_weights])
return weights / max_precision
class Dater(object):
""" The dater will rate the candidate dates. """
# Number of candidates that the dater will rate a priori.
INITIAL_CANDIDATES = 20
@staticmethod
def random_binary_candidate(dims):
""" Generate a random binary candidate vector. """
ones = int(max(np.random.uniform() * dims, 1))
zeros = dims - ones;
d = np.asarray(np.hstack([np.ones(ones), np.zeros(zeros)]), dtype=int)
return d, ones, zeros
def __init__(self, w=None, dims=100):
gauss_weights = lambda: np.random.normal(loc=.1, scale=.2)
# Adversarial partition determined by experiment.
num_pos = int(np.random.normal(loc=dims * 3/8., scale=.05*dims))
num_pos = min(num_pos, dims - 1)
split = (num_pos, dims - num_pos)
self._w = generate_weights(gauss_weights, dims=dims, partition=split) \
if w is None else w
np.random.shuffle(self._w)
# Generate initial candidates.
self._initial_survey = []
for candidate_idx in range(Dater.INITIAL_CANDIDATES):
# Don't make an ideal candidate or a dup.
while True:
d, ones, zeros = self.random_binary_candidate(dims)
np.random.shuffle(d)
r = self.rate_a_date(d)
candidates = [e[0] for e in self._initial_survey]
if all((d != c).any() for c in candidates) and r != 1:
break
self._initial_survey.append((d, r))
def rate_a_date(self, d):
""" Date is a len(w) length vector. """
dims = len(self.w)
return int((np.dot(self.w, d) * dims) + 0.5) / float(dims)
def get_w(self):
return self._w
def get_initial_survey(self):
return self._initial_survey
w = property(get_w, doc="The dater's preference vector.")
initial_survey = property(get_initial_survey,
doc="Initial list of scored candidates.")
class Matchmaker(object):
""" The matchmaker tries to infer the preferences of the dater. """
def __init__(self, dims=100):
self._examples = []
self._w_estimate = np.zeros(dims)
self._alpha = 0.01
def new_client(self, examples):
"""
The Matchmaker works with one client at a time. Pass an initial list
of examples so that the matchmaker can begin its search.
"""
dims = len(examples[0][0])
self._w_estimate = np.zeros(dims)
self._examples = copy.copy(examples)
# Contraint that weights sum to zero.
for mul in np.arange(-1.0, 1.1, 0.25):
self.rate_example(np.ones(dims) * mul, 0.)
def find_date(self, method=None, **kwargs):
"""
Create a date.
method=None -- Default method uses current guess for best candidate
by taking all positive weights.
method='PERTURB_BEST' -- Takes the candidate composed of positive
weights and adds randon noise. Requires
additional parameter 'gauss_thresh' to
control the amount of noise such that
abs(N(mean=1, sigma=1)) > gauss_thresh
is the decision to perturb each attribute.
Lower thresholds give more perturbation.
method='RANDOM' -- Uniform random candidate.
"""
dims = len(self.w_estimate)
uniform_noise = np.random.uniform
gauss_noise = np.random.normal
# Current max date guess using positive weights.
w_est_date = np.asarray(self._w_estimate > 0.0, dtype=int)
# Select date using method.
if method is None:
mk_date = lambda: w_est_date
elif method is "PERTURB_BEST":
scores = [e[1] for e in self.examples]
max_score = (scores[0], 0)
for score, idx in zip(scores[1:], range(len(scores[1:]))):
if score > max_score[0]:
max_score = (score, idx)
mk_date = lambda: (self.examples[max_score[1]][0] + \
np.asarray((np.abs(gauss_noise(size=dims)) >
kwargs['gauss_thresh']), dtype=int)) % 2
elif method is "RANDOM":
mk_date = lambda: np.asarray(
np.abs(uniform_noise(size=dims)) > 0.5, dtype=int)
# Guarantee uniqueness.
dates = [e[0] for e in self.examples]
date_not_unique = lambda date: any((date == d).all() for d in dates)
while True:
# Get a date.
date = mk_date()
# Check same as any previous
if date_not_unique(date):
noise = np.random.normal(size=dims)
mutate = np.asarray(np.abs(noise) > 2., dtype=int)
date = (date + mutate) % 2
if not date_not_unique(date):
return date
else:
return date
def rate_example(self, d, r, iterations=400):
""" Rate example d with the actual Dater preference r. """
self._examples.append((d, r))
num_example = len(self.examples)
# permutation = range(num_example)
for iteration in range(iterations):
# Iterate all examples.
# np.random.shuffle(permutation)
# for idx in permutation:
bag = np.random.randint(0, num_example, size=num_example)
for idx in bag:
date, rate = self.examples[idx]
pred = np.dot(self.w_estimate, date)
error = rate - pred
w_new = self._alpha * error * date
self._w_estimate = self._w_estimate + w_new
def get_examples(self):
return self._examples
def get_w_estimate(self):
return self._w_estimate
examples = property(get_examples, doc="Dates retured with rate_example().")
w_estimate = property(get_w_estimate, doc="Currently estimated weights.")
class Serialization:
""" Serialize/deserialize dating objects. """
@staticmethod
def dater_to_lines(dater):
"""
Convert the dater's weights to a list of lines as per game architecture.
"""
s = ["weights\n"]
for w in dater.w:
s.append(str(w) + "\n")
return s
@staticmethod
def date_to_string(date):
""" Convert date vector [d1,...,dn] to string. """
s = "[" + str(date[0])
for v_i in date[1:]:
s += ", " + str(v_i)
s += "]"
return s
@staticmethod
def scored_date_from_string(scored_date):
""" Convert string of [d1,...,dn]:[s] to tuple (d, s). """
parts = scored_date.split(":")
d = np.array(eval(parts[0]))
s = eval(parts[1].strip("[]"))
return d, s
def iterate_matches(matchmaker, dater, iterations):
""" Show the dater some dates and track scores. """
scores = []
for date in range(iterations):
d = matchmaker.find_date()
r = dater.rate_a_date(d)
matchmaker.rate_example(d, r)
scores.append(dater.rate_a_date(matchmaker.find_date()))
return scores