Permalink
Fetching contributors…
Cannot retrieve contributors at this time
135 lines (113 sloc) 3.8 KB
// Returns true when `_.has` reports every key in *listProperties* as present
// on *object* (vacuously true for an empty list). Asserts that *object* is an
// object and *listProperties* is an array.
var hasProperties = function(object, listProperties) {
  var argsAreValid = _.isObject(object) && _.isArray(listProperties);
  assert.ok(argsAreValid, 'fail hasProperties');
  var ownsKey = function(key) {
    return _.has(object, key);
  };
  // One boolean per requested key; _.every without a predicate checks that
  // all of them are truthy.
  var membershipFlags = map(ownsKey, listProperties);
  return _.every(membershipFlags);
};
// Builds an MDP agent that picks actions by softmax over expected utility.
//
// *params* should be an object containing *utility*, a utility function called
// as utility(state, action), and *alpha*, which regulates the agent's softmax
// noise. *world* supplies *stateToActions* (called with a state) and
// *transition* (called with a state and an action).
//
// Returns {params, expectedUtility, act}, where act(state) is the result of
// Infer over the available actions and expectedUtility(state, action) is the
// utility of taking *action* in *state* plus the expected utility of following
// *act* afterwards. The recursion stops at states whose *terminateAfterAction*
// field is truthy.
var makeMDPAgentOptimal = function(params, world) {
  // Validate through the shared helper, as makeMDPAgentHyperbolic does. This
  // avoids mapping purely for side effects and does not rely on *params*
  // inheriting hasOwnProperty from Object.prototype.
  assert.ok(hasProperties(params, ['utility', 'alpha']), 'makeMDPAgent args');

  var stateToActions = world.stateToActions;
  var transition = world.transition;
  var utility = params.utility;
  var alpha = params.alpha;

  // act and expectedUtility are mutually recursive; both are wrapped in
  // dp.cache.
  var act = dp.cache(
    function(state) {
      return Infer({
        method: 'enumerate'
      }, function() {
        var action = uniformDraw(stateToActions(state));
        var eu = expectedUtility(state, action);
        // Weight each action by exp(alpha * eu).
        factor(alpha * eu);
        return action;
      });
    });

  var expectedUtility = dp.cache(
    function(state, action) {
      var u = utility(state, action);
      if (state.terminateAfterAction) {
        return u;
      } else {
        // Immediate utility plus the expected utility from the next state on,
        // assuming the agent keeps choosing according to act.
        return u + expectation(Infer({
          method: 'enumerate'
        }, function() {
          var nextState = transition(state, action);
          var nextAction = sample(act(nextState));
          return expectedUtility(nextState, nextAction);
        }));
      }
    });

  return {
    params,
    expectedUtility,
    act
  };
};
// Builds an MDP agent whose utilities are discounted by how far in the future
// they are received.
//
// *params* must contain *utility* (called as utility(state, action)), *alpha*
// (softmax noise), *discount* and *sophisticatedOrNaive*. It may also contain
// *discountFunction*, a function of the delay that replaces the default
// hyperbolic discount 1 / (1 + discount * delay). *world* supplies
// *stateToActions* and *transition*.
//
// Returns {params, expectedUtility, act}, where act(state, delay) is the result
// of Infer over the available actions (delay defaults to 0) and
// expectedUtility(state, action, delay) is the discounted utility of taking
// *action* in *state* plus the expected discounted utility of what follows.
var makeMDPAgentHyperbolic = function(params, world) {
  assert.ok(hasProperties(params, ['utility', 'alpha', 'discount', 'sophisticatedOrNaive']),
            'makeMDPAgentHyperbolic params');

  var stateToActions = world.stateToActions;
  var transition = world.transition;
  var utility = params.utility;

  // we can specify a discount function so that our 'hyperbolic' agent can
  // actually be an exponential discounter (or some other kind of discounter)
  var paramsDiscountFunction = params.discountFunction;
  var discountFunction = (
    paramsDiscountFunction ||
    function(delay) {
      return 1 / (1 + params.discount * delay);
    });

  var isNaive = params.sophisticatedOrNaive === 'naive';

  var act = dp.cache(
    function(state, delay) {
      // make sure the delay passed on is never 'undefined'
      var currentDelay = delay ? delay : 0;
      return Infer({
        method: 'enumerate'
      }, function() {
        var action = uniformDraw(stateToActions(state));
        var eu = expectedUtility(state, action, currentDelay);
        factor(params.alpha * eu);
        return action;
      });
    });

  var expectedUtility = dp.cache(
    function(state, action, delay) {
      // Check the raw utility *before* discounting: number * undefined is NaN,
      // so testing the discounted product for undefined could never fail.
      var rawUtility = utility(state, action);
      assert.ok(!_.isUndefined(rawUtility),
                "utility undefined" + JSON.stringify([state, action, delay, rawUtility]));
      var u = discountFunction(delay) * rawUtility;
      if (state.terminateAfterAction) {
        return u;
      } else {
        return u + expectation(Infer({
          method: 'enumerate'
        }, function() {
          var nextState = transition(state, action);
          // A naive agent simulates its future self as still evaluating from
          // the current vantage point (delay + 1); otherwise the future self
          // is simulated as starting afresh from delay 0.
          var perceivedDelay = isNaive ? delay + 1 : 0;
          var nextAction = sample(act(nextState, perceivedDelay));
          // The resulting utility is always valued with the true delay.
          return expectedUtility(nextState, nextAction, delay + 1);
        }));
      }
    });

  return {
    params,
    expectedUtility,
    act
  };
};
// Decides whether *agentParams* describes an optimal agent. An explicit
// *optimal* field is returned as-is; when it is undefined, the agent counts as
// optimal exactly when it has none of the keys *discount*, *discountFunction*
// or *sophisticatedOrNaive*.
var isOptimalMDPAgent = function(agentParams) {
  if (_.isUndefined(agentParams.optimal)) {
    return (!_.has(agentParams, 'discount') &&
            !_.has(agentParams, 'discountFunction') &&
            !_.has(agentParams, 'sophisticatedOrNaive'));
  } else {
    return agentParams.optimal;
  }
};
// Agent factory: builds the optimal agent when isOptimalMDPAgent(params) is
// truthy and the hyperbolic-discounting agent otherwise, passing *params* and
// *world* through unchanged.
var makeMDPAgent = function(params, world) {
  var construct = isOptimalMDPAgent(params) ? makeMDPAgentOptimal : makeMDPAgentHyperbolic;
  return construct(params, world);
};