Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,22 @@ def runValueIteration(self):
value iteration, V_k+1(...) depends on V_k(...)'s.
"""
"*** YOUR CODE HERE ***"

# Batch value iteration: each sweep writes the new estimates into
# next_values while computeQValueFromValues keeps reading the previous
# sweep's estimates from self.values.
next_values = util.Counter()
for _ in range(self.iterations):
    for state in self.mdp.getStates():
        q_values = []
        if not self.mdp.isTerminal(state):
            q_values = [
                self.computeQValueFromValues(state, action)
                for action in self.mdp.getPossibleActions(state)
            ]
        # Terminal states are worth 0.0. A non-terminal state with no
        # available action gets the same value instead of letting max()
        # raise ValueError on an empty sequence.
        next_values[state] = max(q_values) if q_values else 0.0
    # Publish a snapshot only after the whole sweep, so states updated in
    # this sweep never observe each other's new values.
    self.values = next_values.copy()



def getValue(self, state):
"""
Return the value of the state (computed in __init__).
Expand All @@ -78,7 +93,16 @@ def computeQValueFromValues(self, state, action):
value function stored in self.values.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
q = 0
if not self.mdp.isTerminal(state):
for n in self.mdp.getTransitionStatesAndProbs(state, action):
nextState = n[0]
prob = n[1]
q += prob*(self.mdp.getReward(state, action, nextState) + self.discount*self.values[nextState])
return q




def computeActionFromValues(self, state):
    """
    Return the action with the highest Q-value in `state`.

    Q-values are obtained from self.getQValue(state, action) for every
    action listed by self.mdp.getPossibleActions(state). When several
    actions share the highest Q-value, the first one listed wins.

    Returns None when the state has no available actions (e.g. a
    terminal state).
    """
    "*** YOUR CODE HERE ***"
    actions = list(self.mdp.getPossibleActions(state))
    if not actions:
        # Nothing to choose from: make the "no policy" answer explicit
        # rather than relying on a helper's behavior for empty input.
        return None
    return max(actions, key=lambda action: self.getQValue(state, action))

def getPolicy(self, state):
Expand Down