@@ -131,7 +131,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : 4 ,
"execution_count" : 7 ,
"metadata" : {
"collapsed" : true
},
@@ -165,7 +165,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : 5 ,
"execution_count" : 8 ,
"metadata" : {
"collapsed" : true
},
@@ -434,7 +434,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : 18 ,
"execution_count" : 9 ,
"metadata" : {
"collapsed" : true ,
"scrolled" : true
@@ -467,7 +467,9 @@
{
"cell_type" : " code" ,
"execution_count" : 10 ,
"metadata" : {},
"metadata" : {
"collapsed" : true
},
"outputs" : [],
"source" : [
" def valitr(P, R, discount, eps):\n " ,
@@ -511,8 +513,10 @@
},
{
"cell_type" : " code" ,
"execution_count" : 13 ,
"metadata" : {},
"execution_count" : 11 ,
"metadata" : {
"collapsed" : true
},
"outputs" : [],
"source" : [
" def policyitr(P, R, discount, eps):\n " ,
@@ -564,26 +568,42 @@
"cell_type" : " markdown" ,
"metadata" : {},
"source" : [
" #### Simplex Solutions "
" #### CVXPY Solvers for the primal problem "
]
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"metadata" : {
"collapsed" : true
},
"execution_count" : 44 ,
"metadata" : {},
"outputs" : [],
"source" : [
" Y=cvx.Variable(P_0.shape[1]) # number of states\n " ,
" cons=[A*Y>=b]\n " ,
" objective=cvx.Minimize(Y[0])\n " ,
" prob=cvx.Problem(objective,cons)\n " ,
" data = prob.get_problem_data(ECOS)\n " ,
" prob.solve(solver=ECOS,abstol=1e-7,max_iters=1000,verbose=True)\n " ,
" print (\" status: %s\" % prob.status)\n " ,
" print (\" optimal value %s\" % objective.value)\n " ,
" print(Y.value)"
" def ECOS_Primal(A,b,c,P_0,tol):\n " ,
" Y=cvx.Variable(P_0.shape[1]) # number of states\n " ,
" cons=[A*Y>=b]\n " ,
" objective=cvx.Minimize(cvx.sum_entries(cvx.mul_elemwise(c,Y)))\n " ,
" prob=cvx.Problem(objective,cons)\n " ,
" prob.solve(solver=ECOS,reltol=tol,max_iters=1000,verbose=False)\n " ,
" #print (\" status: %s\" % prob.status)\n " ,
" #print (\" optimal value %s\" % objective.value)\n " ,
" return Y.value\n " ,
" def SCS_Primal(A,b,c,P_0,tol):\n " ,
" Y=cvx.Variable(P_0.shape[1]) # number of states\n " ,
" cons=[A*Y>=b]\n " ,
" objective=cvx.Minimize(cvx.sum_entries(cvx.mul_elemwise(c,Y)))\n " ,
" prob=cvx.Problem(objective,cons)\n " ,
" prob.solve(solver=SCS,eps=tol,max_iters=1000,verbose=False)\n " ,
" #print (\" status: %s\" % prob.status)\n " ,
" #print (\" optimal value %s\" % objective.value)\n " ,
" return Y.value\n " ,
" def CVXOPT_Primal(A,b,c,P_0,tol):\n " ,
" Y=cvx.Variable(P_0.shape[1]) # number of states\n " ,
" cons=[A*Y>=b]\n " ,
" objective=cvx.Minimize(cvx.sum_entries(cvx.mul_elemwise(c,Y)))\n " ,
" prob=cvx.Problem(objective,cons)\n " ,
" prob.solve(solver=CVXOPT,reltol=tol,max_iters=1000,verbose=False)\n " ,
" #print (\" status: %s\" % prob.status)\n " ,
" #print (\" optimal value %s\" % objective.value)\n " ,
" return Y.value"
]
},
{
@@ -609,7 +629,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 126 ,
"metadata" : {
"collapsed" : true
},
@@ -633,7 +653,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 149 ,
"metadata" : {
"collapsed" : true
},
@@ -660,7 +680,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 127 ,
"metadata" : {
"collapsed" : true
},
@@ -686,7 +706,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 128 ,
"metadata" : {
"collapsed" : true
},
@@ -711,8 +731,10 @@
},
{
"cell_type" : " code" ,
"execution_count" : 5 ,
"metadata" : {},
"execution_count" : 129 ,
"metadata" : {
"collapsed" : true
},
"outputs" : [],
"source" : [
" def descent(update, update_name, x, A, b, eta0, tol, optVal, T=int(1e4)):\n " ,
@@ -721,44 +743,22 @@
" error = []\n " ,
" \n " ,
" t = int(0)\n " ,
" \n " ,
" while t <= T and la.norm(x - optVal,2) > eps:\n " ,
" xnew = x + 1;\n " ,
" while t <= T and la.norm(x - xnew, 2) > tol:\n " ,
" x = xnew\n " ,
" if update_name == \" gradient\" :\n " ,
" x = update(x, A, b, t, eta0)\n " ,
" xnew = update(x, A, b, t, eta0)\n " ,
" \n " ,
" elif update_name == \" accelerated\" :\n " ,
" x , v, theta = update(x, v, theta, A, b, t, eta0)\n " ,
" xnew , v, theta = update(x, v, theta, A, b, t, eta0)\n " ,
" \n " ,
" if(t % 1 == 0) or (t == T - 1):\n " ,
" error.append(la.norm(x - optVal,2))\n " ,
" \n " ,
" t = int(t + 1)\n " ,
" \n " ,
" return x, error, t"
]
},
{
"cell_type" : " code" ,
"execution_count" : 1 ,
"metadata" : {},
"outputs" : [
{
"ename" : " NameError" ,
"evalue" : " name 'descent' is not defined" ,
"output_type" : " error" ,
"traceback" : [
" \u001b [0;31m---------------------------------------------------------------------------\u001b [0m" ,
" \u001b [0;31mNameError\u001b [0m Traceback (most recent call last)" ,
"\u001b[0;32m<ipython-input-1-b173a4be70a1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx_pg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror_pg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdescent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraddes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"gradient\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvals\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mx_ag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror_ag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdescent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccelgrad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"accelerated\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvals\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1e-1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
" \u001b [0;31mNameError\u001b [0m: name 'descent' is not defined"
]
}
],
"source" : [
" x_pg, error_pg, t = descent(graddes, \" gradient\" , vals+100, A, b, 1, tol, vals)\n " ,
" x_ag, error_ag, t = descent(accelgrad, \" accelerated\" , vals+100, A, b, 1e-1, tol, vals)"
]
},
{
"cell_type" : " markdown" ,
"metadata" : {},
@@ -768,7 +768,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 29 ,
"metadata" : {
"collapsed" : true
},
@@ -785,7 +785,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 28 ,
"metadata" : {
"collapsed" : true
},
@@ -804,7 +804,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 27 ,
"metadata" : {
"collapsed" : true
},
@@ -814,13 +814,13 @@
" #finds the analytical center for IPM using gradient descent\n " ,
" for i in range (T):\n " ,
" g = findBarrierGradient(A,b,xFeasible)\n " ,
" xFeasible = xFeasible - 0.1 /(i+1)*g;\n " ,
" xFeasible = xFeasible - 0.01 /(i+1)*g;\n " ,
" return xFeasible"
]
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 26 ,
"metadata" : {
"collapsed" : true
},
@@ -837,10 +837,10 @@
" stateVals = stateValsNew\n " ,
" #Finds the gradient and hessian\n " ,
" g = t*np.ones([stateVals.size, 1]) + findBarrierGradient(A,b,stateVals)\n " ,
" # H = findBarrierHessian(A,b,stateVals)\n " ,
" H = findBarrierHessian(A,b,stateVals)\n " ,
" #Updates the calues\n " ,
" # stateValsNew = stateVals - np.dot(np.linalg.inv(H),g)\n " ,
" stateValsNew = stateVals - 1/np.sqrt(itr)*g;\n " ,
" stateValsNew = stateVals - np.dot(np.linalg.inv(H),g)\n " ,
" # stateValsNew = stateVals - 1/np.sqrt(itr)*g;\n " ,
" error.append(np.linalg.norm(stateValsNew - stateValsOpt,2))\n " ,
" t = t*(1+alpha)\n " ,
" if np.isnan(error[-1]):\n " ,
@@ -850,7 +850,7 @@
},
{
"cell_type" : " code" ,
"execution_count" : null ,
"execution_count" : 30 ,
"metadata" : {
"collapsed" : true
},
@@ -865,26 +865,123 @@
" return stateVals, error"
]
},
{
"cell_type" : " code" ,
"execution_count" : 15 ,
"metadata" : {},
"outputs" : [],
"source" : [
" def plotGridWorld(R, bestAction, Rows, Columns):\n " ,
" imgData = np.resize(R,[Rows, Columns]);\n " ,
" imgData = np.flipud(imgData); \n " ,
" plt.imshow(imgData, interpolation='nearest')\n " ,
" bestAction = np.resize(bestAction,[Rows, Columns])\n " ,
" for a in range(Rows):\n " ,
" for b in range(Columns): \n " ,
" if bestAction[Rows -1 - a,b]== 0:\n " ,
" plt.text(b,a ,r'$ \\ leftarrow $')\n " ,
" elif bestAction[Rows -1 - a,b]== 1:\n " ,
" plt.text(b,a,r'$ \\ rightarrow $')\n " ,
" elif bestAction[Rows -1 - a,b]== 2:\n " ,
" plt.text(b,a,r'$ \\ uparrow $')\n " ,
" elif bestAction[Rows -1 - a,b] == 3:\n " ,
" plt.text(b,a,r'$ \\ downarrow $')\n " ,
" elif bestAction[Rows -1 - a,b] == 4:\n " ,
" plt.text(b,a ,r'$ o $')\n " ,
" #plt.text(b,a,r'$ \\ leftarrow $')\n " ,
" \n " ,
" plt.colorbar()\n " ,
" plt.show()"
]
},
{
"cell_type" : " markdown" ,
"metadata" : {},
"source" : [
" ### Examples\n " ,
" \n " ,
" In the first example we use a 5 by 5 grid world example to compare the required number of iterations for different methods to achieve a desired tolerance level. The agent starts from the bottom left grid and its aim is to reach the top left grid while avoiding some of the intermediate grids. We solve this problem using (1) value iteration, (2) policy iteration, (3) simplex method, (4) cvxpy with SCS and ECOS solvers, (5) projected gradient descent, (6) accelerated projected gradient descent and (7) interior point method. We assume that the agent takes the chosen action with probability 0.8, and we use the discount factor of 0.9. The desired tolerance is $10^{-7}$."
" In the first example we use a 5 by 5 grid world example to compare the required number of iterations for different methods to achieve a desired tolerance level. The agent starts from the bottom left grid and its aim is to reach the top left grid while avoiding some of the intermediate grids. We solve this problem using (1) value iteration, (2) policy iteration, (3) simplex method, (4) cvxpy with SCS,ECOS and CVXOPT solvers, (5) projected gradient descent, (6) accelerated projected gradient descent and (7) interior point method. We assume that the agent takes the chosen action with probability 0.8, and we use the discount factor of 0.9. The desired tolerance is $10^{-7}$."
]
},
{
"cell_type" : " code" ,
"execution_count" : 19 ,
"execution_count" : 150 ,
"metadata" : {},
"outputs" : [],
"outputs" : [
{
"data" : {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAScAAAD8CAYAAAA11GIZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFMdJREFUeJzt3XGslXd9x/H3pxfwFim9Al1zB0ibBWyKaYuj1K4mapuu\ntDq7LSZru2psNKSJXWpmovWfLWZZls3MGGOVECWd0dgY7TbW4FiNuNZKEaiIcBEHtS3UuxBaaC8Q\nyr33fPfHObgDvfee5/aec57f7zyfV/Ik9zznuc/z4XD48nt+z+95fooIzMxSc1HZAczMJuLiZGZJ\ncnEysyS5OJlZklyczCxJLk5mliQXJzObMUkbJR2VtHeS9yXpy5IOStoj6V2t9uniZGbt8Aiwdor3\nbweWN5Z1wNda7dDFycxmLCKeBF6ZYpM7gW9G3TPAgKTBqfY5q50Bz5mjt0Q/b+3Ers06ZsU1p8uO\nUNjzh0c59sq4ZrKP297/1nj5lfFC2+7a8/o+4EzTqg0RsWEah1sMHG56faSxbniyX+hIcernrdyg\nWzqxa7OO2bJld9kRCltz2+HWG7Xw8ivj/GzL2wtt2zf4P2ciYvWMDzoNHSlOZpa+AGrUunW4l4Cl\nTa+XNNZNyn1OZhUVBKMxXmhpg03ARxtX7d4NvBoRk57SgVtOZpXWrpaTpO8A7wMWSToC/C0wGyAi\n1gObgTuAg8Bp4L5W+3RxMquoIBhv0yOTIuLuFu8H8Mnp7NPFyazCaqT7PDcXJ7OKCmDcxcnMUuSW\nk5klJ4DRhB/T7eJkVlFB+LTOzBIUMJ5ubXJxMquq+gjxdLk4mVWWGGdG9w53lIuTWUXVO8RdnMws\nMfVxTi5OZpagmltOZpaa1FtOPfHIlJE4wek4WXaMwnLKm1NWyCvvnqHXee6F0dKOH4hxLiq0lKHQ\nUSWtlXSgMXPCQ50ONV01auxhWzZfypzy5pQV8sp75vXgz+8bLrVA1UKFljK0PK2T1Ac8DNxK/bm/\nOyRtioihToebyHC8wPMceMP6s5xhL9tZQ1qPB84pb05ZIa+83/reCP/0leNvWD98dIx77v9fnvnB\n0gl+q7MCcTb6un7coor0Oa0BDkbEcwCSHqU+k0IpxWlQyxhk2XnrzsRpdvM0K7i2jEhTyilvTlkh\nr7z3fvgS7v3wJeete/HIKH/6sWH++fOLSslUH4SZbs9OkeI00awJN1y4kaR11Oejop+5bQlX1ClG\nuIpVDKicv+TpyilvTlkhr7wHDo3ylX+4jD+6/uLSMqTcId62q3WNaWI2AMzXgq7esbNQl3fzcDOW\nU96cskJeeW99b3f/E79QhBiPvFtO0541wczyUMu85bQDWC7pSupF6S7gno6mMrOOq3eIpzvUsWWy\niBiT9ACwBegDNkbEvo4nM7OO6oUOcSJiM/WpXcysh4z79hUzS825EeKpcnEyq7Ba5lfrzKwH1W/8\ndXEys8QEYjTz21fMrAdFkP0gTDPrScp+EKaZ9aDALSczS5Q7xM0sOUF5D5IrwsXJrKLqU0OlWwLS\nTWZmHeZJNc0sQYFHiJtZolJuOaVbNs2soyJELS4qtLTSaoYmSZdK+g9Jv5C0T9J9rfbplpNZRdU7\nxGd++0rBGZo+CQxFxJ9Iugw4IOnbEXF2sv26OJlVVtueIV5khqYALpEkYB7wCjA21U5dnMwabvv9\n68qOUNiv4+UZ76PeIV64z2mRpJ1Nrzc0JjWBYjM0fQXYBPwWuAT4i4ioTXVAFyezCpvGCPFjEbF6\nBoe6DdgN3Az8AfCEpKci4rXJfsEd4mYVdW6EeBumIy8yQ9N9wGNRdxD4DXDVVDt1cTKrsBoXFVpa\n+N0MTZLmUJ+hadMF27wI9fnhJV0OvAN4bqqd+rTOrKIiYLQ28/bJZDM0Sbq/8f564O+ARyT9EhDw\n2Yg4NtV+XZzMKqp+Wteek6eJZmhqFKVzP/8W+OPp7NPFyazCUh4h7uJkVlHTHErQdS5OZpXVvtO6\nTnBxMqswP0PczJJTv1rnqaHMLDF+TK+ZJcundWaWHF+tM+uCsRjlNCeZr7eVHaWllLL6ap1ZB43F\nKM/yFCc5wTVxI4s0WHakSaWUNUKMuTiZdc4QuxhgIbOZwyGGmBeX0q+5ZceaUGpZfVrXYSNxgj5m\nMVfzyo5SSE55c8i6kus5xasc5hDXcCN9SvfyeEpZU+9zatmmk7RR0lFJe7sR6M2oUWMP2zgdJ8uO\nUkhOeXPI2vwPPOXCBOllbdPznDqiSMvpEeqP2PxmZ6MUMxwv8DwH3rD+LGfYy3bW1B8Zk4yc8uaU\n1WYu+3FOEfGkpCs6H6WYQS1jkGXnrTsTp9nN06zg2pJSTS6nvDlltfbwOKcOO8UIV7GKAS0qO0oh\nOeXNKatNTwSMteFhc53StuIkaR2wDqCf7l59WKjLu3q8mcopby5Z52sBK1lQdoxCUsqa9WldUY1p\nYjYAzNeCaNd+zawzsu9zMrPeFQkXpyJDCb4DbAPeIemIpI93PpaZdUMNFVrKUORq3d3dCGJm3RVR\nkT4nM8uNGK/C1Tozy0/KfU4uTmYVlfq9dS5OZlUV9X6nVLk4mVWYb18xs+SEO8TNLFU+rTOzJPlq\nnZklJ8LFycwS5aEEZpYk9zmZWXICUfPVOjNLUcINp9aPTDGzHtXoEC+ytCJpraQDkg5KemiSbd4n\nabekfZL+u9U+3XIyq7I2NJ0k9QEPA7cCR4AdkjZFxFDTNgPAV4G1EfGipN9rtV+3nMwqrE0tpzXA\nwYh4LiLOAo8Cd16wzT3AYxHxYv24cbTVTl2czCoqgFpNhRZgkaSdTcu6pl0tBg43vT7SWNdsBfA2\nST+WtEvSR1vl82mdWVUFUHyc07GIWD2Do80C/hC4BbgY2CbpmYj49VS/YGYV1aZxTi8BS5teL2ms\na3YEeDkiTgGnJD0JXAtMWpx8WmdWZVFwmdoOYLmkKyXNAe4CNl2wzb8D75E0S9Jc4AZg/1Q7dcvJ\nrLKKDRNoJSLGJD0AbAH6gI0RsU/S/Y3310fEfkn/CewBasDXI2LvVPt1cTKrsjaNwoyIzcDmC9at\nv+D1F4AvFN2ni5NZVQVEzTf+mlmSXJzMLEUJ31zn4mRWZS5OZpac6Q3C7DoXJ7MK88PmzCxNvlpn\nZimSW05mlpxit6aUJut768ZilNfieNkxelJun21OedPJqnqHeJGlBNkWp7EY5VmeYidbORbDZcfp\nKbl9tjnlTS5re2787YhsT+uG2MUAC5nNHA4xxLy4lH7NLTtWT8jts80pb3JZa+UdupVsi9NKrucU\nr3KYQ1zDjfSpr+xIhY3ECfqYxVzNKzvKhHL7bHPKm1TWxMc5tTytk7RU0lZJQ41ZEx7sRrBWmv9S\nU/4yTqRGjT1s43ScLDvKhHL7bHPKm1pWRbGlDEVaTmPApyPiWUmXALskPdE8s4JNbjhe4HkOvGH9\nWc6wl+2s4ZYSUpk1JHy1rmVxiohhYLjx84ik/dQfXu7iVMCgljHIsvPWnYnT7OZpVnBtSanM0jet\nPidJVwCrgO0TvLcOWAfQT5qdkak4xQhXsYoBLSo7ilVcTwzClDQP+D7wqYh47cL3I2IDsAFgvhZ0\n5Y88XwtYyYJuHKqtFurysiO0lNtnm1PeZLIG+d++Imk29cL07Yh4rLORzKxrcm45SRLwDWB/RHyx\n85HMrFtSPq0rMkL8JuAjwM2SdjeWOzqcy8y6IecR4hHxE1J+0LCZvXkJt5yyHSFuZjNT5gDLIlyc\nzKos96t1Ztab3HIyszS5OJlZctznZGbJcnEysxQp4YfNZfuYXjPrbW45mVWZT+vMLDnuEDezZLk4\nmVmSXJzMLDXCV+vMLEUFZ14p0i8laa2kA5IOSnpoiu2ulzQm6cOt9uniZFZlbXiek6Q+4GHgduBq\n4G5JV0+y3T8C/1UkmouTWZW152Fza4CDEfFcRJwFHgXunGC7v6L+uO+jRaK5OJlV2DRO6xZJ2tm0\nrGvazWLgcNPrI411/38caTHwZ8DXimZzh7hZlRW/WncsIlbP4EhfAj4bEbX6tAStuTiZVVW07Wrd\nS8DSptdLGuuarQYebRSmRcAdksYi4t8m26mLk1mVtWec0w5guaQrqRelu4B7zjtMxJXnfpb0CPD4\nVIUJXJzMKq0dt69ExJikB4AtQB+wMSL2Sbq/8f76N7NfFyezKmvTCPGI2AxsvmDdhEUpIj5WZJ8u\nTmZVVeKcdEW4OJlVlPBTCcwsUS5OZpYmFyczS5KLk5klx0/CNLNkuTiZWYr8sLkOGYtRXovjZcfo\nSbl9tjnlTSlrux421wnZFqexGOVZnmInWzkWw2XH6Sm5fbY55U0qa9FnOZVUnLI9rRtiFwMsZDZz\nOMQQ8+JS+jW37Fg9IbfPNqe8yWV1n1P7reR6TvEqhznENdxIn/rKjlTYSJygj1nM1byyo0wot882\np7wpZU19hHjL0zpJ/ZJ+JukXkvZJ+nw3grXS/Jea8pdxIjVq7GEbp+Nk2VEmlNtnm1Pe1LKqFoWW\nMhRpOb0O3BwRJyXNBn4i6QcR8UyHs/WE4XiB5znwhvVnOcNetrOGW0pIZUb+N/5GRADn/ouf3VgS\n/iOlZVDLGGTZeevOxGl28zQruLakVGZ1WZ/WQX1KF0m7qc+a8EREbO9srN52ihGuYhUDWlR2FKu6\n3K/WRcQ4cJ2kAeBfJb0zIvY2b9OYjWEdQD/dufowXwtYyYKuHKudFurysiO0lNtnm1PelLJm33I6\nJyJOAFuBtRO8tyEiVkfE6tm8pV35zKyTEm45Fblad1mjxYSki4FbgV91OpiZdVhj9pUiSxmKnNYN\nAv/SmEr4IuC7EfF4Z2OZWaelPs6pyNW6PcCqLmQxs26LdKtTtiPEzWzmsm45mVmPyn0Qppn1rpSf\n5+TiZFZhLk5mlp7AHeJmliZ3iJtZmlyczCw12Q/CNLMeFeU9SK4IFyezKku3Nrk4mVWZT+vMLD0B\n+LTOzJKUbm3Kd1JNM5u5ds34K2mtpAOSDkp6aIL3/1LSHkm/lPRTSS0foO+Wk1mFteNqXeNZbw9T\nfxDlEWCHpE0RMdS02W+A90bEcUm3AxuAG6bar1tOZlXVvunI1wAHI+K5iDgLPArced6hIn4aEccb\nL58BlrTaqVtOZhVVH4RZuOW0SNLOptcbImJD4+fFwOGm944wdavo48APWh3Qxcmsyoo/leBYRKye\n6eEkvZ96cXpPq21dnMwqbBotp6m8BCxter2kse78Y0nXAF8Hbo+Il1vt1H1OZlXVvj6nHcBySVdK\nmgPcBWxq3kDS24HHgI9ExK+LxHPLyayy2nNvXUSMSXoA2AL0ARsjYp+k+xvvrwf+BlgIfFUSwFir\n00QXJ7Mqa9PD5iJiM7D5gnXrm37+BPCJ6ezTxcmsqsKP6TWzVPkxvWaWpHRrk4uTWZWplu55nYuT\nWVUF0xmE2XUuTmYVJaJdgzA7wsXJrMpcnMwsSS5OZpacxPucsr63bixGee13j4ixKsvpu5BSVtVq\nhZYyZFucxmKUZ3mKnWzlWAyXHcdKlNN3Ia2sUT+tK7KUINvTuiF2McBCZjOHQwwxLy6lX3PLjmUl\nyOm7kFTWwH1OnbCS6znFqxzmENdwI33qKztSYSNxgj5mMVfzyo7SUg5Zc/ouJJe1F/qcJPVJ+rmk\nxzsZqKjmv9TS/4KnqUaNPWzjdJwsO0pLOWTN6buQWlZFFFrKMJ2W04PAfmB+h7L0pOF4gec58Ib1\nZznDXrazhltKSDWxnLJam+R+WidpCfAB4O+Bv+5ooh4zqGUMsuy8dWfiNLt5mhW0nLqrq3LKam0Q\nAePpntcVbTl9CfgMcMlkG0haB6wD6CfNzshUnGKEq1jFgBaVHaWlnLLam5Bzy0nSB4GjEbFL0vsm\n264xTcwGgPla0JU/8XwtYCULunGotlqoy8uOUFguWXP6LiSVNefiBNwEfEjSHUA/MF/StyLi3s5G\nM7OOCqANzxDvlJZX6yLicxGxJCKuoD6rwo9cmMx6QUDUii0lyHack5nNUNATHeIARMSPgR93JImZ\ndV/mfU5m1qtcnMwsPeXd1FuEi5NZVQXgCQ7MLEluOZlZenrj9hUz6zUBUdIYpiJcnMyqLOER4i5O\nZlXmPiczS06Er9aZWaLccjKz9AQxPl52iEm5OJlVVeKPTHFxMquyhIcSZDupppnNTABRi0JLK5LW\nSjog6aCkhyZ4X5K+3Hh/j6R3tdqni5NZVUV7HjYnqQ94GLgduBq4W9LVF2x2O7C8sawDvtYqnouT\nWYXF+HihpYU1wMGIeC4izgKPAndesM2dwDej7hlgQNLgVDvtSJ/TCMeP/TC+90Kbd7sIONbmfXZS\nTnlzygp55e1U1mWtN5naCMe3/DC+V3RanX5JO5teb2hMagKwGDjc9N4R4IYLfn+ibRYDw5MdsCPF\nKSIua/c+Je2MiNXt3m+n5JQ3p6yQV96Us0bE2rIzTMWndWY2Uy8BS5teL2msm+4253FxMrOZ2gEs\nl3SlpDnUZ2nadME2m4CPNq7avRt4NSImPaWDvMY5bWi9SVJyyptTVsgrb05Z35SIGJP0ALAF6AM2\nRsQ+Sfc33l8PbAbuAA4Cp4H7Wu1XkfC9NWZWXT6tM7MkuTiZWZKyKE6thsanRNJGSUcl7S07SyuS\nlkraKmlI0j5JD5adaTKS+iX9TNIvGlk/X3amIiT1Sfq5pMfLzpKb5ItTwaHxKXkESHr8SJMx4NMR\ncTXwbuCTCX+2rwM3R8S1wHXA2sZVn9Q9COwvO0SOki9OFBsan4yIeBJ4pewcRUTEcEQ82/h5hPo/\nosXlpppY47aHk42XsxtL0ldzJC0BPgB8vewsOcqhOE027N3aSNIVwCpge7lJJtc4RdoNHAWeiIhk\nszZ8CfgMkO5zSRKWQ3GyDpM0D/g+8KmIeK3sPJOJiPGIuI766OI1kt5ZdqbJSPogcDQidpWdJVc5\nFKdpD3u34iTNpl6Yvh0Rj5Wdp4iIOAFsJe2+vZuAD0l6nnpXxM2SvlVupLzkUJyKDI23N0GSgG8A\n+yPii2XnmYqkyyQNNH6+GLgV+FW5qSYXEZ+LiCURcQX17+yPIuLekmNlJfniFBFjwLmh8fuB70bE\nvnJTTU7Sd4BtwDskHZH08bIzTeEm4CPU/1ff3VjuKDvUJAaBrZL2UP8P64mI8OX5HubbV8wsScm3\nnMysmlyczCxJLk5mliQXJzNLkouTmSXJxcnMkuTiZGZJ+j94h+YUGAYsTwAAAABJRU5ErkJggg==\n",
"text/plain" : [
" <matplotlib.figure.Figure at 0x11351aa20>"
]
},
"metadata" : {},
"output_type" : " display_data"
}
],
"source" : [
" rows,columns,prob,discount,tol=5,5,0.8,0.9,1e-7\n " ,
" A,b,c,P_0=example(rows,columns,prob,discount,tol)\n " ,
" vals, bestAction = valitr(P_0, b, discount, tol) # Value Iteration\n " ,
" vals2, bestAction2,error = policyitr(P_0, b[0:P_0.shape[1]], discount, tol) # Policy Iteration\n " ,
" res = linprog(c, -A, -b, A_eq=None, b_eq=None, bounds=None, method='simplex', callback=None, options={'disp': False, 'bland': False, 'tol': tol, 'maxiter': 1000})\n "
" valit_val, bestAction = valitr(P_0, b, discount, tol) # Value Iteration\n " ,
" polyit_val, bestAction2,error = policyitr(P_0, b[0:P_0.shape[1]], discount, tol) # Policy Iteration\n " ,
" res = linprog(c, -A, -b, A_eq=None, b_eq=None, bounds=None, method='simplex',\\\n " ,
" callback=None, options={'disp': False, 'bland': False, 'tol': tol, 'maxiter': 1000}) # Simplex\n " ,
" ecos_val=ECOS_Primal(A,b,c,P_0,tol) # ECOS Primal\n " ,
" scs_val=SCS_Primal(A,b,c,P_0,tol) # SCS Primal\n " ,
" cvxopt_val=CVXOPT_Primal(A,b,c,P_0,tol) # CVXOPT Primal\n " ,
" projgrad_val, projgrad_error, projgrad_iter = descent(graddes, \" gradient\" , vals+100, A, b, 1, tol, valit_val) # Projected Gradient\n " ,
" accgrad_val, accgrad_error, accgrad_iter = descent(accelgrad, \" accelerated\" , vals+100, A, b, 1e-1, tol, valit_val) # Accelerated Gradient Descent\n " ,
" interior_val, interior_error=interiorPoint(A, b, discount, tol, 0.01, valit_val) # Interior Point Method\n " ,
" plotGridWorld(b[0:P_0.shape[1]], bestAction2, rows, columns)\n "
]
},
{
"cell_type" : " code" ,
"execution_count" : 148 ,
"metadata" : {},
"outputs" : [
{
"name" : " stdout" ,
"output_type" : " stream" ,
"text" : [
" 0.0633940149557\n " ,
" [[ 3.9892686 ]\n " ,
" [ 4.27339486]\n " ,
" [ 4.68089128]\n " ,
" [ 5.14291423]\n " ,
" [ 5.56718529]\n " ,
" [ 4.27545262]\n " ,
" [ 4.60805877]\n " ,
" [ 5.08252695]\n " ,
" [ 5.62306434]\n " ,
" [ 6.14954544]\n " ,
" [ 4.68390263]\n " ,
" [ 5.08619153]\n " ,
" [ 5.64601766]\n " ,
" [ 6.2843159 ]\n " ,
" [ 6.92617588]\n " ,
" [ 5.14186379]\n " ,
" [ 5.62912687]\n " ,
" [ 6.28261377]\n " ,
" [ 7.04332178]\n " ,
" [ 7.85415438]\n " ,
" [ 5.57292957]\n " ,
" [ 6.14429582]\n " ,
" [ 6.92649722]\n " ,
" [ 7.85195491]\n " ,
" [ 8.9830694 ]]\n "
]
}
],
"source" : [
" #finds another feasible point using the optimal solution\n " ,
" valsFeasible = polyit_val +1;\n " ,
" #finds analytical center\n " ,
" vals_analytical = findAnalyticCenter(A,b,valsFeasible,1000);\n " ,
" print(np.min(np.dot(A,vals_analytical)-b))\n " ,
" projgrad_val, projgrad_error, projgrad_iter = descent(graddes, \" gradient\" , vals_analytical, A, b, 1, tol, valit_val) # Projected Gradient\n " ,
" accgrad_val, accgrad_error, accgrad_iter = descent(accelgrad, \" accelerated\" , vals_analytical, A, b, 1, tol, valit_val, T = 10) # Accelerated Gradient Descent\n " ,
" print(projgrad_val)"
]
},
{