In [3]:
import numpy as np
import pandas as pd

**a. Deterministic Dynamic Programming Solution**

In [8]:
# Parameter Initialization
x_values = np.array([-2, -1, 0, 1, 2])
u_values = np.array([-1, 0, 1])
T = 3

# Initialization of cost and policy arrays
costs = np.zeros((T+1, len(x_values)))
policy = np.zeros((T, len(x_values)))

# Dynamic programming algorithm
for t in range(T-1, -1, -1):
    for i, x in enumerate(x_values):
        min_cost = float('inf')
        best_u = None
        
        for u in u_values:
            next_x = -x + 1 + u if -2 <= (-x + 1 + u) <= 2 else -2
            
            if next_x in x_values:
                next_idx = np.where(x_values == next_x)[0][0]
                stage_cost = 2 * abs(x) + abs(u)
                total_cost = stage_cost + (costs[t+1][next_idx] if t < T-1 else 0)
                
                if total_cost < min_cost:
                    min_cost = total_cost
                    best_u = u
                    
        costs[t][i] = min_cost
        policy[t][i] = best_u

results = []
for t in range(T):
    for i, x in enumerate(x_values):
        results.append((t, x, policy[t][i], costs[t][i]))

# DataFrame to display results
df = pd.DataFrame(results, columns=['Time', 'State', 'Optimal Control', 'Cost'])
print(df)

    Time  State  Optimal Control  Cost
0      0     -2             -1.0  10.0
1      0     -1             -1.0   5.0
2      0      0             -1.0   2.0
3      0      1              0.0   3.0
4      0      2              1.0   6.0
5      1     -2              0.0   8.0
6      1     -1             -1.0   5.0
7      1      0             -1.0   1.0
8      1      1              0.0   2.0
9      1      2              1.0   5.0
10     2     -2              0.0   4.0
11     2     -1              0.0   2.0
12     2      0              0.0   0.0
13     2      1              0.0   2.0
14     2      2              0.0   4.0


**b. Sequence of Control Actions and Optimal Costs**


In [9]:
#Initialization of States
initial_states = [0, -2, 2]

# Deterministic Dynamic Programming for the sequence of Control Actions and Optimal Costs
for x0 in initial_states:
    x = x0
    u_seq = []
    x_seq = [x]
    cost = 0
    
    for t in range(3):
        u = float(df['Optimal Control'][df['State'] == x].values[t])
        u_seq.append(u)
        
        next_x = -x + 1 + u if -2 <= -x + 1 + u <= 2 else -2
        x = next_x
        x_seq.append(x)
        
        stage_cost = 2 * abs(x) + abs(u)
        cost += stage_cost
    
    print(f"x0 = {x0}:")
    print(f"States: {x_seq}")
    print(f"Actions: {u_seq}")
    print(f"Total cost: {cost}\n")

x0 = 0:
States: [0, 0.0, 0.0, 1.0]
Actions: [-1.0, -1.0, 0.0]
Total cost: 4.0

x0 = -2:
States: [-2, 2.0, 0.0, 1.0]
Actions: [-1.0, 1.0, 0.0]
Total cost: 8.0

x0 = 2:
States: [2, 0.0, 0.0, 1.0]
Actions: [1.0, -1.0, 0.0]
Total cost: 4.0



**c. Stochastic Dynamic Programming Solution**

In [11]:
# Parameter Initialization
x_values = np.array([-2, -1, 0, 1, 2])
u_values = np.array([-1, 0, 1])
T = 3
prob_w = [0.4, 0.6]

# Initialization of cost and policy arrays
costs = np.zeros((T+1, len(x_values)))
policy = np.zeros((T, len(x_values)))

# Dynamic programming algorithm with expectations
for t in range(T-1, -1, -1):
    for i, x in enumerate(x_values):
        min_cost = float('inf')
        best_u = None
        
        for u in u_values:
            cost = 0
            for w_idx, w in enumerate([0, 1]):
                next_x = -x + w + u if -2 <= (-x + w + u) <= 2 else -2
                next_idx = np.where(x_values == next_x)[0][0]
                
                future_cost = costs[t+1][next_idx] if t < T-1 else next_x**2
                stage_cost = 2 * abs(x) + abs(u) + future_cost
                cost += prob_w[w_idx] * stage_cost
            
            if cost < min_cost:
                min_cost = cost
                best_u = u
                
        costs[t][i] = min_cost
        policy[t][i] = best_u

# Results for probabilistic model
results = []
for t in range(T):
    for i, x in enumerate(x_values):
        results.append((t, x, policy[t][i], costs[t][i]))

# DataFrame to display results of probabilistic model
df = pd.DataFrame(results, columns=['Time', 'State', 'Optimal Control', 'Cost'])
print(df)

    Time  State  Optimal Control    Cost
0      0     -2             -1.0  10.600
1      0     -1             -1.0   5.952
2      0      0              0.0   2.952
3      0      1              0.0   4.880
4      0      2              1.0   7.880
5      1     -2             -1.0   9.200
6      1     -1             -1.0   4.680
7      1      0              0.0   1.680
8      1      1              0.0   3.800
9      1      2              1.0   6.800
10     2     -2             -1.0   7.800
11     2     -1             -1.0   3.600
12     2      0              0.0   0.600
13     2      1              0.0   2.400
14     2      2              1.0   5.400
