<a href="https://colab.research.google.com/github/itsmeneeraj/alpha/blob/main/dynamicprog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Define parameters
initial_wealth = 100000  # Initial wealth
target_wealth = 200000  # Target wealth
investment_horizon = 10  # Investment horizon in years
time_steps = 10  # Number of time steps
mu = 0.1  # Expected return
sigma = 0.2  # Volatility

# Discretize state space
wealth_levels = np.linspace(0, target_wealth, num=21)  # Discretize wealth into 21 levels
time_intervals = np.linspace(0, investment_horizon, num=time_steps+1)  # Discretize time

# Define utility function (e.g., exponential utility)
def utility_function(wealth):
    return np.log(wealth + 1)  # Avoid log(0)

# Initialize value function array
value_function = np.zeros((len(wealth_levels), len(time_intervals)))

# Set terminal condition
value_function[:, -1] = utility_function(wealth_levels)

# Define function to compute next wealth using GBM
def next_wealth(wealth, allocation, mu, sigma, dt):
    return wealth * np.exp((mu - 0.5 * sigma**2) * dt + sigma * np.sqrt(dt) * np.random.normal())

# Backward induction
for t in reversed(range(len(time_intervals) - 1)):
    for w in range(len(wealth_levels)):
        expected_values = []
        for allocation in np.linspace(0, 1, num=11):  # Different allocations from 0 to 100%
            next_w = next_wealth(wealth_levels[w], allocation, mu, sigma, time_intervals[1] - time_intervals[0])
            next_w_index = np.searchsorted(wealth_levels, next_w, side='right') - 1
            next_w_index = min(next_w_index, len(wealth_levels) - 1)
            expected_value = value_function[next_w_index, t + 1]
            expected_values.append(expected_value)
        value_function[w, t] = np.max(expected_values)

# Extract optimal policy
optimal_policy = np.zeros((len(wealth_levels), len(time_intervals) - 1))
for t in range(len(time_intervals) - 1):
    for w in range(len(wealth_levels)):
        allocation_values = []
        for allocation in np.linspace(0, 1, num=11):
            next_w = next_wealth(wealth_levels[w], allocation, mu, sigma, time_intervals[1] - time_intervals[0])
            next_w_index = np.searchsorted(wealth_levels, next_w, side='right') - 1
            next_w_index = min(next_w_index, len(wealth_levels) - 1)
            expected_value = value_function[next_w_index, t + 1]
            allocation_values.append(expected_value)
        optimal_policy[w, t] = np.linspace(0, 1, num=11)[np.argmax(allocation_values)]

# Output the optimal policy
optimal_policy_df = pd.DataFrame(optimal_policy, columns=[f'Time {t}' for t in range(len(time_intervals) - 1)], index=wealth_levels)
print(optimal_policy_df)

# Save the optimal policy to an Excel file
optimal_policy_df.to_excel("optimal_policy.xlsx", sheet_name='Optimal Policy')

print("Optimal policy saved to 'optimal_policy.xlsx'.")


          Time 0  Time 1  Time 2  Time 3  Time 4  Time 5  Time 6  Time 7  \
0.0          0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
10000.0      0.0     0.0     0.0     0.0     0.3     0.0     0.0     0.1   
20000.0      0.1     0.1     0.6     0.0     0.6     0.0     0.0     0.7   
30000.0      0.2     0.0     0.1     0.3     0.7     0.0     0.6     0.5   
40000.0      0.0     0.0     0.0     0.1     0.7     0.0     0.1     0.0   
50000.0      0.0     0.0     0.0     0.0     0.0     0.0     0.4     0.3   
60000.0      0.0     0.0     0.0     0.1     0.0     0.0     0.2     0.2   
70000.0      0.0     0.0     0.0     0.0     0.0     0.0     0.5     0.0   
80000.0      0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.2   
90000.0      0.0     0.0     0.0     0.0     0.0     0.0     0.1     0.1   
100000.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
110000.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
120000.0    