# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

# Discount factor: multiplies the next-state value in the Bellman target
# during training, and divides the learned value when it is converted into
# a stock price.
gamma = 0.9

# Neural network for value function approximation
class ValueNetwork(nn.Module):
    """Feed-forward approximator mapping a state to a scalar value estimate.

    The network is a three-layer perceptron: two ReLU-activated hidden
    layers followed by a single linear output unit.

    Args:
        input_dim: Number of features in the state vector. The default of 1
            reproduces the original scalar-state architecture.
        hidden_dim: Width of both hidden layers. The default of 64
            reproduces the original architecture.
    """

    def __init__(self, input_dim: int = 1, hidden_dim: int = 64) -> None:
        super().__init__()
        # The attribute names double as state_dict keys, so they are kept
        # stable to stay compatible with any saved checkpoints.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the value estimate for the state(s) in ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Initialize the network and its optimizer
value_net = ValueNetwork()
optimizer = optim.Adam(value_net.parameters(), lr=0.001)

num_samples = len(dividends)
if num_samples == 0:
    # Fail early with a clear message; otherwise the average-loss report
    # below would die with an opaque ZeroDivisionError.
    raise ValueError("dividends must contain at least one observation")

# Build the per-sample state tensors once. They never change between epochs,
# so re-creating them for every sample of every epoch is wasted work.
states = [torch.tensor([d], dtype=torch.float32) for d in dividends]
# The successor of sample i is sample i + 1; the last sample wraps around to
# the first. NOTE(review): this treats the data as a cycle - confirm that the
# artificial last -> first transition is intended.
next_states = states[1:] + states[:1]

# Training loop: one gradient step per sample, in sample order
for epoch in range(1000):
    total_loss = 0.0
    for state, next_state, reward in zip(states, next_states, dividends):
        # Bellman target: reward (assumed to be the dividend itself) plus the
        # discounted value of the next state. Evaluated under no_grad so the
        # target acts as a constant and no gradient flows through it.
        with torch.no_grad():
            target = reward + gamma * value_net(next_state)

        # Current state value
        current_val = value_net(state)

        # Squared TD error
        loss = (current_val - target).pow(2)
        total_loss += loss.item()

        # Update the network
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss / num_samples}")

# Generate stock prices from the learned value function.
# This is pure inference, so autograd is disabled: no computation graph is
# recorded for the per-dividend evaluations.
# Assuming pi_t is constant, each price is the estimated value divided by gamma.
with torch.no_grad():
    stock_prices_from_v = [
        value_net(torch.tensor([div], dtype=torch.float32)).item() / gamma
        for div in dividends_range.flatten()
    ]

# Plot the synthetic observations together with the price curve implied by
# the learned value function.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(dividends, stock_prices, label='Synthetic Data', color='blue', alpha=0.5)
ax.plot(dividends_range, stock_prices_from_v, label='Estimated Stock Prices', color='green')
ax.set_title('Estimated Stock Prices as a Function of Dividends')
ax.set_xlabel('Dividends')
ax.set_ylabel('Stock Prices')
ax.legend()
ax.grid(True)
plt.show()
