# Value Function Training Algorithm for AMM

## Model Architecture
- Neural network with 3 input nodes (p, x, y)
- 3 hidden layers with 64 nodes each and ReLU activation
- 1 output node (value)
- Uses both main network and target network for stable training

## Parameters
- L: Constant product parameter (L² = x*y)
- γ: Fee rate (0.003)
- σ: Price volatility (0.5)
- δt: Time step (1)
- μ: Price drift (0.0)
- Fee model: 'distribute'
- Fee source: 'incoming' or 'outgoing'

## Training Process

### For each epoch:
1. Generate training data:
   - Sample x values uniformly from [50, 150]
   - Calculate y = L²/x
   - Calculate price bounds: p_min = (y/x)(1-γ), p_max = (y/x)/(1-γ)
   - Sample p values uniformly between bounds

2. For each batch:

   a. Calculate target values:
      - Use Gauss-Legendre quadrature (50 points) for numerical integration
      - For each state (p,x,y):
        * Generate future price points using log-normal distribution
        * Calculate new (x,y) positions based on AMM mechanics
        * Compute future values using target network
        * Calculate expected value through numerical integration
         $$
         E[V(p')] \approx \sum_{i=1}^{50} w_i \cdot V(p'_i) \cdot f(p'_i \mid p)
         $$

         where:
         - $w_i$ are the Gauss-Legendre weights
         - $V(p'_i)$ is the value function at future price $p'_i$
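         - $f(p'_i \mid p)$ is the log-normal PDF of the future price given the current price $p$. Assuming the price follows geometric Brownian motion with drift $\mu$ and volatility $\sigma$ over one time step $\delta t$ (confirm the exact parameterization against the implementation), it is:

           $$
           f(p' \mid p) = \frac{1}{p' \, \sigma \sqrt{2\pi \, \delta t}} \exp\!\left(-\frac{\left(\ln(p'/p) - \left(\mu - \tfrac{1}{2}\sigma^2\right)\delta t\right)^2}{2 \sigma^2 \, \delta t}\right)
           $$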

   b. Update main network:
      - Forward pass to get predicted values
      - Calculate MSE loss
      - Backpropagate and update weights
      - Clip gradients at norm 1.0

   c. Update target network:
      - Soft update with τ = 0.0005
      - θ_target = τ*θ_current + (1-τ)*θ_target

3. Adjust learning rate:
   - Decay by factor 0.95 every 10 epochs

### Training Configuration
- Number of epochs: 200
- Batch size: 64
- Initial learning rate: 0.0001
- Samples per epoch: 500


In [None]:
# Run the entry point exposed by the project-local `dp` module.
# NOTE(review): presumably this launches the training procedure described in
# the notes above — confirm against dp.py.
from dp import main

main()


In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tabulate import tabulate
import logging

# Show INFO-level (and more severe) log records; load_model reports success or
# failure through this module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class ValueFunctionNN(nn.Module):
    """Fully connected value network: 3 input features in, 1 scalar out.

    Three ``Linear`` + ``ReLU`` stages of width ``hidden_dim`` feed a
    single-output ``Linear`` head. Inputs are indexed by column; the rest of
    this notebook builds states in the order ``(p, x, y)``.

    Args:
        hidden_dim: Width of every hidden layer.
        normalize: If true, inputs go through ``normalize_input`` before the
            network; if false they are fed to the network untouched.
    """

    def __init__(self, hidden_dim=64, normalize=True):
        super().__init__()
        self.normalize = normalize

        # Divisor applied by normalize_input. Being a registered buffer, it is
        # part of the state dict and is restored together with the weights.
        self.register_buffer('L', torch.tensor(1.0))

        # The Sequential positions (Linear at 0, 2, 4, 6) define the
        # state-dict keys, so the layer order must not change.
        layers = [nn.Linear(3, hidden_dim), nn.ReLU()]
        for _ in range(2):
            layers += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
        layers.append(nn.Linear(hidden_dim, 1))
        self.network = nn.Sequential(*layers)

    def normalize_input(self, state):
        """Return the network input derived from a 2-D batch of states.

        With normalization disabled the very same tensor is returned.
        Otherwise a new tensor of the same shape is produced in which columns
        1 and 2 are divided by the buffer ``L`` and every other column is 0.
        """
        if self.normalize:
            scaled = torch.zeros_like(state)
            # NOTE(review): column 0 is never copied, so it reaches the network
            # as zeros. Left as is because saved checkpoints were presumably
            # trained with this exact preprocessing — confirm before changing.
            for col in (1, 2):
                scaled[:, col] = state[:, col] / self.L
            return scaled
        return state

    def forward(self, state):
        """Apply ``normalize_input`` and run the result through the network."""
        return self.network(self.normalize_input(state))

def load_model(model_path):
    """Create a ValueFunctionNN and fill it from a checkpoint file.

    The checkpoint is mapped onto the CPU. It may be a bare state dict or a
    dict wrapping one under 'state_dict' or 'model_state_dict' (looked up in
    that order).

    Args:
        model_path: Path of the checkpoint passed to ``torch.load``.

    Returns:
        The model in eval mode, or ``None`` if any step raised. Failures are
        logged through ``logger`` and never propagated to the caller.
    """
    try:
        net = ValueFunctionNN()

        # NOTE(review): depending on the installed torch version's
        # `weights_only` default, torch.load may unpickle arbitrary objects —
        # only point this at trusted checkpoint files.
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))

        # Unwrap the first matching wrapper key, if the checkpoint has one.
        if isinstance(checkpoint, dict):
            for wrapper_key in ('state_dict', 'model_state_dict'):
                if wrapper_key in checkpoint:
                    checkpoint = checkpoint[wrapper_key]
                    break

        net.load_state_dict(checkpoint)
        net.eval()

        logger.info(f"Successfully loaded model from {model_path}")
        return net
    except Exception as e:
        logger.error(f"Error loading model {model_path}: {str(e)}")
        return None

# Checkpoint files of the two value networks compared below.
incoming_model_path = '/home/shiftpub/Dynamic_AMM/inf_step_exp/mc_value_network_distribute_incoming.pth'
outgoing_model_path = '/home/shiftpub/Dynamic_AMM/inf_step_exp/mc_value_network_distribute_outgoing.pth'


def _load_and_report(label, model_path):
    """Load one checkpoint and print whether loading really succeeded.

    load_model signals failure by returning None instead of raising, so the
    result is checked explicitly; otherwise a failed load would still be
    announced as a success.

    Args:
        label: Lower-case model name used in the printed messages.
        model_path: Checkpoint path forwarded to load_model.

    Returns:
        The loaded model, or None when loading failed.
    """
    try:
        model = load_model(model_path)
    except Exception as e:
        # Kept as a safety net in case load_model ever propagates an error.
        print(f"Error loading {label} model: {e}")
        return None
    if model is None:
        print(f"Error loading {label} model: see the logged error for details")
        return None
    print(f"{label.capitalize()} fee model loaded successfully")
    return model


incoming_model = _load_and_report('incoming', incoming_model_path)
outgoing_model = _load_and_report('outgoing', outgoing_model_path)

def immediate_reward(state):
    """Return ``p * x + y`` for a state that unpacks into ``(p, x, y)``."""
    price, x_reserve, y_reserve = state
    x_value = price * x_reserve
    return x_value + y_reserve

def generate_test_states(L=100, gamma=0.003, num_x=1000, num_p_per_x=10):
    """Build test states ``(p, x, y)`` on the constant-product curve.

    ``x`` takes ``num_x`` evenly spaced values in ``[0.95 * L, 1.05 * L]``,
    ``y = L**2 / x`` keeps each state on the curve, and the price of every
    state is the pool ratio ``p = y / x``.

    Args:
        L: Constant-product parameter (``L**2 == x * y``).
        gamma: Fee rate. Currently unused: prices are not sampled across the
            band ``[(y/x) * (1 - gamma), (y/x) / (1 - gamma)]``; only the pool
            ratio itself is emitted. Kept so existing calls stay valid.
        num_x: Number of grid points for ``x``, i.e. number of states returned.
        num_p_per_x: Currently unused for the same reason as ``gamma``.

    Returns:
        A ``torch.float32`` tensor of shape ``(num_x, 3)`` whose columns are
        ``p``, ``x`` and ``y``.
    """
    x_values = np.linspace(L * 0.95, L * 1.05, num_x)
    y_values = L ** 2 / x_values
    price_ratios = y_values / x_values

    # column_stack keeps the (num_x, 3) layout even for an empty grid.
    states = np.column_stack((price_ratios, x_values, y_values))
    return torch.tensor(states, dtype=torch.float32)

# AMM constants for the evaluation grid.
L = 10000
gamma = 0.003
test_states = generate_test_states(L=L, gamma=gamma, num_x=1000, num_p_per_x=100)
print(f"Generated {len(test_states)} test states")


def _predict_values(model, states):
    """Return one float prediction per row of ``states``.

    A missing model (None) yields NaN for every row so the downstream columns
    keep their length.
    """
    if model is None:
        return [float('nan')] * len(states)
    # Inference only: no autograd bookkeeping, and a single batched forward
    # pass instead of one call per state.
    with torch.no_grad():
        return model(states).reshape(-1).tolist()


# Evaluate both models on the test states.
incoming_values = _predict_values(incoming_model, test_states)
outgoing_values = _predict_values(outgoing_model, test_states)

results = []
for i, ((p_val, x_val, y_val), incoming_value, outgoing_value) in enumerate(
    zip(test_states.tolist(), incoming_values, outgoing_values)
):
    results.append({
        'state_idx': i,
        'p': p_val,
        'x': x_val,
        'y': y_val,
        # Pass plain floats so the column holds numbers, not 0-dim tensors.
        'immediate_reward': immediate_reward((p_val, x_val, y_val)),
        'incoming_value': incoming_value,
        'outgoing_value': outgoing_value,
        # Positive when the outgoing-fee model predicts the higher value.
        'value_diff': outgoing_value - incoming_value,
    })

# One row per test state, for easier analysis and plotting.
df = pd.DataFrame(results)


In [None]:
# Box plot of the value difference within each x quantile bin.
plt.figure(figsize=(12, 6))

n_bins = 10
# Bin x once; box grouping and tick labels both derive from this one result,
# so they cannot drift apart.
x_ranges = pd.qcut(df['x'], n_bins)
x_quantiles = x_ranges.cat.rename_categories([f'Q{i+1}' for i in range(n_bins)])

# Walk the bins in category order (ascending x) rather than in the order the
# bins first appear in df, so the plot does not depend on row order.
bin_intervals = x_ranges.cat.categories
bin_codes = x_ranges.cat.codes
plt.boxplot([
    df.loc[bin_codes == code, 'value_diff']
    for code in range(len(bin_intervals))
])
plt.xlabel('X Quantiles')
plt.ylabel('Value Difference')
plt.title('Value Difference Distribution Across X Quantiles')
plt.grid(True)

# Label each box with the x range covered by its bin.
x_range_labels = [f'{iv.left:.1f}-{iv.right:.1f}' for iv in bin_intervals]
plt.xticks(range(1, len(bin_intervals) + 1), x_range_labels, rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Print every row of the results DataFrame as a Markdown table.
print(df.to_markdown())

In [None]:
import matplotlib.pyplot as plt

# Line plot of value_diff (outgoing minus incoming prediction) against x.
plt.plot(df['x'], df['value_diff'])