# Deep Deterministic Policy Gradient (DDPG) Development

Goal: train a DDPG agent that enables a dynamic liquidity-provision strategy.

In [1]:
import os
import random
import time
import warnings
from datetime import datetime

import matplotlib as mpl
import pandas as pd
from tqdm import tqdm

from api.definer.core import *
from api.definer.ddpg import *
from api.definer.data import graph

warnings.simplefilter("ignore")
mpl.style.use('default')

In [2]:
# Seed the global RNGs of Python, NumPy and PyTorch with one shared value so
# that repeated runs of the notebook draw the same random numbers.
random_state = 0
for _seed_fn in (random.seed, np.random.seed, torch.random.manual_seed):
    _seed_fn(random_state)

print('Set random seed: {}'.format(random_state))

Set random seed: 0


In [3]:
# Notebook-wide switches.
#   LIVE   - True: fetch pool history remotely; False: read the cached CSV
#            (this is how the data-loading cell branches).
#   SAVE   - presumably meant to gate writing data to disk - TODO confirm.
#   AROUND - NOTE(review): purpose is not evident from this notebook; confirm or remove.
LIVE, SAVE, AROUND = False, False, True

## Collect Historical Data from the USDC-ETH 0.05% Uniswap V3 Pool

In [4]:
# USDC-ETH 0.05% Uniswap V3 pool on Ethereum chain.
#   LIVE=True  -> query the pool history through `graph`, starting at `after`.
#   LIVE=False -> reuse the CSV snapshot stored under ./data.
if LIVE:
    address = "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640"
    after = '2022-01-01'
    # Interpret `after` as midnight UTC. Calling datetime.timestamp() on a
    # naive datetime assumes the machine's local timezone, which would move
    # the cutoff (and therefore the returned rows) depending on the host.
    after_ts = int(pd.Timestamp(after, tz='UTC').timestamp())
    dpd = graph(address, after_ts)
    # Refresh the cached snapshot only when explicitly requested via SAVE.
    if SAVE:
        dpd.to_csv('./data/uniswap_v3_usdc_eth.csv')
else:
    dpd = pd.read_csv('./data/uniswap_v3_usdc_eth.csv', index_col=[0])

In [5]:
# Token decimals are read from the first row and reused for every row.
first_row = dpd.iloc[0]
decimal0 = first_row['pool.token0.decimals']
decimal1 = first_row['pool.token1.decimals']
decimal = decimal1 - decimal0

# Rescale the raw feeGrowthGlobal*X128 counters: divide by 2**128, then by
# 10**decimals of the respective token.
x128_scale = 2 ** 128
fee_growth0 = ((dpd['feeGrowthGlobal0X128']) / x128_scale) / (10 ** decimal0)
fee_growth1 = ((dpd['feeGrowthGlobal1X128']) / x128_scale) / (10 ** decimal1)
dpd['fg0'] = fee_growth0
dpd['fg1'] = fee_growth1

# Per-row fee growth = this row's counter minus the counter of the following row.
dpd['fg0shift'] = fee_growth0.shift(-1)
dpd['fg1shift'] = fee_growth1.shift(-1)
dpd['fee0token'] = fee_growth0 - dpd['fg0shift']
dpd['fee1token'] = fee_growth1 - dpd['fg1shift']

# The last row's close is reported as the starting price.
print('starting price:', round(dpd['close'].iloc[-1], 2))

starting price: 1995.85


## Initialize and Train the DDPG Model

In [6]:
# Parameters of the initial (static) position.
base = 0                   # forwarded unchanged to get_initial_wealth / get_fee
n = 1                      # multiplier applied to the starting close price
mini, maxi = 1500, 2500    # lower / upper price bound of the initial range
target = n * dpd['close'].iloc[-1]

In [7]:
# Square roots of the range bounds after scaling by 10**decimal
# (decimal = decimal1 - decimal0).
price_scale = 10 ** decimal
SMIN = np.sqrt(mini * price_scale)
SMAX = np.sqrt(maxi * price_scale)

# Size the position, derive its liquidity at the last row's price0, and let
# get_fee return the frame used by all later cells (it replaces `dpd`).
amount0, amount1, deltaL = get_initial_wealth(base, dpd, decimal, SMIN, SMAX, target)
entry_price0 = dpd['price0'].iloc[-1]
liq = get_liquidity(entry_price0, mini, maxi, amount0, amount1, decimal0, decimal1)
dpd = get_fee(dpd, base, mini, maxi, liq, decimal0, decimal1, decimal)

In [8]:
# Mean / standard deviation of close price and amountV over the whole frame;
# the training and testing cells use them to z-score the state vector.
close_col = dpd.close
volume_col = dpd.amountV
p_mean, p_std = close_col.mean(), close_col.std()
v_mean, v_std = volume_col.mean(), volume_col.std()

In [9]:
EPISODES = 2
# Every step of the training / testing loops drops one row from the working
# frame and then reads its last two rows, so an episode can take at most
# len(dpd) - 2 steps. The original fixed value (1000 - 2) is kept as the cap;
# shorter histories now shrink the episode instead of raising IndexError.
EP_STEPS = max(min(1000, len(dpd)) - 2, 0)

# The training loop starts calling ddpg.learn() once ddpg.pointer exceeds this.
MEMORY_CAPACITY = 500

s_dim = 2            # state: z-scored close price and z-scored amountV
a_dim = 2            # action: two offsets that become the range bounds
a_high_bound = 10    # actions are clipped to [a_low_bound, a_high_bound]
a_low_bound = -10

ddpg = DDPG(a_dim, s_dim, a_high_bound)
var = 1  # the controller of exploration which will decay during training process

In [10]:
# Train the DDPG agent for EPISODES passes over the data.
# Each step: pick a price range (action), compute the position via the
# get_initial_wealth / get_liquidity / get_fee helpers, and use the `feeusd`
# of the following row as the reward.
t1 = time.time()  # wall-clock start, reported after the last episode
r_list = []       # one list of per-step rewards per episode

for i in range(EPISODES):

    # Every episode restarts from an untouched copy of the full frame.
    dpd_step = dpd.copy()

    # Initial state: z-scored close and amountV of the last row.
    s = np.array([
        (dpd_step.iloc[-1].close - p_mean) / p_std,
        (dpd_step.iloc[-1].amountV - v_mean) / v_std
    ])

    ep_r = 0         # cumulative reward of this episode
    ep_r_list = []   # per-step rewards of this episode
    r = 0            # last step reward; the progress bar shows it before each step

    pbar = tqdm(range(EP_STEPS))
    for j in pbar:

        # Shows the reward of the previous step (0 on the first step).
        pbar.set_description("episode: {}, step reward: {} \t".format(i + 1, round(r, 3)))

        # Drop the last row so the working frame shrinks by one row per step.
        # NOTE(review): presumably rows are ordered newest-first, so this moves
        # forward in time - confirm against the data source.
        dpd_step = dpd_step.iloc[:-1]
        # Disabled alternative: use the close price as target.
        # target = dpd_step['close'].iloc[-1]
        target = dpd_step['amountV'].iloc[-1]

        # Disabled alternative: re-centre the normalisation on the current row.
        # p_mean = dpd_step.iloc[-1].close
        # v_mean = dpd_step.iloc[-1].amountV

        # Actor output plus Gaussian exploration noise (std = var), clipped to the action bounds.
        a = ddpg.choose_action(s)
        a = np.clip(np.random.normal(a, var), a_low_bound, a_high_bound)
        # Order the pair so a[0] <= a[1]; nudge a[1] if both are equal so the
        # resulting range has non-zero width.
        (a[0], a[1]) = (a[1], a[0]) if a[0] > a[1] else (a[0], a[1])
        if a[0] == a[1]: a[1] += 0.000001
        # Disabled alternative: clip each bound relative to close / 1000.
        # a[0] = np.clip(a[0], a_low_bound, min(a_high_bound, dpd_step['close'].iloc[-1] / 1000))
        # a[1] = np.clip(a[1], max(a_low_bound, dpd_step['close'].iloc[-1] / 1000), a_high_bound)

        # Convert the action to price bounds: offsets in units of p_std around
        # the current close (the disabled line centred on p_mean instead).
        # action = a * p_std + p_mean
        action = a * p_std + dpd_step.iloc[-1].close
        mini, maxi = action[0], action[1]
        # NOTE(review): a[0] can be as low as a_low_bound, so `mini` may be
        # <= 0 when |a[0]| * p_std exceeds the close; np.sqrt would then yield
        # NaN, and warnings are silenced at the top of the notebook.
        SMIN, SMAX = np.sqrt(mini * 10 ** decimal), np.sqrt(maxi * 10 ** decimal)

        # Recompute position size, liquidity and fees for the chosen range.
        amount0, amount1, deltaL = get_initial_wealth(base, dpd_step, decimal, SMIN, SMAX, target)
        liq = get_liquidity(dpd_step['price0'].iloc[-1], mini, maxi, amount0, amount1, decimal0, decimal1)
        dpd_step = get_fee(dpd_step, base, mini, maxi, liq, decimal0, decimal1, decimal)

        # Next state and reward both come from the second-to-last row of the
        # frame returned by get_fee.
        state_next = dpd_step.iloc[-2]
        s_ = np.array([
            (state_next.close - p_mean) / p_std,
            (state_next.amountV - v_mean) / v_std
        ])
        r = state_next.feeusd
        # print('step: {} \t reward: {}'.format(episode, r))

        # Disabled filter: store only transitions with a non-trivial reward.
        # if r > 0.0001:
        ddpg.store_transition(s, a, r, s_) # store the transition to memory

        # Learning (and noise decay) starts once the transition counter
        # exceeds MEMORY_CAPACITY; `var` is never reset, so the decay carries
        # over between episodes.
        if ddpg.pointer > MEMORY_CAPACITY:
            var *= 0.999 # decay the exploration controller factor
            ddpg.learn()

        s = s_
        ep_r += r
        ep_r_list.append(r)

        # print('step: {} \t reward: {} \t action: [{}, {}]'.
        #       format(j, round(r, 4), round(mini, 2), round(maxi, 2)))

    # '%i' truncates the cumulative reward to an integer for display.
    print('Episode: ', i + 1, '\t Reward: %i' % ep_r, '\t Explore: %.2f' % var)

    r_list.append(ep_r_list)

print('Running time: ', time.time() - t1)

episode: 1, step reward: 0.257 	: 100%|██████████| 998/998 [03:40<00:00,  4.53it/s] 


Episode:  1 	 Reward: 267 	 Explore: 0.61


episode: 2, step reward: 0.32 	: 100%|██████████| 998/998 [03:47<00:00,  4.39it/s] 

Episode:  2 	 Reward: 229 	 Explore: 0.22
Running time:  447.60243582725525





In [11]:
# Persist the per-step training rewards (one row per episode).
# Create the output folder first so a missing ./results directory does not
# throw away the results of the training run above.
os.makedirs('./results', exist_ok=True)
pd.DataFrame(r_list).to_csv('./results/ddpg_training_rewards.csv')

## Testing the Trained DDPG Model

In [13]:
# Roll out the trained actor without exploration noise and record, for every
# step, the chosen price range, the reference price and the fee earned.
# NOTE: this replays the same `dpd` history the agent was trained on, so the
# resulting total is an in-sample estimate.
dpd_step = dpd.copy()

a_list = []  # one dict per step: mini, maxi, price, feeusd

# Initial state: z-scored close and amountV of the last row.
s = np.array([
    (dpd.iloc[-1].close - p_mean) / p_std,
    (dpd.iloc[-1].amountV - v_mean) / v_std
])

ep_r = 0
r = 0  # last step reward; the progress bar shows it before each step

pbar = tqdm(range(EP_STEPS))
for j in pbar:

    pbar.set_description("step reward: {} \t".format(round(r, 3)))

    # Drop the last row so the working frame shrinks by one row per step.
    dpd_step = dpd_step.iloc[:-1]
    target = dpd_step['amountV'].iloc[-1]

    # Deterministic action: the previous np.random.normal(a, 0) returned the
    # actor output unchanged but still consumed the global NumPy RNG. Convert
    # to float64 (what the normal draw produced) and clip to the action bounds.
    a = np.clip(np.asarray(ddpg.choose_action(s), dtype=np.float64), a_low_bound, a_high_bound)
    # Order the pair so a[0] <= a[1]; nudge a[1] if both are equal so the
    # resulting range has non-zero width.
    if a[0] > a[1]:
        a[0], a[1] = a[1], a[0]
    if a[0] == a[1]: a[1] += 0.000001

    # Price bounds: offsets in units of p_std around the current close.
    price_now = dpd_step['close'].iloc[-1]
    action = a * p_std + price_now
    mini, maxi = action[0], action[1]
    SMIN, SMAX = np.sqrt(mini * 10 ** decimal), np.sqrt(maxi * 10 ** decimal)

    # Recompute position size, liquidity and fees for the chosen range.
    amount0, amount1, deltaL = get_initial_wealth(base, dpd_step, decimal, SMIN, SMAX, target)
    liq = get_liquidity(dpd_step['price0'].iloc[-1], mini, maxi, amount0, amount1, decimal0, decimal1)
    dpd_step = get_fee(dpd_step, base, mini, maxi, liq, decimal0, decimal1, decimal)

    # Next state and reward both come from the second-to-last row of the
    # frame returned by get_fee.
    state_next = dpd_step.iloc[-2]
    s_ = np.array([
        (state_next.close - p_mean) / p_std,
        (state_next.amountV - v_mean) / v_std
    ])
    r = state_next.feeusd

    s = s_
    ep_r += r

    # `price_now` was captured before get_fee replaced dpd_step.
    step_dict = {'mini': mini, 'maxi': maxi, 'price': price_now, 'feeusd': r}
    a_list.append(step_dict)

step reward: 0.0 	: 100%|██████████| 998/998 [03:38<00:00,  4.56it/s]  


In [14]:
# Sum the per-step fees recorded during the test roll-out and report them
# rounded to two decimals.
total_fee = pd.DataFrame(a_list).feeusd.sum()
print('Total fee earnings estimated with dynamic minting using DDPG:', round(total_fee, 2))

Total fee earnings estimated with dynamic minting using DDPG: 255.18


In [17]:
# Persist the per-step test records (range bounds, price, fee).
# Create the output folder first so a missing ./results directory does not
# make the write fail.
os.makedirs('./results', exist_ok=True)
pd.DataFrame(a_list).to_csv('./results/ddpg_actions.csv')

In [18]:
# Save the state dicts of the four networks held by the agent.
# Create the output folder first so a missing ./results directory does not
# make torch.save fail.
os.makedirs('./results', exist_ok=True)
torch.save(ddpg.actor_eval.state_dict(), './results/ddpg_actor_eval.pt')
torch.save(ddpg.actor_target.state_dict(), './results/ddpg_actor_target.pt')
torch.save(ddpg.critic_eval.state_dict(), './results/ddpg_critic_eval.pt')
torch.save(ddpg.critic_target.state_dict(), './results/ddpg_critic_target.pt')