In [134]:
# importing src directory
import sys
sys.path.append('..')
# experiment imports
import os
import math
import numpy as np
import random
from scipy.stats import truncnorm
from scipy import integrate
import matplotlib.pyplot as plt
import pandas as pd
# project imports
from amm.amm import AMM, SimpleFeeAMM
from amm.fee import TriangleFee, PercentFee, NoFee
# data imports
from data.kaiko import fetch_data
from api_key.my_api_key import api_key

In [135]:
def calibrate_gbm(T, N, dt, asset, start_date, end_date, freq, api_key):
    """
    calibrate geometric brownian motion
    
    mu (float): drift coefficient
    sigma (float): diffusion coefficient
    S0 (float): initial value
    T (float): terminal time
    N (int): number of time steps
    dt (float): time step size
    asset (str): token for aset (e.g. btc, eth)
    start_date (str): start date for data (YYYY-MM-DDT00:00:00)
    end_date (str): end date for data (YYYY-MM-DDT00:00:00)
    freq (str): frequency of data (1h, 1d, 1w)

    return numpy.ndarray: simulated gbm path
    """

    # check if data exists, if not fetch data
    if os.path.exists(f"/analyze/crypto_data/{asset}-usd_{start_date}_{end_date}_{freq}.csv"):
        data =  pd.read_csv(f"/analyze/crypto_data/{asset}-usd_{start_date}_{end_date}_{freq}.csv")["price"]
    else: data = fetch_data(api_key, asset, start_date, end_date, freq)

    # use data to calibrate gbm
    returns = np.log(data / data.shift(1)) # get returns
    mu = returns.mean() * 252  # annualized return
    sigma = returns.std() * 252 ** 0.5 # annualized volatility
    S0 = data.iloc[-1] # get LAST price in series
    # generate gbm path
    t = np.linspace(0, T, N)
    W = np.random.standard_normal(size=N)
    W = np.cumsum(W) * np.sqrt(dt)  # Standard Brownian motion
    X = (mu - 0.5 * sigma**2) * t + sigma * W 
    S = S0 * np.exp(X)  # Geometric Brownian motion
    
    return S

In [136]:

def sim1(n, pair, start_dt, end_dt, frequency):
    """
    simulate AMM market with data calibrated GBM for external oracles and trading agents
    n (int): number of simulations
    pair (str): asset pair for data (e.g. btc-eth)
    asset1_n (int): number of asset1 tokens
    asset2_n (int): number of asset2 tokens
    start_dt (str): start date for data (YYYY-MM-DD)
    end_dt (str): end date for data (YYYY-MM-DD)
    frequency (str): frequency of data (1h, 1d, 1w)
    return list: list of dataframes for each simulation 
    """

    # # SIM STORAGE # #
    # create list to store dfs from each simulation of amms
    amm_sims = [] 
    # parse asset1 and asset2, create USD denominated pairs
    asset1 = pair.split("-")[0] 
    asset2 = pair.split("-")[1]
    # fetch data from kaiko - WON'T vary per simulation    
    asset1_data = pd.DataFrame(fetch_data(api_key, asset1 + "-usd", start_dt, end_dt, frequency))
    asset2_data = pd.DataFrame(fetch_data(api_key, asset2 + "-usd", start_dt, end_dt, frequency)) 
    # convert timestamp to datetime
    asset1_data['timestamp'] = pd.to_datetime(asset1_data['timestamp'], unit='ms') 
    asset2_data['timestamp'] = pd.to_datetime(asset2_data['timestamp'], unit='ms')
    # convert price to numeric
    asset1_data['price'] = pd.to_numeric(asset1_data['price'])
    asset2_data['price'] = pd.to_numeric(asset2_data['price'])
    # merge dataframes on timestamp saving price for each asset denominated in USD for storing AMM market data
    marketDF = pd.merge(asset1_data, asset2_data, on='timestamp', how='inner', suffixes=("_" + asset1, "_" + asset2))
    # calculate market ratio of asset1/asset2
    marketDF[f'mrkt_{asset1}/{asset2}'] = marketDF[f'price_{asset1}'] / marketDF[f'price_{asset2}'] 
    # add columns for trade tracking (amm ratio, inventory, averages)
    new_cols = [f'amm_{asset1}/{asset2}', f'{asset1}_inv', f'{asset2}_inv', 'L_inv']
    marketDF = marketDF.assign(**{col: None for col in new_cols})
    # add columns for moving averages
    marketDF[f'20mavg_{asset1}'] = marketDF[f'price_{asset1}'].rolling(window=20).mean()
    marketDF[f'50mavg_{asset1}'] = marketDF[f'price_{asset1}'].rolling(window=50).mean()
    marketDF[f'200mavg_{asset1}'] = marketDF[f'price_{asset1}'].rolling(window=200).mean()
    marketDF[f'20mavg_{asset2}'] = marketDF[f'price_{asset2}'].rolling(window=20).mean()
    marketDF[f'50mavg_{asset2}'] = marketDF[f'price_{asset2}'].rolling(window=50).mean()
    marketDF[f'200mavg_{asset2}'] = marketDF[f'price_{asset2}'].rolling(window=200).mean()
    # # TIME SERIES SIMULATIONS # #
    # for each simulation create new set of amms & run new set of trades
    for simulation in range(n):
        # create new market df for each simulation
        market = marketDF.copy() 
        # setup amms to simulate
        nofeeAMM = SimpleFeeAMM(fee_structure = NoFee()) 
        percentAMM = SimpleFeeAMM(fee_structure = PercentFee(0.01))
        triAMM = SimpleFeeAMM(fee_structure = TriangleFee(0.003, 0.0001, -1)) 
        # setup new set of dfs to save simulations
        amm_cols = [f'{asset1}_inv', f'{asset2}_inv', 'L_inv', f'{asset1}', f'{asset2}', 'L', f'F{asset1}', f'F{asset2}', 'FL']
        percentDF = pd.DataFrame(columns=amm_cols)
        nofeeDF = pd.DataFrame(columns=amm_cols)
        triDF = pd.DataFrame(columns=amm_cols)
        # store pairs of amm type & df for updating
        amms = [(nofeeAMM, nofeeDF), (percentAMM, percentDF), (triAMM, triDF)]

# TODO: [1] add calibration & path generation (change mavgs + agents accordingly) - check assumptions of GBM
        # [2] train-test calibration-period splitting
        # [3] multiple price streams for multiple external oracles

        # iterate over each timestep in crypto market data
        for t in range(len(asset1_data)):
            # check momentum
            if marketDF[f'20mavg_{asset1}'][t] > (marketDF[f'50mavg_{asset1}'][t] * 1.05) and marketDF[f'20mavg_{asset2}'][t] < marketDF[f'50mavg_{asset2}'][t]:
                asset_out, asset_in, asset_in_n = asset1, asset2, random.choice(list(range(1, 50)))
            if marketDF[f'20mavg_{asset2}'][t] > (marketDF[f'50mavg_{asset2}'][t] * 1.05) and marketDF[f'20mavg_{asset1}'][t] < marketDF[f'50mavg_{asset1}'][t]:
                asset_out, asset_in, asset_in_n = asset2, asset1, random.choice(list(range(1, 50)))
            # check value
            if marketDF[f'50mavg_{asset1}'][t] < marketDF[f'200mavg_{asset1}'][t] and marketDF[f'50mavg_{asset2}'][t] > marketDF[f'200mavg_{asset2}'][t]:
                asset_out, asset_in, asset_in_n = asset1, asset2, random.choice(list(range(1, 50)))
            if marketDF[f'50mavg_{asset2}'][t] < marketDF[f'200mavg_{asset2}'][t] and marketDF[f'50mavg_{asset1}'][t] > marketDF[f'200mavg_{asset1}'][t]:
                asset_out, asset_in, asset_in_n = asset2, asset1, random.choice(list(range(1, 50)))
            # check arbitrage
            if marketDF[f'amm_{asset1}/{asset2}'][t] > (marketDF[f'market_{asset1}/{asset2}'][t] * 1.005):
                asset_out, asset_in, asset_in_n = asset1, asset2, random.choice(list(range(1, 50)))
            if (marketDF[f'amm_{asset1}/{asset2}'][t] * 1.005) < marketDF[f'market_{asset1}/{asset2}'][t]:
                asset_out, asset_in, asset_in_n = asset2, asset1, random.choice(list(range(1, 50)))

# TODO: swap around order so call trades at each if check and execute right away so no order aggregation issue at each step
            
            # update market data with amm data
            for amm, df in amms:
                # call trade for each AMM
                succ, info = amm.trade_swap(asset_out, asset_in, asset_in_n)
                # add row to df for each trade
                new_row = {f'{asset1}_inv': amm.portfolio[asset1], f'{asset2}_inv': amm.portfolio[asset2], 'LInv': amm.portfolio['L'], # add trade info to df
                        asset1: info['asset_delta'][asset1], f'{asset2}': info['asset_delta'][asset2], 'L': info['asset_delta']['L'], 
                        f'F{asset1}': amm.fees[asset1], f'F{asset2}': amm.fees[asset2], 'FL': amm.fees['L']}
                # append new row to df
                df = df.append(new_row, ignore_index=True)

    return amm_sims # return list of dfs for each simulation

In [137]:

sim1(2, "btc-eth", '2023-02-01T00:00:00Z', '2024-03-01T00:00:00Z', "1d")







### NEXT COMMIT
# git commit -m "reorganized some files, built out all simulation code, working on adding agents calls -- the simulation currently will be called for set time window, asset, number of simulations, and frequency and will then pull corresponding data from kaiko, calibrate gbm, will run and save the simulaiton data once the agent calls are integrated"   

Index(['timestamp', 'price_btc', 'price_eth', 'mrkt_btc/eth', 'amm_btc/eth',
       'btc_inv', 'eth_inv', 'L_inv', 'btc20avg', 'eth20avg', 'btc50avg',
       'eth50avg', 'btc200avg', 'eth200avg'],
      dtype='object')
   timestamp     price_btc    price_eth  mrkt_btc/eth amm_btc/eth btc_inv  \
0 2023-02-01  23182.447712  1593.030093     14.552423        None    None   
1 2023-02-02  23838.319859  1674.235579     14.238331        None    None   
2 2023-02-03  23460.165909  1651.353372     14.206630        None    None   
3 2023-02-04  23405.345682  1670.274208     14.012876        None    None   
4 2023-02-05  23083.502229  1640.240757     14.073240        None    None   

  eth_inv L_inv btc20avg eth20avg btc50avg eth50avg btc200avg eth200avg  
0    None  None     None     None     None     None      None      None  
1    None  None     None     None     None     None      None      None  
2    None  None     None     None     None     None      None      None  
3    None  None     No

[]