<a href="https://colab.research.google.com/github/alunfes/1m-btc-data/blob/master/Cartpole_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install stable-baselines3
!pip install torchinfo


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting stable-baselines3
  Downloading stable_baselines3-1.7.0-py3-none-any.whl (171 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.8/171.8 KB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting gym==0.21
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting importlib-metadata~=4.13
  Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Building wheels for collected packages: gym
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem w

In [2]:
import numpy as np
import pandas as pd
import gym
import time
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from IPython import display
from collections import namedtuple, deque
from itertools import count
import itertools

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [11]:



class ReplayMemory(object):
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)
        self.Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

    def push(self, *args):
        """Save a transition"""
        self.memory.append(self.Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [None]:
class DQN(nn.Module):

    def __init__(self, n_observations, hidden_size, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, n_actions)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)

In [None]:
env = gym.make("CartPole-v1")

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# BATCH_SIZE is the number of transitions sampled from the replay buffer
# GAMMA is the discount factor as mentioned in the previous section
# EPS_START is the starting value of epsilon
# EPS_END is the final value of epsilon
# EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
# TAU is the update rate of the target network
# LR is the learning rate of the AdamW optimizer
BATCH_SIZE = 128
GAMMA = 0.99
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 1000
TAU = 0.005
LR = 1e-4
hidden_size =128

# Get number of actions from gym action space
n_actions = env.action_space.n
# Get the number of state observations
state = env.reset()
n_observations = len(state)

policy_net = DQN(n_observations, hidden_size, n_actions).to(device)
target_net = DQN(n_observations, hidden_size, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.AdamW(policy_net.parameters(), lr=LR, amsgrad=True)
memory = ReplayMemory(10000)


steps_done = 0


def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            # t.max(1) will return the largest column value of each row.
            # second column on max result is index of where max element was
            # found, so we pick action with the larger expected reward.
            return policy_net(state).max(1)[1].view(1, 1)
    else:
        return torch.tensor([[env.action_space.sample()]], device=device, dtype=torch.long)


episode_durations = []


def plot_durations(show_result=False):
    plt.figure(1)
    durations_t = torch.tensor(episode_durations, dtype=torch.float)
    if show_result:
        plt.title('Result')
    else:
        plt.clf()
        plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())
    # Take 100 episode averages and plot them too
    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())

    plt.pause(0.001)  # pause a bit so that plots are updated
    if is_ipython:
        if not show_result:
            display.display(plt.gcf())
            display.clear_output(wait=True)
        else:
            display.display(plt.gcf())

In [None]:
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
    # detailed explanation). This converts batch-array of Transitions
    # to Transition of batch-arrays.
    batch = memory.Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                                if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken. These are the actions which would've been taken
    # for each batch state according to policy_net
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1)[0].
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    with torch.no_grad():
        next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0]
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    # In-place gradient clipping
    torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
    optimizer.step()


In [None]:
if torch.cuda.is_available():
    num_episodes = 600
else:
    num_episodes = 300

for i_episode in range(num_episodes):
    # Initialize the environment and get it's state
    state = env.reset()
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    for t in count():
        action = select_action(state)
        observation, reward, terminated, truncated = env.step(action.item())
        reward = torch.tensor([reward], device=device)
        done = terminated or truncated

        if terminated:
            next_state = None
        else:
            next_state = torch.tensor(observation, dtype=torch.float32, device=device).unsqueeze(0)

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state
        state = next_state

        # Perform one step of the optimization (on the policy network)
        optimize_model()

        # Soft update of the target network's weights
        # θ′ ← τ θ + (1 −τ )θ′
        target_net_state_dict = target_net.state_dict()
        policy_net_state_dict = policy_net.state_dict()
        for key in policy_net_state_dict:
            target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
        target_net.load_state_dict(target_net_state_dict)

        if done:
            episode_durations.append(t + 1)
            plot_durations()
            break

print('Complete')
plot_durations(show_result=True)
plt.ioff()
plt.show()


KeyboardInterrupt: ignored

<Figure size 432x288 with 0 Axes>

In [None]:


'''
class DQNMain:
    def __init__(self):
        self.batch_size = 128
        self.gamma = 0.99
        self.eps_start = 0.9
        self.eps_end = 0.05
        self.eps_decay = 1000
        self.tau = 0.005
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        #matplot
        plt.ion()
        #ENV
        self.env = gym.make("CartPole-v1")
        self.num_actions = self.env.action_space.n
        state = self.env.reset()
        self.num_obs = len(state)
        #NN
        self.hidden_size = 128
        self.policy_net = DQN(self.num_obs, self.hidden_size, self.num_actions).to(self.device)
        self.target_net = DQN(self.num_obs, self.hidden_size, self.num_actions).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.optimizer = optim.AdamW(self.policy_net.parameters(), lr=1e-04, amsgrad=True)
        self.memory = ReplayMemory(10000)


    def select_action(self, state, num_steps):
        sample = random.random()
        eps_threshold = self.eps_end + (self.eps_start - self.eps_end) * math.exp(-1. * num_steps / self.eps_decay)
        if sample > eps_threshold:
            with torch.no_grad():
                return self.policy_net(state).max(1)[1].view(1, 1)
        else:
            return torch.tensor([[self.env.action_space.sample()]], device=self.device, dtype=torch.long)

    def plot_durations(self, episode_durations, show_result=False):
        plt.figure(1)
        durations_t = torch.tensor(episode_durations, dtype=torch.float)
        if show_result:
            plt.title('Result')
        else:
            plt.clf()
            plt.title('Training...')
        plt.xlabel('Episode')
        plt.ylabel('Duration')
        plt.plot(durations_t.numpy())
        # Take 100 episode averages and plot them too
        if len(durations_t) >= 100:
            means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
            means = torch.cat((torch.zeros(99), means))
            plt.plot(means.numpy())
        plt.pause(0.001)  # pause a bit so that plots are updated
        if not show_result:
            display.display(plt.gcf())
            display.clear_output(wait=True)
        else:
            display.display(plt.gcf())


    def optimize_model(self):
        if len(self.memory) < self.batch_size:
            return
        transitions = self.memory.sample(self.batch_size)
        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
        # detailed explanation). This converts batch-array of Transitions
        # to Transition of batch-arrays.
        batch = self.memory.Transition(*zip(*transitions))

        # Compute a mask of non-final states and concatenate the batch elements
        # (a final state would've been the one after which simulation ended)
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), device=self.device, dtype=torch.bool)
        non_final_next_states = torch.cat([s for s in batch.next_state
                                                    if s is not None])
        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)

        state_action_values = self.policy_net(state_batch).gather(1, action_batch)
        next_state_values = torch.zeros(self.batch_size, device=self.device)
        with torch.no_grad():
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        expected_state_action_values = (next_state_values * self.gamma) + reward_batch

        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

        self.optimizer.zero_grad()
        loss.backward()
        # In-place gradient clipping
        torch.nn.utils.clip_grad_value_(self.policy_net.parameters(), 100)
        self.optimizer.step()
        return loss

    def soft_update(self, target, source, tau):
        with torch.no_grad():
            for target_param, param in zip(target.parameters(), source.parameters()):
                target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)


    def main_loop(self, num_episodes):
        num_steps_log = []
        
        for i_episode in range(num_episodes):
            state = self.env.reset()
            state = torch.tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
            for t in count():
                action = self.select_action(state, t)
                observation, reward, terminated, truncated = self.env.step(action.item())
                reward = torch.tensor([reward], device=self.device)
                done = terminated or truncated

                if terminated:
                    next_state = None
                else:
                    next_state = torch.tensor(observation, dtype=torch.float32, device=self.device).unsqueeze(0)
                self.memory.push(state, action, next_state, reward)
                state = next_state
                self.optimize_model()

                # Soft update of the target network's weights
                # θ′ ← τ θ + (1 −τ )θ′
                '''
                target_net_state_dict = self.target_net.state_dict()
                policy_net_state_dict = self.policy_net.state_dict()
                for key in policy_net_state_dict:
                    target_net_state_dict[key] = policy_net_state_dict[key]*self.tau + target_net_state_dict[key]*(1-self.tau)
                self.target_net.load_state_dict(target_net_state_dict)
                '''
                self.soft_update(self.target_net, self.policy_net, self.tau)

                if done:
                    num_steps_log.append(t + 1)
                    self.plot_durations(num_steps_log)
                    break
        self.plot_durations(num_steps_log, show_result=True)
        plt.ioff()
        plt.show()

dm = DQNMain()
dm.main_loop(300)
'''

In [3]:
from numba import jit, f8, i8
from numba import njit
from numba.experimental import jitclass
import itertools

In [4]:
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [5]:
#check ticker contains in multiple ex
#exclude stable coins
def check_ticker_in_multiple_exchanges(merged_df, num_ex_kijun):
    ex_names = list(merged_df.ex_name)
    base = list(merged_df.base)
    quote = list(merged_df.quote)
    checked_tickers = []
    num_ex_listed = []
    for ticker in base:
        if ticker not in checked_tickers:
            checked_tickers.append(ticker)
            indices_base = [i for i, x in enumerate(quote) if x == ticker]
            indices_quote = [i for i, x in enumerate(base) if x == ticker]
            indices_base.extend(indices_quote)
            exs = [ex_names[i] for i in indices_base]
            exs = list(set(exs))
            if len(exs) >= num_ex_kijun:
                num_ex_listed.append({'ticker':ticker, 'exs':exs})
    target = [x['ticker'] for i, x in enumerate(num_ex_listed)]
    result = merged_df[merged_df['base'].isin(target) | merged_df['quote'].isin(target)]
    return result.reset_index(drop=True)


def check_ticker_in_multiple_exchanges_ex_stable(merged_df, num_ex_kijun, flg_exclude_stables):
    ex_names = list(merged_df.ex_name)
    base = list(merged_df.base)
    quote = list(merged_df.quote)
    checked_tickers = []
    num_ex_listed = []
    stable_coins = ['USD', 'USDT', 'USDC', 'USDK', 'BUSD', 'DAI']
    for ticker in base:
        if ticker not in checked_tickers and ticker not in stable_coins:
            checked_tickers.append(ticker)
            indices_base = [i for i, x in enumerate(quote) if x == ticker]
            indices_quote = [i for i, x in enumerate(base) if x == ticker]
            indices_base.extend(indices_quote)
            exs = [ex_names[i] for i in indices_base]
            exs = list(set(exs))
            if len(exs) >= num_ex_kijun:
                num_ex_listed.append({'ticker':ticker, 'exs':exs})
    for ticker in quote:
        if ticker not in checked_tickers and ticker not in stable_coins:
            checked_tickers.append(ticker)
            indices_base = [i for i, x in enumerate(quote) if x == ticker]
            indices_quote = [i for i, x in enumerate(base) if x == ticker]
            indices_base.extend(indices_quote)
            exs = [ex_names[i] for i in indices_base]
            exs = list(set(exs))
            if len(exs) >= num_ex_kijun:
                num_ex_listed.append({'ticker':ticker, 'exs':exs})
    target = [x['ticker'] for i, x in enumerate(num_ex_listed)]
    result = merged_df[merged_df['base'].isin(target) | merged_df['quote'].isin(target)]
    return result.reset_index(drop=True)


'''
exs = check_ticker_in_multiple_exchanges(merged_df, 3)
print(exs)
'''



'\nexs = check_ticker_in_multiple_exchanges(merged_df, 3)\nprint(exs)\n'

In [6]:
@njit
def check_sync_symbol(base_a, quote_a, base_b, quote_b):
    sync_symbol = ('','')
    if base_a == quote_b:
        if quote_a != base_b:
            sync_symbol = (base_b, quote_a)
    elif quote_a == base_b:
         if base_a != quote_b:
             sync_symbol = (base_a, quote_b)
    return sync_symbol

In [7]:
@njit
def check_all_combinations(base, quote):
    pair_combinations = []
    n = len(base)
    all_combinations = np.zeros((n**2, 2), dtype=np.int64)
    i = 0
    for x in range(n):
        for y in range(n):
            all_combinations[i, 0] = x
            all_combinations[i, 1] = y
            i += 1
    for i in range(len(all_combinations)):
        base_a = base[all_combinations[i][0]]
        quote_a = quote[all_combinations[i][0]]
        base_b = base[all_combinations[i][1]]
        quote_b = quote[all_combinations[i][1]]
        pair = check_sync_symbol(base_a, quote_a, base_b, quote_b)
        if pair != ('',''):
            pair_combinations.append(all_combinations[i])
            #print(total_df.iloc[d[i][0]].symbol, ' x ', total_df.iloc[d[i][1]].symbol, ' = ', pair)
    return pair_combinations

'''
pair_combinations = check_all_combinations(list(total_df.base), list(total_df.quote))
'''

'\npair_combinations = check_all_combinations(list(total_df.base), list(total_df.quote))\n'

In [8]:
#pairとして成立しているものの合成後のex list, sdie list, symbol, 価格を加える。
@jit
def generate_df_for_pair_combinations(pair_combinations, ex_name, base, quote, ave_price, vol, side, id):
    sync_data = []
    for comb in pair_combinations:
        a_index = comb[0]
        b_index = comb[1]
        a_ave_price = ave_price[a_index]
        b_ave_price = ave_price[b_index]
        a_vol = vol[a_index]
        b_vol = vol[b_index]
        a_side = side[a_index]
        b_side = side[b_index]
        a_symbol = base[a_index] + '/' + quote[a_index]
        b_symbol = base[b_index] + '/' + quote[b_index]
        symbol = check_sync_symbol(base[a_index], quote[a_index], base[b_index], quote[b_index])

        if base[a_index] == 'BADGER' and quote[a_index] == 'BTC' and base[b_index] == 'BADGER' and quote[b_index] == 'BTC':
            print(symbol)

        if symbol != ('', ''):
            sync_data.append({'ex_name': ex_name[a_index] + ',' + ex_name[b_index],
                              'symbol': symbol[0] + '/' + symbol[1],
                              'ave_price': a_ave_price * b_ave_price,
                              'vol': str(a_vol) + ',' + str(b_vol),
                              'base': symbol[0],
                              'quote': symbol[1],
                              'side': a_side + ',' + b_side,
                             'id': str(id[a_index])+','+str(id[b_index])})
    sync_df = pd.DataFrame(sync_data)
    return sync_df
'''    
sync_df = generate_df_for_pair_combinations(pair_combinations, list(total_df.ex_name), list(total_df.base), list(total_df.quote), list(total_df.ave_price), list(total_df.vol), list(total_df.side))
'''


'    \nsync_df = generate_df_for_pair_combinations(pair_combinations, list(total_df.ex_name), list(total_df.base), list(total_df.quote), list(total_df.ave_price), list(total_df.vol), list(total_df.side))\n'

In [9]:
def calc_symbol_min_max_ratio(final_df):
    results = [] #{symbol, min_ind, max_ind, max/min}
    symbols = list(final_df.symbol)
    ave_prices = list(final_df.ave_price)
    indicies = list(final_df.index)
    ids = list(final_df.id)
    current_symbol = symbols[0]    
    start_ind = 0
    if (len(symbols) == len(ave_prices) ==  len(indicies)) == False:
        print('length invalid !')
    for i in range(len(symbols)):
        if current_symbol != symbols[i]:
            min_ind = ave_prices[start_ind : i].index(min(ave_prices[start_ind : i]))
            max_ind = ave_prices[start_ind : i].index(max(ave_prices[start_ind : i]))
            maxmin = ave_prices[start_ind : i][max_ind]/ave_prices[start_ind : i][min_ind] if ave_prices[start_ind : i][min_ind] > 0 else np.NAN
            results.append({'symbol':current_symbol, 'id':str(ids[start_ind : i][min_ind])+','+str(ids[start_ind : i][max_ind]), 'min_ind':indicies[start_ind : i][min_ind], 'max_ind':indicies[start_ind : i][max_ind], 'max/min':maxmin})
            start_ind = i
            current_symbol = symbols[i]
    return pd.DataFrame(results).sort_values('max/min')

'''
min_max_df = calc_symbol_min_max_ratio(final_df)
min_max_df.to_csv('/content/drive/My Drive/minmax_df.csv')
'''

"\nmin_max_df = calc_symbol_min_max_ratio(final_df)\nmin_max_df.to_csv('/content/drive/My Drive/minmax_df.csv')\n"

In [10]:
exchanges = ['okx', 'binance', 'bybit', 'dydx']
vol_size_kijun = 1000
num_listed_ex_kijun = 4
#read data
ex_df = []
for ex in exchanges:
    data = pd.read_csv('/content/drive/My Drive/'+ex+'-prices.csv')
    ex_df.append(data.drop(['Unnamed: 0'], axis=1))
merged_df = pd.concat(ex_df, ignore_index=True)

#add col for base, quote
pair_list = []
for ex_name in merged_df['ex_name'].unique():
    df_ex = merged_df[merged_df['ex_name'] == ex_name]
    for symbol in df_ex['symbol']:
        base, quote = symbol.split('/')
        pair_list.append({'ex_name': ex_name, 'base': base, 'quote': quote})
pair_df = pd.DataFrame(pair_list)
merged_df['base'] = pair_df['base']
merged_df['quote'] = pair_df['quote']
#merged_df = check_ticker_in_multiple_exchanges(merged_df, num_listed_ex_kijun)
merged_df = check_ticker_in_multiple_exchanges_ex_stable(merged_df, num_listed_ex_kijun, True)
print(merged_df)
merged_df['side'] = 'buy'
#generate copied df for sell
copied_df = merged_df.copy()
copied_df['side'] = 'sell'
base = copied_df['base']
quote = copied_df['quote']
copied_df['base'] = quote
copied_df['quote'] = base
copied_df['symbol'] = quote+'/'+base
copied_df['ave_price'] = 1/copied_df['ave_price']
total_df = pd.concat([merged_df, copied_df], ignore_index=True)
total_df = total_df[total_df.vol > vol_size_kijun] #volumeが一定以下のものは削除
total_df = total_df[~total_df['symbol'].str.contains(':')] #USDT:USDTみたいなものは削除。どのようなsymbolなのかを調べる必要がある。
total_df.reset_index(drop=True)
total_df['id'] = list(total_df.index)
#check all combinations available
pair_combinations = check_all_combinations(list(total_df.base), list(total_df.quote))
print('identified ', len(pair_combinations), ' pairs for arb.')
#generate sync df
sync_df = generate_df_for_pair_combinations(pair_combinations, list(total_df.ex_name), list(total_df.base), list(total_df.quote), list(total_df.ave_price), list(total_df.vol), list(total_df.side), list(total_df.id))
final_df = pd.concat([total_df, sync_df], ignore_index=True).reset_index(drop=True)
target_df = final_df.copy()
target_df = target_df[target_df['id'].str.contains(',', na=False) == False]
target_df.to_csv('/content/drive/My Drive/target_df.csv')
final_df["symbol"] = final_df["symbol"].str.replace("USDT|USDC|BUSD", "USD", regex=True)

st = time.time()
final_df = final_df.sort_values("symbol")
#remove nan inf data rows
nan_rows = final_df.loc[final_df.isna().any(axis=1)]
print(len(nan_rows), ' rows contain nan and removed.')
final_df = final_df.dropna()
inf_rows = final_df.loc[final_df.isin([np.inf, -np.inf]).any(axis=1)]
print(len(inf_rows), ' rows contain inf and removed.')
final_df = final_df[~final_df.isin([np.inf, -np.inf]).any(axis=1)].dropna()
#final_df.to_csv('/content/drive/My Drive/final_df.csv')
min_max_df = calc_symbol_min_max_ratio(final_df)
print(time.time()-st)
min_max_df.to_csv('/content/drive/My Drive/minmax_df.csv')

     ex_name     symbol  ave_price           vol   base quote
0        okx    BCD/BTC   0.000007  2.547421e+04    BCD   BTC
1        okx  LUNA/USDC   1.423750  3.285728e+05   LUNA  USDC
2        okx   LINK/BTC   0.000280  3.717639e+04   LINK   BTC
3        okx    NEO/BTC   0.000463  6.105353e+03    NEO   BTC
4        okx   NULS/BTC   0.000011  5.780933e+04   NULS   BTC
...      ...        ...        ...           ...    ...   ...
1234    dydx    ZEC/USD  35.450000  6.438192e+06    ZEC   USD
1235    dydx    SOL/USD  20.455500  5.539325e+07    SOL   USD
1236    dydx  SUSHI/USD   1.233500  8.610171e+06  SUSHI   USD
1237    dydx    ICP/USD   5.375000  4.724167e+06    ICP   USD
1238    dydx    CRV/USD   0.980850  7.917155e+06    CRV   USD

[1239 rows x 6 columns]


Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'base' of function 'check_all_combinations'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "<ipython-input-7-3d8b3c190e17>", line 2:
@njit
def check_all_combinations(base, quote):
^

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'quote' of function 'check_all_combinations'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "<ipython-input-7-3d8b3c190e17>", line 2:
@njit
def check_all_combinations(base, quote):
^



identified  402508  pairs for arb.


Compilation is falling back to object mode WITH looplifting enabled because Function "generate_df_for_pair_combinations" failed type inference due to: No implementation of function Function(<class 'str'>) found for signature:
 
 >>> str(float64)
 
There are 10 candidate implementations:
   - Of which 10 did not match due to:
   Overload of function 'str': File: <numerous>: Line N/A.
     With argument(s): '(float64)':
    No match.

During: resolving callee type: Function(<class 'str'>)
During: typing of call at <ipython-input-8-4e7a981e67d6> (25)


File "<ipython-input-8-4e7a981e67d6>", line 25:
def generate_df_for_pair_combinations(pair_combinations, ex_name, base, quote, ave_price, vol, side, id):
    <source elided>
                              'ave_price': a_ave_price * b_ave_price,
                              'vol': str(a_vol) + ',' + str(b_vol),
                              ^

  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "gen

628  rows contain nan and removed.
10607  rows contain inf and removed.
8.093381643295288


In [54]:
final_df

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,id
213295,"okx,okx",1INCH/AAVE,0.006568,"63125.493171,6183497.774172",1INCH,AAVE,"sell,buy",12871287
251401,"okx,binance",1INCH/AAVE,0.006567,"2980.4519,474959.7",1INCH,AAVE,"sell,buy",14411441
402550,"dydx,dydx",1INCH/AAVE,0.006566,"10541071.4873,4947605.479",1INCH,AAVE,"sell,buy",24492449
51742,"okx,okx",1INCH/AAVE,0.006568,"6183497.774172,63125.493171",1INCH,AAVE,"buy,sell",214214
141090,"binance,binance",1INCH/AAVE,0.006568,"6504938.4,145536.15",1INCH,AAVE,"buy,sell",855855
...,...,...,...,...,...,...,...,...
61544,"binance,binance",ZRX/ZIL,8.602801,"75541.0,2692965.0",ZRX,ZIL,"buy,sell",260260
39676,"okx,binance",ZRX/ZIL,8.589041,"66347.955,13157879.0",ZRX,ZIL,"buy,sell",161161
284830,"binance,binance",ZRX/ZIL,8.566210,"13157879.0,1208472.0",ZRX,ZIL,"sell,buy",16941694
285161,"binance,binance",ZRX/ZIL,8.602801,"2692965.0,75541.0",ZRX,ZIL,"sell,buy",16951695


In [55]:
min_max_df

Unnamed: 0,symbol,id,min_ind,max_ind,max/min
55278,LINA/TOMO,1864186418641864,302310,302310,1.0
59312,MAGIC/CELR,1149114911491149,198690,198690,1.0
59311,MAGIC/CELO,1149114911491149,198785,198785,1.0
59310,MAGIC/CEL,1255125512551255,207930,207930,1.0
59309,MAGIC/CAKE,1149114911491149,198795,198795,1.0
...,...,...,...,...,...
95706,TRY/BRL,1085108510111011,185158,169899,
95781,TRY/GBP,22862286694694,380162,113573,
97784,USD/BIDR,753753954954,123502,158734,
97790,USD/BRL,10851085913913,185145,152424,


In [11]:
target_df

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,id


In [78]:
sync_df[sync_df.symbol=='BADGER/IOST']

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,id
37051,"okx,okx",BADGER/IOST,305.263158,"3134.0,8168035.31",BADGER,IOST,"buy,sell",158158
37159,"okx,binance",BADGER/IOST,307.058824,"3134.0,25359416.0",BADGER,IOST,"buy,sell",158158
147872,"binance,okx",BADGER/IOST,298.479532,"35300.98,8168035.31",BADGER,IOST,"buy,sell",901901
147980,"binance,binance",BADGER/IOST,300.235294,"35300.98,25359416.0",BADGER,IOST,"buy,sell",901901
225187,"okx,okx",BADGER/IOST,305.263158,"8168035.31,3134.0",BADGER,IOST,"sell,buy",13491349
225402,"okx,binance",BADGER/IOST,298.479532,"8168035.31,35300.98",BADGER,IOST,"sell,buy",13491349
281442,"binance,okx",BADGER/IOST,307.058824,"25359416.0,3134.0",BADGER,IOST,"sell,buy",16741674
281657,"binance,binance",BADGER/IOST,300.235294,"25359416.0,35300.98",BADGER,IOST,"sell,buy",16741674


In [27]:
target_df = target_df[target_df['side']=='buy']
dydx_df = target_df[target_df['ex_name']=='dydx']
dydx_df
#df = target_df.sample(frac=1, random_state=41)
#df

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,id
794,dydx,CELO/USD,0.6115,2824882.053,CELO,USD,buy,1203
795,dydx,LINK/USD,6.7875,9589424.7406,LINK,USD,buy,1204
796,dydx,DOGE/USD,0.07285,13061583.012,DOGE,USD,buy,1205
797,dydx,1INCH/USD,0.5015,4947605.479,1INCH,USD,buy,1206
798,dydx,XMR/USD,151.55,3160687.866,XMR,USD,buy,1207
799,dydx,FIL/USD,6.345,28402142.856,FIL,USD,buy,1208
800,dydx,ETH/USD,1678.95,1384498821.9657,ETH,USD,buy,1209
801,dydx,AAVE/USD,76.375,10541071.4873,AAVE,USD,buy,1210
802,dydx,ATOM/USD,12.0325,10023773.5894,ATOM,USD,buy,1211
803,dydx,MKR/USD,916.5,21265761.466,MKR,USD,buy,1212


In [20]:
"final_df[final_df['id'].str.contains(',', na=False) == False]

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,id
547,binance,1INCH/BTC,0.000021,474959.7,1INCH,BTC,buy,854
548,binance,1INCH/USD,0.499500,6504938.4,1INCH,USDT,buy,855
193,okx,1INCH/USD,0.499450,6183497.774172,1INCH,USDT,buy,214
578,binance,1INCH/USD,0.501000,1151499.2,1INCH,BUSD,buy,892
197,okx,1INCH/USD,0.502150,615658.5252,1INCH,USDC,buy,218
...,...,...,...,...,...,...,...,...
229,binance,ZRX/ETH,0.000135,75541.0,ZRX,ETH,buy,260
810,dydx,ZRX/USD,0.226500,3043603.47,ZRX,USD,buy,1219
133,okx,ZRX/USD,0.225750,3712719.4657,ZRX,USDT,buy,146
447,binance,ZRX/USD,0.226350,1252750.0,ZRX,BUSD,buy,716


In [53]:
@njit
def calculate_arb_pair_data(final_df, symbols):
    arb_pair_data_list = []
    for s in symbols:
        df = final_df[final_df['symbol'] == s]
        if len(df) == 0:
            continue
        ind_min = np.argmin(df['ave_price'].values)''
        ind_max = np.argmax(df['ave_price'].values)
        max_price = np.max(df['ave_price'].values)
        min_price = np.min(df['ave_price'].values)
        maxmin = max_price / min_price if min_price > 0 else 0
        arb_pair_data_list.append({'symbol': s, 'ind_min': ind_min, 'ind_max': ind_max, 'max_min': maxmin})
    return arb_pair_data_list

calculate_arb_pair_data(, symbols)

TypingError: ignored

In [9]:
'''
ex, base, quote, price
'''
@njit
def compare_all_combination(df_index_n, base, quote, price):
    #generate combination of df index
    all_combinations = np.zeros((df_index_n**2, 2), dtype=np.int64)
    i = 0
    for x in range(df_index_n):
        for y in range(df_index_n):
            all_combinations[i, 0] = x
            all_combinations[i, 1] = y
            i += 1
    #
    pairs = []
    for comb in all_combinations:
        base_a = base[comb[0]]
        quote_a = quote[comb[0]]
        base_b = base[comb[1]]
        quote_b = quote[comb[1]]
        res = check_sync_symbol(base_a, quote_a, base_b, quote_b)
        if res != ('',''):
            pairs.append((comb[0],comb[1]))
    return pairs

pairs = compare_all_combination(6057, list(total_df.base), list(total_df.quote), list(total_df.ave_price))
len(pairs)

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'base' of function 'compare_all_combination'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "<ipython-input-9-2b57c81ed620>", line 5:
@njit
def compare_all_combination(df_index_n, base, quote, price):
^

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'price' of function 'compare_all_combination'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "<ipython-input-9-2b57c81ed620>", line 5:
@njit
def compare_all_combination(df_index_n, base, quote, price):
^

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'quote' of function 'compare_all_combination'.

For more informatio

396224

In [73]:
total_df['test'] = total_df.quote+'/'+total_df.base

In [74]:
total_df

Unnamed: 0,ex_name,symbol,ave_price,vol,base,quote,side,test
0,okx,BCD/BTC,0.000007,2.547421e+04,BCD,BTC,buy,BTC/BCD
1,okx,LUNA/USDC,1.423750,3.285728e+05,LUNA,USDC,buy,USDC/LUNA
2,okx,LINK/BTC,0.000280,3.717639e+04,LINK,BTC,buy,BTC/LINK
3,okx,NEO/BTC,0.000463,6.105353e+03,NEO,BTC,buy,BTC/NEO
4,okx,NULS/BTC,0.000011,5.780933e+04,NULS,BTC,buy,BTC/NULS
...,...,...,...,...,...,...,...,...
2473,dydx,ZEC/USD,35.450000,6.438192e+06,ZEC,USD,sell,USD/ZEC
2474,dydx,SOL/USD,20.455500,5.539325e+07,SOL,USD,sell,USD/SOL
2475,dydx,SUSHI/USD,1.233500,8.610171e+06,SUSHI,USD,sell,USD/SUSHI
2476,dydx,ICP/USD,5.375000,4.724167e+06,ICP,USD,sell,USD/ICP


In [None]:

pair_combinations = []
for i in range(len(combination_list)):
    base_a = total_df.iloc[combination_list[i][0]].base
    quote_a = total_df.iloc[combination_list[i][0]].quote
    base_b = total_df.iloc[combination_list[i][1]].base
    quote_b = total_df.iloc[combination_list[i][1]].quote
    pair = check_sync_symbol(base_a, quote_a, base_b, quote_b)
    if pair != ('',''):
        pair_combinations.append(combination_list[i])
        #print(total_df.iloc[d[i][0]].symbol, ' x ', total_df.iloc[d[i][1]].symbol, ' = ', pair)
print('detected ', len(pair_combinations), ' pairs.')

KeyboardInterrupt: ignored

In [7]:
@njit
def generate_combinations(n):
    result = np.zeros((n**2, 2), dtype=np.int64)
    i = 0
    for x in range(n):
        for y in range(n):
            result[i, 0] = x
            result[i, 1] = y
            i += 1
    return result

comb = generate_combinations(6000)
len(comb)

36000000

In [11]:
merged_df.to_csv('/content/drive/My Drive/mergerd_df.csv')

In [30]:
li = [1,2,3]
li[0:10]

[1, 2, 3]