In [1]:
import ccxt
import pandas as pd
import datetime
import time

In [2]:
def fetch_historical_data(symbol='BTC/USDT', timeframe='1h', limit=10000):
    all_candles = []
    binance = ccxt.binance({
        'rateLimit': 1200,
        'options': {
            'adjustForTimeDifference': True,
        }
    })

    start_date = int(datetime.datetime(2018, 1, 1, 10, 20).timestamp() * 1000)
    since = start_date

    while len(all_candles) < limit:
        ohlcv = binance.fetch_ohlcv(symbol, timeframe, since, min(limit, 1000))
        
        if len(ohlcv) == 0:
            break

        since = ohlcv[-1][0]  # Update the 'since' timestamp for the next batch
        all_candles += ohlcv
        time.sleep(binance.rateLimit / 1000)  # Respect the rate limit of the exchange

    df = pd.DataFrame(all_candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    return df

In [3]:
df = fetch_historical_data()
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2018-01-01 03:00:00,13330.26,13611.27,13290.0,13410.03,420.08703
1,2018-01-01 04:00:00,13434.98,13623.29,13322.15,13601.01,340.807329
2,2018-01-01 05:00:00,13615.2,13699.0,13526.5,13558.99,404.229046
3,2018-01-01 06:00:00,13539.0,13800.0,13510.0,13780.41,264.989684
4,2018-01-01 07:00:00,13780.0,13818.55,13555.02,13570.35,292.188777


In [6]:
import pandas as pd

# Add a new column with the shifted close prices to compare with
df['close_shifted'] = df['close'].shift(-1)

# Calculate the encoded 'price_movement' based on your conditions
df['price_movement'] = 1  # Initialize as no change
df.loc[df['close'] < df['close_shifted'], 'price_movement'] = 2  # Price goes up
df.loc[df['close'] > df['close_shifted'], 'price_movement'] = 0  # Price goes down

# Drop the 'close_shifted' as it was only used for the calculation
df.drop('close_shifted', axis=1, inplace=True)

In [4]:
import torch
from torch.utils.data import Dataset

class PriceMovementDataset(Dataset):
    
    def __init__(self, split, data, length=10):
        assert split in {'train', 'test'}
        self.split = split
        self.data = data  # This is your encoded 'price_movement' series as a PyTorch tensor
        self.length = length  # Sequence length for training/testing

    def get_vocab_size(self):
        return 3
    
    def get_block_size(self):
        # the length of the sequence that will feed into transformer, 
        # containing concatenated input and the output, but -1 because
        # the transformer starts making predictions at the last input element
        return self.length * 2 - 1
    
    def __len__(self):
        return len(self.data) - self.length  # Adjust length to account for sequence length
    
    def __getitem__(self, idx):
        
        # Create sequences
        inp = self.data[idx:idx+self.length]
        sol = self.data[idx+1:idx+self.length+1]

        inp = torch.from_numpy(inp)
        sol = torch.from_numpy(sol)

        # concatenate the problem specification and the solution
        cat = torch.cat((inp, sol), dim=0)

        # the inputs to the transformer will be the offset sequence
        x = cat[:-1].clone()
        y = cat[1:].clone()
        # we only want to predict at output locations, mask out the loss at the input locations
        y[:self.length-1] = -1

        return x, y

In [14]:
train_dataset = PriceMovementDataset('train', df['price_movement'].to_numpy()[:9980])
test_dataset = PriceMovementDataset('test', df['price_movement'].to_numpy()[9980:10000])

In [15]:
df['price_movement'].to_numpy()[9980:10000]

array([2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 2, 0, 1],
      dtype=int64)

In [16]:
from mingpt.model import GPT

model_config = GPT.get_default_config()
model_config.model_type = 'gpt-nano'
model_config.vocab_size = train_dataset.get_vocab_size()
model_config.block_size = train_dataset.get_block_size()
model = GPT(model_config)

number of parameters: 0.09M


In [17]:
# create a Trainer object
from mingpt.trainer import Trainer

train_config = Trainer.get_default_config()
train_config.learning_rate = 5e-4 # the model we're using is so small that we can go a bit faster
train_config.max_iters = 6000
train_config.num_workers = 0
trainer = Trainer(train_config, model, train_dataset)

running on device cuda


In [18]:
def batch_end_callback(trainer):
    if trainer.iter_num % 100 == 0:
        print(f"iter_dt {trainer.iter_dt * 1000:.2f}ms; iter {trainer.iter_num}: train loss {trainer.loss.item():.5f}")
trainer.set_callback('on_batch_end', batch_end_callback)

trainer.run()

iter_dt 0.00ms; iter 0: train loss 1.11192
iter_dt 27.05ms; iter 100: train loss 0.52294
iter_dt 20.00ms; iter 200: train loss 0.09539
iter_dt 24.04ms; iter 300: train loss 0.07843
iter_dt 19.00ms; iter 400: train loss 0.06929
iter_dt 20.99ms; iter 500: train loss 0.06978
iter_dt 17.00ms; iter 600: train loss 0.07865
iter_dt 17.02ms; iter 700: train loss 0.06816
iter_dt 12.98ms; iter 800: train loss 0.07398
iter_dt 13.00ms; iter 900: train loss 0.06834
iter_dt 17.00ms; iter 1000: train loss 0.07738
iter_dt 13.00ms; iter 1100: train loss 0.07010
iter_dt 12.00ms; iter 1200: train loss 0.06579
iter_dt 11.99ms; iter 1300: train loss 0.07070
iter_dt 24.00ms; iter 1400: train loss 0.06840
iter_dt 20.99ms; iter 1500: train loss 0.06952
iter_dt 20.00ms; iter 1600: train loss 0.07000
iter_dt 17.00ms; iter 1700: train loss 0.07149
iter_dt 17.00ms; iter 1800: train loss 0.08125
iter_dt 17.00ms; iter 1900: train loss 0.06668
iter_dt 19.00ms; iter 2000: train loss 0.07049
iter_dt 15.00ms; iter 2100

In [19]:
model.eval();

In [20]:
import torch
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
from mingpt.utils import set_seed
set_seed(3407)

def eval_split(trainer, split, max_batches):
    dataset = {'train':train_dataset, 'test':test_dataset}[split]
    n = train_dataset.length # naugy direct access shrug
    results = []
    mistakes_printed_already = 0
    loader = DataLoader(dataset, batch_size=100, num_workers=0, drop_last=False)
    for b, (x, y) in enumerate(loader):
        x = x.to(trainer.device)
        y = y.to(trainer.device)
        # isolate the input pattern alone
        inp = x[:, :n]
        sol = y[:, -n:]
        # let the model sample the rest of the sequence
        cat = model.generate(inp, n, do_sample=False) # using greedy argmax, not sampling
        sol_candidate = cat[:, n:] # isolate the filled in sequence
        # compare the predicted sequence to the true sequence
        #print("sol: ", sol[10:15,-8:])
        #print("sol_candidate: ", sol_candidate[10:15,-8:])
        correct = (sol == sol_candidate).all(1).cpu() # Software 1.0 vs. Software 2.0 fight RIGHT on this line haha
        #print("correct:",correct)
        for i in range(x.size(0)):
            results.append(int(correct[i]))
            if not correct[i] and mistakes_printed_already < 3: # only print up to 5 mistakes to get a sense
                mistakes_printed_already += 1
                print("GPT claims that with %s the future price dynamics is %s but gt is %s" % (inp[i].tolist(), sol_candidate[i].tolist(), sol[i].tolist()))
        if max_batches is not None and b+1 >= max_batches:
            break
    rt = torch.tensor(results, dtype=torch.float)
    print("%s final score: %d/%d = %.2f%% correct" % (split, rt.sum(), len(results), 100*rt.mean()))
    return rt.sum()

# run a lot of examples from both train and test through the model and verify the output correctness
with torch.no_grad():
    train_score = eval_split(trainer, 'train', max_batches=50)
    test_score  = eval_split(trainer, 'test',  max_batches=50)

GPT claims that with [0, 2, 0, 0, 2, 0, 0, 0, 0, 2] the future price dynamics is [2, 0, 0, 2, 0, 0, 0, 0, 2, 0] but gt is [2, 0, 0, 2, 0, 0, 0, 0, 2, 2]
GPT claims that with [2, 0, 0, 0, 0, 2, 2, 0, 2, 2] the future price dynamics is [0, 0, 0, 0, 2, 2, 0, 2, 2, 0] but gt is [0, 0, 0, 0, 2, 2, 0, 2, 2, 2]
GPT claims that with [0, 0, 0, 0, 2, 2, 0, 2, 2, 2] the future price dynamics is [0, 0, 0, 2, 2, 0, 2, 2, 2, 0] but gt is [0, 0, 0, 2, 2, 0, 2, 2, 2, 2]
train final score: 2769/5000 = 55.38% correct
GPT claims that with [2, 2, 0, 0, 2, 2, 0, 2, 0, 2] the future price dynamics is [2, 0, 0, 2, 2, 0, 2, 0, 2, 2] but gt is [2, 0, 0, 2, 2, 0, 2, 0, 2, 0]
GPT claims that with [0, 2, 0, 2, 2, 0, 2, 2, 0, 2] the future price dynamics is [2, 0, 2, 2, 0, 2, 2, 0, 2, 2] but gt is [2, 0, 2, 2, 0, 2, 2, 0, 2, 0]
GPT claims that with [2, 0, 2, 2, 0, 2, 2, 0, 2, 0] the future price dynamics is [0, 2, 2, 0, 2, 2, 0, 2, 0, 2] but gt is [0, 2, 2, 0, 2, 2, 0, 2, 0, 1]
test final score: 7/10 = 70.00% corr

In [21]:
train_score

tensor(2769.)

In [22]:
trainer.device

'cuda'