In [1]:
import pandas as pd
import numpy as np
import statistics as s
from datamodel import TradingState, Listing, OrderDepth

In [2]:
# Price CSVs for the three training days, listed in the order -1, -2, 0.
trainfiles = [
    f"data/island-data-bottle-round-1/prices_round_1_day_{day}.csv"
    for day in (-1, -2, 0)
]

In [3]:
# One entry per training file: the semicolon-separated CSV parsed into a
# list of row dictionaries (column name -> value).
dfs = [
    pd.read_csv(path, sep=';').to_dict(orient="records")
    for path in trainfiles
]

In [4]:
def generate_states(df):
    """Group order-book rows into one ``TradingState`` per timestamp.

    Parameters
    ----------
    df : iterable of dict
        Row records. Each row must provide ``timestamp``, ``product``,
        ``mid_price`` and, for levels 1-3, ``bid_price_<i>``,
        ``bid_volume_<i>``, ``ask_price_<i>`` and ``ask_volume_<i>``.

    Returns
    -------
    list of (timestamp, TradingState)
        One pair per distinct timestamp, in first-seen order. Each state's
        ``listings`` and ``order_depths`` hold one entry per product seen at
        that timestamp.

    Notes
    -----
    Levels whose price is NaN (a missing level in the row) are skipped, so
    the resulting books never contain NaN keys. Ask volumes are stored
    negated; bid volumes are stored as given.
    """
    def read_levels(row, side):
        # Collect {price: volume} for the three levels of one book side,
        # dropping levels that are absent (NaN price).
        levels = {}
        for level in range(1, 4):
            price = row[side + '_price_' + str(level)]
            if np.isnan(price):
                continue
            levels[price] = row[side + '_volume_' + str(level)]
        return levels

    trading_states_d = {}
    for row in df:
        timestamp = row['timestamp']
        product = row['product']

        buy_orders = read_levels(row, 'bid')
        # Sell-side volumes carry a negative sign.
        sell_orders = {
            price: -volume for price, volume in read_levels(row, 'ask').items()
        }

        # The product name is passed for all three Listing arguments.
        listing = Listing(product, product, product)
        order_depth = OrderDepth(buy_orders, sell_orders, row['mid_price'])

        if timestamp not in trading_states_d:
            # First row for this timestamp: start with empty containers.
            trading_states_d[timestamp] = TradingState(
                timestamp, {}, {}, {}, {}, {}, {}
            )

        state = trading_states_d[timestamp]
        state.listings[product] = listing
        state.order_depths[product] = order_depth
    return list(trading_states_d.items())

In [5]:
# Build the per-timestamp trading states for every training day.
states_by_days = list(map(generate_states, dfs))

In [6]:
class Indicator:
    """Rolling order-book feature extractor for a single product.

    Each call to ``add_newstate`` appends one observation per tracked series
    and keeps at most ``max(windows)`` observations. ``indicator`` then
    summarises the most recent ``w`` observations of a series for every
    window length ``w``.

    Parameters
    ----------
    windows : sequence of int
        Window lengths (in states) to summarise over.
    product : str
        Key used to look up the product in ``state.order_depths``.
    """

    # Errors the summary functions raise on degenerate windows, e.g.
    # ``stdev`` on a single value or ``geometric_mean`` on negative values.
    # Listed explicitly so KeyboardInterrupt is never swallowed.
    _STAT_ERRORS = (s.StatisticsError, ValueError, TypeError, ArithmeticError)

    def __init__(self, windows, product):
        # Tracked series, in the order their features are emitted.
        # Disabled series (previously sketched): bid_means, ask_means,
        # volume_diffs.
        self.obnames = [
            'bid_volumes',
            'ask_volumes',
            'mid_prices',
            'best_bids',
            'best_asks'
        ]
        self.product = product
        self.mw = max(windows)      # longest window == history capacity
        self.windows = windows
        self.curr_len = 0           # number of observations currently cached
        self.cache = {name: [] for name in self.obnames}

    def add_newstate(self, d):
        """Append the observations of state ``d`` and trim old history.

        ``d`` must expose ``order_depths[product].buy_orders`` and
        ``.sell_orders`` as ``{price: volume}`` mappings. Levels with a NaN
        price are ignored. A missing side yields a best price of 0, and the
        mid price then falls back to the other side's best price.
        """
        depth = d.order_depths[self.product]
        bid_levels = [(p, v) for p, v in depth.buy_orders.items() if not np.isnan(p)]
        ask_levels = [(p, v) for p, v in depth.sell_orders.items() if not np.isnan(p)]

        buyvolume = sum(v for _, v in bid_levels)
        sellvolume = sum(v for _, v in ask_levels)

        # 0 marks "no quote on this side"; derived from the NaN-filtered
        # levels so a NaN key can never become the best price.
        best_bid = max((p for p, _ in bid_levels), default=0)
        best_ask = min((p for p, _ in ask_levels), default=0)

        # The mid price is recomputed here because it is available in the
        # training order book but not at execution time.
        if best_bid == 0:
            mid_price = best_ask
        elif best_ask == 0:
            mid_price = best_bid
        else:
            mid_price = best_bid + (best_ask - best_bid) / 2

        self.cache['bid_volumes'].append(buyvolume)
        self.cache['ask_volumes'].append(sellvolume)
        self.cache['mid_prices'].append(mid_price)
        self.cache['best_bids'].append(best_bid)
        self.cache['best_asks'].append(best_ask)
        self.curr_len += 1

        # Keep only the most recent ``mw`` observations of every series.
        if self.curr_len > self.mw:
            self.curr_len -= 1
            for series in self.cache.values():
                series.pop(0)

    def _safe_stat(self, func, data):
        """Return ``func(data)``, or 0 when the window is degenerate."""
        try:
            return func(data)
        except self._STAT_ERRORS:
            return 0

    def indicator(self, key):
        """Summarise series ``key`` over every configured window.

        For each window the output holds ``[max, min, geometric_mean,
        stdev]``, or only ``[geometric_mean, stdev]`` when ``key`` contains
        ``'volumes'``. Windows longer than the cached history contribute
        zeros. A statistic that cannot be computed is reported as 0.

        NOTE(review): when ask volumes are stored negative (as
        ``generate_states`` in this notebook does), ``geometric_mean``
        rejects them, so that feature is always 0 - confirm this is intended.
        """
        with_extrema = 'volumes' not in key
        width = 4 if with_extrema else 2
        ret = []
        for wi in self.windows:
            if self.curr_len < wi:
                # Not enough history yet for this window.
                ret.extend([0] * width)
                continue
            ds = self.cache[key][-wi:]
            if with_extrema:
                ret.append(self._safe_stat(max, ds))
                ret.append(self._safe_stat(min, ds))
            ret.append(self._safe_stat(s.geometric_mean, ds))
            ret.append(self._safe_stat(s.stdev, ds))
        return ret

    def compute_single(self, states):
        """Ingest the latest state and return the full feature vector.

        ``states`` is a sequence of ``(timestamp, state)`` pairs; only the
        last pair is read. Features are concatenated in ``obnames`` order.
        """
        self.add_newstate(states[-1][1])
        ret = []
        for ob in self.obnames:
            ret.extend(self.indicator(ob))
        return ret

In [16]:
# Products to build training data for.
products = ['BANANAS', 'PEARLS']
# Rolling-window lengths for the indicators: the first eight powers of three.
windows = [pow(3, exponent) for exponent in range(8)]
print(windows)

[1, 3, 9, 27, 81, 243, 729, 2187]


In [8]:
def compute_training(states, products, feature_windows=None):
    """Build normalised feature matrices and raw labels for one day.

    For every index ``i >= 1`` the features are computed from the state at
    ``i - 1`` (plus the indicator's rolling history) and the labels from the
    states starting at ``i``.

    Parameters
    ----------
    states : list of (timestamp, state)
        One day's states in time order.
    products : list of str
        Products to generate data for.
    feature_windows : sequence of int, optional
        Window lengths passed to ``Indicator``. Defaults to the
        notebook-level ``windows`` variable.

    Returns
    -------
    norm : dict
        ``product -> (mean, std)`` used to standardise the features
        (``std`` already includes the +0.01 offset).
    xss : dict
        ``product -> standardised feature matrix``; additionally, for each
        product pair ``a``, ``b`` (in list order), key ``a + b`` holds the
        difference of their matrices.
    yss : dict
        ``product -> list of {"buy": ..., "borrow": ...}`` as returned by
        ``compute_gt`` without and with ``shortsell=True``.
    """
    from tqdm import tqdm

    if feature_windows is None:
        feature_windows = windows  # notebook-level default

    # Label grid handed to compute_gt for both trade directions.
    gt_margins = [1, 2, 4]
    gt_windows = [1, 2, 4, 8, 16, 32, 64]
    gt_volumes = [1, 2, 4, 8]
    # compute_gt reads the entry state plus max(windows) following states,
    # so a bounded slice avoids copying the rest of the day on every step.
    lookahead = max(gt_windows) + 1

    xss = {}
    yss = {}
    norm = {}
    for product in products:
        indicator = Indicator(feature_windows, product)
        ys = []
        xs = []
        for i in tqdm(range(1, len(states))):
            # compute_single only reads the last state it is given.
            xs.append(np.array(indicator.compute_single(states[i - 1:i])))
            future = states[i:i + lookahead]
            ys.append(
                {
                    "buy": compute_gt(future, product, gt_margins, gt_windows, gt_volumes),
                    "borrow": compute_gt(future, product, gt_margins, gt_windows, gt_volumes, shortsell=True)
                }
            )

        xs = np.array(xs)
        mean = np.mean(xs, axis=0)
        # Small offset keeps constant features from dividing by zero.
        std = np.std(xs, axis=0) + 0.01
        xs = (xs - mean) / std

        norm[product] = (mean, std)
        xss[product] = xs
        yss[product] = ys

    # Cross-product features: difference of the standardised matrices.
    for i, product_i in enumerate(products):
        for product_j in products[i + 1:]:
            xss[product_i + product_j] = xss[product_i] - xss[product_j]
    return norm, xss, yss

def compute_gt(states, product, margins, windows, volumes, shortsell=False):
    """Label whether a position opened at ``states[0]`` can be closed at a margin.

    Parameters
    ----------
    states : sequence of (timestamp, state)
        ``states[0]`` is the entry state; up to ``max(windows)`` following
        states are scanned for exits. Each state must expose
        ``order_depths[product].buy_orders`` / ``.sell_orders`` as
        ``{price: volume}`` mappings (levels with NaN price are ignored).
    product : str
        Product key in ``order_depths``.
    margins : iterable of number
        Required price improvement per unit.
    windows : iterable of int
        Horizons (number of states after entry) at which to record labels.
    volumes : iterable of int
        Volume tiers to label.
    shortsell : bool, default False
        If False, enter from the sell orders and exit into the buy orders;
        if True, enter from the buy orders and exit into the sell orders.

    Returns
    -------
    dict
        ``ret[margin][window][volume]`` is 1 when at least ``volume`` units
        had been closed by that window, else 0. Windows beyond the
        available states keep their initial 0. If nothing can be bought at
        the entry state, every label is 0.
    """
    horizon = max(windows)
    ds = states[:horizon + 1]
    max_volume = max(volumes)

    def misses_margin(avg_exit, entry_price, margin):
        # True when the running average exit price no longer clears the margin.
        if shortsell:
            return (entry_price - margin) < avg_exit
        return avg_exit < (entry_price + margin)

    def empty_labels():
        return {window: {volume: 0 for volume in volumes} for window in windows}

    # Entry: walk the entry book unit by unit. purchases[k] holds the average
    # price of the units filled since the previous tier, up to and including
    # unit k.
    entry_book = ds[0][1].order_depths[product]
    infrom = entry_book.buy_orders if shortsell else entry_book.sell_orders
    purchases = {}
    psum = 0
    vsum = 0
    tbuy = 0
    for p, v in infrom.items():
        if tbuy > max_volume:
            break
        if np.isnan(p):
            continue
        for _ in range(int(abs(v))):
            if tbuy > max_volume:
                break
            psum += p
            vsum += 1
            tbuy += 1
            if tbuy in volumes:
                purchases[tbuy] = psum / vsum
                psum = 0
                vsum = 0

    # Nothing fillable at entry (empty or all-NaN side): no trade is possible.
    if not purchases:
        return {margin: empty_labels() for margin in margins}
    first_tier = min(purchases)

    # Exit: accumulate closed units across the following states.
    ret = {}
    for margin in margins:
        ret[margin] = empty_labels()
        paidprice = purchases[first_tier]
        tsold = 0

        for wi, (_, d) in enumerate(ds[1:]):
            exit_book = d.order_depths[product]
            outto = exit_book.sell_orders if shortsell else exit_book.buy_orders

            # The running average exit price restarts for every state.
            tsum = 0
            vsum = 0
            avgsell = 0
            for p, v in outto.items():
                if not np.isnan(p):
                    for _ in range(int(abs(v))):
                        tsum += p
                        vsum += 1
                        avgsell = tsum / vsum
                        if misses_margin(avgsell, paidprice, margin):
                            break

                        tsold += 1
                        if tsold in volumes:
                            # NOTE(review): the unit is counted before this
                            # check, and only the current level is abandoned
                            # when the tier was never purchased - confirm.
                            if tsold not in purchases:
                                break
                            paidprice = purchases[tsold]

                # Stop scanning deeper levels once the margin is missed.
                if misses_margin(avgsell, paidprice, margin):
                    break

            # Record the labels when this state closes a requested window.
            if wi + 1 in windows:
                ret[margin][wi + 1] = {
                    volume: 1 if tsold >= volume else 0 for volume in volumes
                }
    return ret

In [9]:
# (norm, xss, yss) for every training day, in the order of states_by_days.
trainingsets = [
    compute_training(day_states, products) for day_states in states_by_days
]

100%|██████████| 9999/9999 [01:51<00:00, 89.87it/s] 
100%|██████████| 9999/9999 [01:52<00:00, 88.49it/s] 
100%|██████████| 9999/9999 [01:50<00:00, 90.17it/s] 
100%|██████████| 9999/9999 [01:52<00:00, 88.67it/s] 
100%|██████████| 9999/9999 [01:51<00:00, 89.91it/s] 
100%|██████████| 9999/9999 [01:53<00:00, 88.38it/s] 


In [10]:
def compute_classes(yss, products, definitions):
    """Turn raw trade-outcome grids into one-hot class labels.

    Parameters
    ----------
    yss : dict
        ``product -> list of {"buy": grid, "borrow": grid}`` where
        ``grid[margin][window][volume]`` is 0 or 1.
    products : iterable of str
        Products to label.
    definitions : list of dict
        Ordered class definitions with keys ``name``, ``type`` (``'buy'``
        selects the buy grid, anything else the borrow grid), ``margins``,
        ``minvol`` and ``withinstates``.

    Returns
    -------
    names : list of str
        Class names in definition order, followed by ``'Neutral'``.
    gts : dict
        ``product -> list of numpy arrays``. Each array is one-hot: the
        first definition satisfied for any of its margins wins; if none
        matches (or ``definitions`` is empty) the last slot, ``Neutral``,
        is set.

    The number of neutral states per product is printed as a side effect.
    """
    names = [definition['name'] for definition in definitions]
    names.append('Neutral')

    gts = {}
    for product in products:
        neutral = 0
        labels = []
        for state in yss[product]:
            gt = [0] * (len(definitions) + 1)
            for i, definition in enumerate(definitions):
                outcomes = state['buy'] if definition['type'] == 'buy' else state['borrow']
                window = definition['withinstates']
                volume = definition['minvol']
                if any(outcomes[margin][window][volume] == 1
                       for margin in definition['margins']):
                    gt[i] = 1
                    break
            else:
                # No definition matched (also covers an empty definition list).
                gt[-1] = 1
                neutral += 1
            labels.append(np.array(gt))
        gts[product] = labels
        print(product, neutral)
    return names, gts

In [11]:
# Class definitions, in priority order. Every class accepts any of the
# margins 4, 2 or 1; the classes differ in trade direction ('buy' vs
# 'borrow'), minimum volume and the number of states allowed for the exit.
definitions = [
    {
        'name': name,
        'type': side,
        'margins': [4, 2, 1],
        'minvol': minvol,
        'withinstates': withinstates,
    }
    for name, side, minvol, withinstates in (
        ('FlipBuy', 'buy', 1, 8),
        ('ShortBuy', 'buy', 1, 16),
        ('MediumBuy', 'buy', 2, 32),
        ('LongBuy', 'buy', 4, 64),
        ('CrashShort', 'borrow', 1, 8),
        ('ShortShort', 'borrow', 1, 16),
        ('MediumShort', 'borrow', 2, 32),
        ('LongShort', 'borrow', 4, 64),
    )
]

In [12]:
# (names, gts) for every training day; each training set is a
# (norm, xss, yss) triple and only its raw labels are classified here.
traininggts = [
    compute_classes(day_set[2], products, definitions)
    for day_set in trainingsets
]

BANANAS 3745
PEARLS 8023
BANANAS 3680
PEARLS 7912
BANANAS 3807
PEARLS 7985


In [13]:
import pickle

# Serialise both results to disk; the files are opened in binary write mode.
for filename, payload in (
    ('trainingsets.pkl', trainingsets),
    ('traininggts.pkl', traininggts),
):
    with open(filename, 'wb') as f:
        pickle.dump(payload, f)