In [8]:
import json
import os
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

%matplotlib inline
plt.rcParams['figure.figsize'] = [6, 3]

In [163]:
# Absolute, machine-specific location of the positions CSV that the pipeline
# cell hands to DataSrc.
path = (
    '/Users/berg/Projects/Atto/moc_imbalance_flip/data/positions/'
    'hold_60000_volume_2000000_spread_0.2_deltaimb_1_date_2020-11-18.csv'
)

In [103]:
# Fee schedule. The keys mirror the AddCommission constructor parameters so the
# mapping can be unpacked with ** when the pipeline is assembled.
commissions = dict(
    comm=0.0004,
    ecn_remove_market=0.00375,
    ecn_moc=0.001,
    sec=0.0000221,
    clr=0.00032,
    ticket=0.4,
)

In [216]:
class Element:
    """Base node of a push-style processing chain.

    Nodes are linked with :meth:`then`. Data travels forward through
    :meth:`push_data` -> ``on_data`` of the next node, and the end-of-stream
    signal travels forward through :meth:`end` -> ``on_end`` of the next node.
    """

    def __init__(self):
        self.__next = None
        self.__prev = None
        # Head of the chain; then() copies it onto every appended node so that
        # exec() can be called on any node, including the last one.
        self.__first = self

    def on_data(self, data):
        """Receive one item from the previous node. The base class ignores it."""
        pass

    def on_end(self):
        """Receive the end-of-stream signal; by default just forward it."""
        self.end()

    def then(self, element):
        """Link ``element`` directly after this node.

        Returns ``element`` (not ``self``) so calls can be chained fluently.
        """
        self.__next = element
        element.__prev = self
        element.__first = self.__first
        return element

    def push_data(self, data):
        """Hand ``data`` to the next node, if any.

        A node without a successor is the tail of the chain; pushing from it
        is a no-op instead of an ``AttributeError`` on ``None``.
        """
        if self.__next is not None:
            self.__next.on_data(data)

    def end(self):
        """Propagate end-of-stream to the next node, if any (no-op at the tail)."""
        if self.__next is not None:
            self.__next.on_end()

    def exec(self):
        """Start the chain by running ``do_exec`` on its first node."""
        self.__first.do_exec()

    def do_exec(self):
        """Produce data. Source nodes override this; the base class does nothing."""
        pass
    
    
class DataSrc (Element):
    """Chain head that loads one CSV file and pushes it downstream as a DataFrame."""

    def __init__(self, path):
        """Remember ``path``; the file is only read when the chain is executed."""
        super().__init__()
        self.__path = path

    def do_exec(self):
        """Read the CSV (first column becomes the index) and push the frame."""
        # Read the path given to the constructor rather than a notebook-level
        # global, so every source loads the file it was built with.
        df = pd.read_csv(self.__path, index_col=0)

        self.push_data(df)
        
        
class BaseSink (Element):
    """Terminal node: swallows the end-of-stream signal instead of forwarding it."""

    def on_end(self):
        """Do nothing; there is no successor to notify."""
        return None
        
        
class Aggregator:
    """Collects appended items in arrival order."""

    def __init__(self):
        self.__items = []

    def append(self, data):
        """Store ``data`` at the end of the collection."""
        self.__items.append(data)

    def result(self):
        """Return the collected list, or ``None`` when nothing was appended."""
        # `or` hands back the very same list object when it is non-empty.
        return self.__items or None

class Tee (Element):
    """Fan-out node: every element attached with ``then`` receives each item."""

    def __init__(self):
        super().__init__()
        self.__outputs = []

    def then(self, element):
        """Register ``element`` as one more branch and return it for chaining."""
        self.__outputs.append(element)
        linked = super().then(element)
        return linked

    def push_data(self, data):
        """Deliver ``data`` to every branch, in registration order."""
        for branch in self.__outputs:
            branch.on_data(data)

    def end(self):
        """Deliver end-of-stream to every branch, in registration order."""
        for branch in self.__outputs:
            branch.on_end()


class Sort (Element):
    """Buffers every incoming item and emits them in sorted order at end-of-stream."""

    def __init__(self, key, reverse=False):
        """
        key     -- either a callable applied to each item, or a lookup key/index
                   passed to ``operator.itemgetter`` (e.g. a dict key or tuple
                   position).
        reverse -- emit in descending order when true.
        """
        super().__init__()
        self.__items = []
        self.__key = key
        self.__reverse = reverse

    def on_data(self, data):
        """Hold the item back; nothing is pushed until ``on_end``."""
        self.__items.append(data)

    def on_end(self):
        """Sort the buffered items, push them one by one, then forward the end signal."""
        # A callable key is used as-is; anything else is an item lookup.
        key_func = self.__key if callable(self.__key) else itemgetter(self.__key)
        for item in sorted(self.__items, key=key_func, reverse=self.__reverse):
            self.push_data(item)

        super().on_end()


class Limit (Element):
    """Lets through only the first ``limit`` items; later ones are dropped."""

    def __init__(self, limit):
        super().__init__()
        self.__count = 0
        self.__limit = limit

    def on_data(self, data):
        """Forward ``data`` while the quota is not used up."""
        if self.__count >= self.__limit:
            return
        self.__count += 1
        self.push_data(data)


class FileJsonSrc (Element):
    """Chain head that reads a file with one JSON document per line."""

    def __init__(self, path):
        """Remember ``path``; the file is opened when the chain is executed."""
        super().__init__()
        self.__path = path

    def do_exec(self):
        """Parse each line as JSON and push the result; unparsable lines are reported and skipped.

        The end-of-stream signal is sent once reading stops, even if reading
        was interrupted by an exception. A failure to open the file propagates
        without sending it.
        """
        file = open(self.__path)

        try:
            # The context manager closes the file before end-of-stream is signalled.
            with file:
                for line in file:
                    # Only the parse is guarded: a ValueError raised by a
                    # downstream element must not be reported as a bad line.
                    try:
                        record = json.loads(line)
                    except ValueError:
                        print('could not parse line {}'.format(line))
                        continue
                    self.push_data(record)
        finally:
            self.end()


class ForEach (Element):
    """Explodes a container into individual items.

    A dict is emitted as ``(key, value)`` tuples, a list as its elements;
    any other input is dropped.
    """

    def on_data(self, data):
        if isinstance(data, dict):
            for name in data:
                pair = (name, data[name])
                self.push_data(pair)
            return

        if isinstance(data, list):
            for member in data:
                self.push_data(member)


class VolumeFilter (Element):
    """Keeps only the rows whose ``volume`` is above a threshold."""

    def __init__(self, min_volume: int):
        super().__init__()
        self.__min_volume = min_volume

    def on_data(self, data):
        """Push a copy of the rows with ``volume`` strictly greater than the minimum."""
        above_min = data['volume'] > self.__min_volume
        kept = data[above_min].copy()
        self.push_data(kept)


class SizeFilter (Element):
    """Keeps only the rows whose ``position_size_bp`` does not exceed a cap."""

    def __init__(self, max_size: int):
        super().__init__()
        self.__max_size = max_size

    def on_data(self, data):
        """Push a copy of the rows with ``position_size_bp`` at or below the cap."""
        within_cap = data['position_size_bp'] <= self.__max_size
        kept = data[within_cap].copy()
        self.push_data(kept)
     
    
class PriceFilter (Element):
    """Keeps only the rows whose ``open_price`` reaches a minimum."""

    def __init__(self, min_price: int):
        super().__init__()
        self.__min_price = min_price

    def on_data(self, data):
        """Push a copy of the rows with ``open_price`` at or above the minimum."""
        pricey_enough = data['open_price'] >= self.__min_price
        kept = data[pricey_enough].copy()
        self.push_data(kept)

class ImbalanceDeltaFilter (Element):
    """Keeps only the rows whose ``abs_deltaImbPct`` reaches a minimum."""

    def __init__(self, abs_delta_imb: float):
        super().__init__()
        self.__abs_delta_imb = abs_delta_imb

    def on_data(self, data):
        """Push a copy of the rows with ``abs_deltaImbPct`` at or above the threshold."""
        big_enough = data['abs_deltaImbPct'] >= self.__abs_delta_imb
        kept = data[big_enough].copy()
        self.push_data(kept)
        

class CloseStatusFilter (Element):
    """Optionally keeps only the rows closed with one particular ``close_status``."""

    def __init__(self, close_status_filter: bool, close_status: str):
        """
        close_status_filter -- when false the element is a pass-through.
        close_status        -- the status value rows must have to be kept.
        """
        super().__init__()
        self.__close_status_filter = close_status_filter
        self.__close_status = close_status

    def on_data(self, data):
        """Push the matching rows (as a copy), or the untouched input when disabled."""
        if self.__close_status_filter:
            # The status is a label, so match it exactly. An ordering
            # comparison would also let through every status that merely
            # sorts after the requested one.
            data = data[data['close_status'] == self.__close_status].copy()
        self.push_data(data)
        

class DayFilter (Element):
    """Adds ``timeindex`` / ``day_name`` columns and optionally keeps a single weekday."""

    def __init__(self, day_filter: bool = False, day_name: str = 'Friday'):
        """
        day_filter -- when false no rows are removed (the columns are still added).
        day_name   -- English weekday name to keep, e.g. ``'Friday'``.
        """
        super().__init__()
        self.__day_filter = day_filter
        self.__day_name = day_name

    def on_data(self, data):
        """Derive the date columns from ``date`` and push the (possibly filtered) frame."""
        timeindex = pd.to_datetime(data['date'])
        # assign() works on a copy, so the frame received from upstream is not
        # modified. day_name() is vectorised and returns English names by default.
        data = data.assign(timeindex=timeindex, day_name=timeindex.dt.day_name())
        if self.__day_filter:
            data = data[data['day_name'] == self.__day_name].copy()

        self.push_data(data)
        
        
        
class AddCommission (Element):
    """Adds per-row fee columns and a net PnL column to the incoming frame.

    Every rate multiplies ``position_size_bp``; ``ticket`` is a flat amount
    charged twice per row.
    """

    def __init__(self, comm, ecn_remove_market, ecn_moc, sec, clr, ticket):
        super().__init__()
        self.__comm = comm
        self.__ecn_remove_market = ecn_remove_market
        self.__ecn_moc = ecn_moc
        self.__sec = sec
        self.__clr = clr
        self.__ticket = ticket

    def on_data(self, data):
        """Write the fee columns into ``data`` in place and push it on."""
        size = data['position_size_bp']

        data['Comm'] = size * self.__comm

        # ECN: the entry is always charged the remove rate; the exit rate
        # depends on whether close_status is 'market'.
        data['Ecn Remove entry'] = size * self.__ecn_remove_market
        closed_at_market = data['close_status'] == 'market'
        data['Ecn Remove exit'] = np.where(closed_at_market,
                                           size * self.__ecn_remove_market,
                                           size * self.__ecn_moc)
        data['Ecn Fee'] = data['Ecn Remove entry'] + data['Ecn Remove exit']

        # The Sec fee is applied to 'Short' rows only.
        is_short = data['direction'] == 'Short'
        data['Sec'] = np.where(is_short, size * self.__sec, 0.0)

        data['Clr'] = size * self.__clr
        data['Ticket'] = 2 * self.__ticket

        # Total fees, then PnL net of fees.
        data['Fees'] = data['Comm'] + data['Ecn Fee'] + data['Sec'] + data['Clr'] + data['Ticket']
        data['position_pnl_bp_net'] = data['position_pnl_bp'] - data['Fees']

        self.push_data(data)
        

class AddColumns (Element):
    """Adds the absolute value of ``deltaImbPct`` under a caller-chosen column name."""

    def __init__(self, items: list):
        super().__init__()
        self.__items = items

    def on_data(self, data):
        """Write the new column into ``data`` in place and push it on."""
        # Only the first entry of the list is used: it names the output column.
        target_column = self.__items[0]
        absolute_delta = data['deltaImbPct'].abs()
        data[target_column] = absolute_delta
        self.push_data(data)
        
class Resample (Element):
    """Aggregates per-position rows into one row per period, with drawdown columns."""

    def __init__(self, freq: str):
        """freq -- pandas offset alias handed to ``resample`` (e.g. ``'B'``)."""
        super().__init__()
        self.__freq = freq

    def on_data(self, data):
        """Build the per-period frame from ``data`` and push it.

        Reads the columns ``timeindex``, ``position_pnl_bp``,
        ``position_pnl_bp_net``, ``position_size_bp``, ``start`` and ``stop``.
        The incoming frame is left unmodified.
        """
        freq = self.__freq
        # Non-mutating re-index: upstream elements keep their frame intact.
        indexed = data.set_index('timeindex')

        data_resample = pd.DataFrame()
        # Pnl
        data_resample['GrossPnl'] = indexed['position_pnl_bp'].resample(freq).sum()
        data_resample['NetPnl'] = indexed['position_pnl_bp_net'].resample(freq).sum()
        data_resample['CumGrossPnl'] = data_resample['GrossPnl'].cumsum().fillna(0)
        data_resample['CumNetPnl'] = data_resample['NetPnl'].cumsum().fillna(0)
        # Volume
        data_resample['Volume'] = indexed['position_size_bp'].resample(freq).sum()
        data_resample['Turnover'] = data_resample['Volume'] * 2
        # Hold: seconds between 'start' and 'stop', averaged per period
        hold_seconds = (pd.to_datetime(indexed['stop']) - pd.to_datetime(indexed['start'])).dt.total_seconds()
        data_resample['AvgHold'] = hold_seconds.resample(freq).mean()
        # Filter zero volume periods - no trading. Copy so the drawdown columns
        # below are written to an independent frame, not to a slice.
        data_resample = data_resample[data_resample['Volume'] > 0].copy()
        # Drawdown: distance of cumulative net PnL below its running maximum
        data_resample['RollMax'] = data_resample['CumNetPnl'].cummax()
        data_resample['Drawdown'] = data_resample['CumNetPnl'] - data_resample['RollMax']

        self.push_data(data_resample)
        
class Ratios (Element):
    """Condenses a resampled PnL frame into one row of summary ratios and prints it.

    Each received frame adds one row; the whole table collected so far is
    printed every time. Nothing is pushed downstream.
    """

    def __init__(self):
        super().__init__()
        self.__ratios = []

    def on_data(self, data):
        net = data['NetPnl']
        drawdown = data['Drawdown']

        # Values reused by the percentage figures further down.
        total_net = round(net.sum(), 0)
        deepest_dd = round(drawdown.min(), 2)
        average_dd = round(drawdown.mean(), 2)

        row_count = len(data)
        winning_rows = len(data[net > 0])

        summary = {
            'PnL': total_net,
            'PPS': round(data['GrossPnl'].sum() / data['Volume'].sum(), 5),
            'Turnover': data['Turnover'].sum(),
            'TradedDays': len(data[data['Volume'] > 0]),
            'AvgHold': round(data['AvgHold'].mean(), 2),
            'BestDay': round(net.max(), 0),
            'WorstDay': round(net.min(), 0),
            'ProfitableDays%': round(winning_rows * 100 / row_count, 2),
            # sqrt(252) scaling - presumably annualising daily rows; TODO confirm
            'Sharpe': round(np.sqrt(252) * net.mean() / net.std(), 2),
            'MaxDD': deepest_dd,
            'MeanDD': average_dd,
            'MaxDD%': round(deepest_dd * 100 / total_net, 2),
            'MeanDD%': round(average_dd * 100 / total_net, 2),
        }
        self.__ratios.append(summary)

        print(pd.DataFrame(self.__ratios))
        

class GroupBy (Element):
    """Buckets incoming dicts by one of their keys and emits the buckets at end-of-stream."""

    def __init__(self, key, aggregator = Aggregator):
        super().__init__()
        self.__key = key
        self.__aggregator = aggregator
        self.__groups = {}

    def on_data(self, data):
        """File ``data`` under the value it holds for the grouping key ('' when absent)."""
        if not isinstance(data, dict):
            # Anything that is not a dict is dropped.
            return

        bucket_id = data.get(self.__key, '')
        bucket = self.__groups.get(bucket_id)
        if bucket is None:
            # First item for this value: create its aggregator lazily.
            bucket = self.__aggregator()
            self.__groups[bucket_id] = bucket

        bucket.append(data)

    def on_end(self):
        """Push ``(group value, aggregated result)`` per group, then forward the end signal."""
        for bucket_id, bucket in self.__groups.items():
            outcome = bucket.result()
            if outcome is None:
                continue
            self.push_data((bucket_id, outcome))

        super().on_end()


class ConsoleOutput (BaseSink):
    """Sink that writes every received item to standard output."""

    def on_data(self, data):
        """Print ``data`` using its default string form."""
        print(data)
        

class CalculateSharpe (BaseSink):
    """Sink that prints mean / standard deviation of ``position_pnl_bp_net``."""

    def __init__(self):
        super().__init__()
        self.__mean = 0
        self.__std = 0
        self.__sharpe = 0

    def on_data(self, data):
        """Recompute the ratio from ``data`` and print it; nothing is forwarded."""
        net_pnl = data['position_pnl_bp_net']
        self.__mean = net_pnl.mean()
        self.__std = net_pnl.std()
        # Plain ratio: no scaling factor is applied here.
        self.__sharpe = self.__mean / self.__std
        print(self.__sharpe)

In [220]:
# Assemble the chain (source -> fees -> derived column -> filters -> resample
# -> ratios) and run it. exec() may be called on the last node because every
# node keeps a reference to the head of the chain.
(
    DataSrc(path)
    .then(AddCommission(**commissions))
    .then(AddColumns(['abs_deltaImbPct']))
    .then(VolumeFilter(0))
    .then(SizeFilter(900))
    .then(PriceFilter(20))
    .then(ImbalanceDeltaFilter(3.0))
    .then(CloseStatusFilter(True, 'moc'))
    .then(DayFilter(False, 'Friday'))
    .then(Resample('B'))
    .then(Ratios())
    .exec()
)

       PnL      PPS      Turnover  TradedDays  AvgHold  BestDay  WorstDay  \
0  21799.0  0.08565  559569.53981         148    40.87   4183.0    -316.0   

   ProfitableDays%  Sharpe   MaxDD  MeanDD  MaxDD%  MeanDD%  
0            75.68     5.0 -316.28  -32.35   -1.45    -0.15  
