# Trader Analysis Results Tables

This notebook produces Tables 8, 9, and 10 of the paper. Before running it, run the two Trader Analysis notebooks to create the required data files.

In [1]:
import os
import sys
import re

from itertools import *
from operator import mul
from functools import reduce
import pickle

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Shorthand for pandas' IndexSlice helper.
idx = pd.IndexSlice

# Project-local module; used below through storage.retrieve_all.
from research_tools import storage

# Float display format: thousands separators, 4 decimals when |x| < 1, otherwise 2 decimals.
# NOTE: this is a global pandas display option; it is replaced by a 1-decimal format
# further down in this notebook.
pd.options.display.float_format = lambda x: '{:,.4f}'.format(x) if abs(x) < 1 else '{:,.2f}'.format(x)

# Load Data

First load the data we saved at the end of the Trader Analysis notebooks.

In [2]:
def load_pickle(filename):
    """Unpickle and return the object stored in ``data/<filename>``.

    The ``data`` directory is resolved relative to the current working
    directory at call time.
    """
    path = os.path.join('data', filename)
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

In [3]:
# Move up one directory so that the relative 'data' folder used by load_pickle resolves.
# NOTE(review): the path is relative, so re-running this cell moves up one more level
# each time; run it exactly once per kernel session.
os.chdir('..')

# 'dem.*' inputs (reported under the 'D' column label in the tables below).
dem_trader_classifications = load_pickle('dem.trader_classifications.p')
dem_trader_stats_summary = load_pickle('dem.trader_stats_summary.p')
dem_group_summary = load_pickle('dem.group_summary.p')

# 'gop.*' inputs, bound to rep_* names (reported under the 'R' column label below).
rep_trader_classifications = load_pickle('gop.trader_classifications.p')
rep_trader_stats_summary = load_pickle('gop.trader_stats_summary.p')
rep_group_summary = load_pickle('gop.group_summary.p')

In [4]:
# Fetch the stored behavior analysis for each dataset through the project storage helper.
# retrieve_all is given a one-element list; the trailing comma unpacks its single result.
# NOTE(review): neither *_behavior_analysis object is referenced again in the cells below.
basename = 'dem'

dem_behavior_analysis, = storage.retrieve_all(
    [basename + '.behavior_analysis'])

basename = 'gop'

rep_behavior_analysis, = storage.retrieve_all(
    [basename + '.behavior_analysis'])

Reading data from data/dem.behavior_analysis.p
Reading data from data/gop.behavior_analysis.p


Below is Table 8.

In [5]:
def paper_group_summary_table(group_summary, letter):
    """Build one dataset's half of the group summary table (Table 8).

    Parameters
    ----------
    group_summary : pandas.DataFrame
        One row per trader group. Must provide the columns ``trader_count``,
        ``pnl_net_fee``, ``sum_max_in_pool`` and ``net_roi``.
    letter : str
        Label stored in the second column level (``'D'`` or ``'R'`` in this
        notebook) so that two results can be concatenated side by side.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``group_summary``, with two-level columns
        ``(metric, letter)`` for the metrics ``'% of Traders'``,
        ``'Avg Profit ($)'``, ``'Avg Exposure ($)'`` and ``'ROI'``.
        ``'% of Traders'`` and ``'ROI'`` are scaled by 100.
    """
    traders = group_summary.trader_count

    # Build through the ``pd`` namespace: only ``pandas as pd`` is imported in
    # this notebook, so a bare ``DataFrame`` raises NameError on a fresh kernel.
    out = pd.DataFrame({
        '% of Traders': 100 * traders / traders.sum(),
        'Avg Profit ($)': group_summary.pnl_net_fee / traders,
        'Avg Exposure ($)': group_summary.sum_max_in_pool / traders,
        'ROI': 100 * group_summary.net_roi,
    })

    # Tag every metric with the dataset letter as a second column level.
    out.columns = pd.MultiIndex.from_tuples([(x, letter) for x in out.columns])

    return out

# Build each dataset's summary, labelled 'D' / 'R' in the second column level.
dem = paper_group_summary_table(dem_group_summary, 'D')
rep = paper_group_summary_table(rep_group_summary, 'R')


# Put both side by side and sort on the first column level so that the D and R
# columns of each metric sit next to each other.
# NOTE: group_differences (further down) reads ROI as the LAST TWO columns of table8,
# so it depends on this sorted column order.
table8 = pd.concat([dem, rep], axis=1).sort_index(axis=1, level=0)

table8

Unnamed: 0_level_0,% of Traders,% of Traders,Avg Exposure ($),Avg Exposure ($),Avg Profit ($),Avg Profit ($),ROI,ROI
Unnamed: 0_level_1,D,R,D,R,D,R,D,R
Efficient Small Active,2.21,0.876,19.2,19.78,0.6285,4.65,3.27,23.49
Efficient Small Inactive,19.33,16.13,13.16,12.62,-2.12,0.5114,-16.15,4.05
Efficient Large Active,3.23,2.07,463.66,519.13,24.87,-1.28,5.36,-0.2468
Efficient Large Inactive,7.44,6.22,298.55,338.21,37.16,36.7,12.45,10.85
Inefficient Small Active,4.51,5.3,17.66,18.11,-4.21,0.9762,-23.82,5.39
Inefficient Small Inactive,45.71,46.41,12.81,12.94,-4.04,-1.41,-31.52,-10.88
Inefficient Large Active,4.64,7.21,455.57,448.94,9.46,-25.56,2.08,-5.69
Inefficient Large Inactive,12.93,15.79,335.07,309.84,-36.33,-32.45,-10.84,-10.47


In [6]:
# Print the LaTeX source for Table 8.
print(table8.to_latex())

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \% of Traders &        & Avg Exposure (\$) &        & Avg Profit (\$) &        &    ROI &         \\
{} &            D &      R &                D &      R &              D &      R &      D &       R \\
\midrule
Efficient Small Active     &         2.21 & 0.8760 &            19.20 &  19.78 &         0.6285 &   4.65 &   3.27 &   23.49 \\
Efficient Small Inactive   &        19.33 &  16.13 &            13.16 &  12.62 &          -2.12 & 0.5114 & -16.15 &    4.05 \\
Efficient Large Active     &         3.23 &   2.07 &           463.66 & 519.13 &          24.87 &  -1.28 &   5.36 & -0.2468 \\
Efficient Large Inactive   &         7.44 &   6.22 &           298.55 & 338.21 &          37.16 &  36.70 &  12.45 &   10.85 \\
Inefficient Small Active   &         4.51 &   5.30 &            17.66 &  18.11 &          -4.21 & 0.9762 & -23.82 &    5.39 \\
Inefficient Small Inactive &        45.71 &  46.41 &            12.81 &  12.94 &          -4.04 &  -1.41 & -31.

In [7]:
# Total trader count across all 'dem' groups (the denominator of '% of Traders').
dem_group_summary.trader_count.sum()

3750.0

In [8]:
# Total trader count across all 'gop' groups (the denominator of '% of Traders').
rep_group_summary.trader_count.sum()

4452.0

Below is Table 9.

In [9]:
def paper_group_summary_exposure_table(group_summary, letter):
    """Build one dataset's half of the profit decomposition table (Table 9).

    Each profit component is divided by the group's ``sum_max_in_pool`` and
    multiplied by 100, i.e. expressed as a percentage of exposure.

    Parameters
    ----------
    group_summary : pandas.DataFrame
        One row per trader group. Must provide the columns ``pnl_net_fee``,
        ``spread_profit``, ``bias_profit``, ``position_profit``, ``fee`` and
        ``sum_max_in_pool``.
    letter : str
        Label stored in the second column level (``'D'`` or ``'R'`` in this
        notebook).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``group_summary``, with two-level columns
        ``(component, letter)``. Component labels carry a numeric prefix
        (``'1-Net Profit'`` ... ``'5-Fee'``) so that sorting the columns keeps
        them in this order. The fee column is negated.
    """
    exposure = group_summary['sum_max_in_pool']

    # (output label, source column, negate?) -- number prefixes simplify column ordering
    components = [
        ('1-Net Profit', 'pnl_net_fee', False),
        ('2-Spread', 'spread_profit', False),
        ('3-Bias', 'bias_profit', False),
        ('4-Position', 'position_profit', False),
        ('5-Fee', 'fee', True),
    ]

    columns = {}
    for label, source, negate in components:
        ratio = group_summary[source] / exposure
        columns[label] = -ratio if negate else ratio

    # Build through the ``pd`` namespace: only ``pandas as pd`` is imported in
    # this notebook, so a bare ``DataFrame`` raises NameError on a fresh kernel.
    out = pd.DataFrame(columns)

    out *= 100

    # Tag every component with the dataset letter as a second column level.
    out.columns = pd.MultiIndex.from_tuples([(x, letter) for x in out.columns])

    return out

# From here on, display every float with one decimal place.
# NOTE: this is a global pandas display option; it replaces the format set in the
# import cell, so re-displaying an earlier table after this cell uses it as well.
pd.options.display.float_format = lambda x: '{:,.1f}'.format(x)

# NOTE: dem / rep are rebound here; they no longer hold the Table 8 halves.
dem = paper_group_summary_exposure_table(dem_group_summary, 'D')
rep = paper_group_summary_exposure_table(rep_group_summary, 'R')

# Sorting on the first column level relies on the numeric label prefixes
# ('1-Net Profit' ... '5-Fee'); group_differences slices these columns by position.
table9 = pd.concat([dem, rep], axis=1).sort_index(axis=1, level=0)

table9

Unnamed: 0_level_0,1-Net Profit,1-Net Profit,2-Spread,2-Spread,3-Bias,3-Bias,4-Position,4-Position,5-Fee,5-Fee
Unnamed: 0_level_1,D,R,D,R,D,R,D,R,D,R
Efficient Small Active,3.3,23.5,3.5,0.7,5.0,0.7,-2.1,27.0,-3.1,-4.9
Efficient Small Inactive,-16.1,4.1,-1.0,-3.9,4.6,1.5,-17.4,10.4,-2.3,-3.9
Efficient Large Active,5.4,-0.2,7.3,8.1,4.4,5.9,-2.6,-9.7,-3.8,-4.6
Efficient Large Inactive,12.4,10.9,-0.7,-1.7,2.9,6.7,13.6,9.9,-3.3,-4.0
Inefficient Small Active,-23.8,5.4,0.5,-0.3,-3.2,-2.8,-18.9,13.6,-2.2,-5.1
Inefficient Small Inactive,-31.5,-10.9,-2.3,-2.9,-5.2,-9.4,-22.0,5.6,-1.9,-4.2
Inefficient Large Active,2.1,-5.7,-0.2,1.8,-0.9,0.3,5.9,-2.9,-2.7,-4.8
Inefficient Large Inactive,-10.8,-10.5,-1.7,-1.7,-2.1,-3.2,-4.6,-1.7,-2.4,-3.9


In [10]:
# use some Python-fu to remove the excess spacing
def remove_spacing(latex):
    """Collapse every run of whitespace within each line to a single space.

    The text is split on ``'\\n'`` and rejoined with ``'\\n'``, so the line
    structure is preserved; only the padding inside each line is squeezed.

    Parameters
    ----------
    latex : str
        Text to compact (here: the output of ``DataFrame.to_latex()``).

    Returns
    -------
    str
        The same lines with whitespace runs replaced by one space each.
    """
    # Raw string: '\s' in a normal string literal is an invalid escape sequence
    # (DeprecationWarning, and a SyntaxWarning from Python 3.12 on).
    return '\n'.join(re.sub(r'\s+', ' ', line) for line in latex.split('\n'))

# Print the LaTeX source for Table 9 with whitespace runs collapsed.
print(remove_spacing(table9.to_latex()))

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & 1-Net Profit & & 2-Spread & & 3-Bias & & 4-Position & & 5-Fee & \\
{} & D & R & D & R & D & R & D & R & D & R \\
\midrule
Efficient Small Active & 3.3 & 23.5 & 3.5 & 0.7 & 5.0 & 0.7 & -2.1 & 27.0 & -3.1 & -4.9 \\
Efficient Small Inactive & -16.1 & 4.1 & -1.0 & -3.9 & 4.6 & 1.5 & -17.4 & 10.4 & -2.3 & -3.9 \\
Efficient Large Active & 5.4 & -0.2 & 7.3 & 8.1 & 4.4 & 5.9 & -2.6 & -9.7 & -3.8 & -4.6 \\
Efficient Large Inactive & 12.4 & 10.9 & -0.7 & -1.7 & 2.9 & 6.7 & 13.6 & 9.9 & -3.3 & -4.0 \\
Inefficient Small Active & -23.8 & 5.4 & 0.5 & -0.3 & -3.2 & -2.8 & -18.9 & 13.6 & -2.2 & -5.1 \\
Inefficient Small Inactive & -31.5 & -10.9 & -2.3 & -2.9 & -5.2 & -9.4 & -22.0 & 5.6 & -1.9 & -4.2 \\
Inefficient Large Active & 2.1 & -5.7 & -0.2 & 1.8 & -0.9 & 0.3 & 5.9 & -2.9 & -2.7 & -4.8 \\
Inefficient Large Inactive & -10.8 & -10.5 & -1.7 & -1.7 & -2.1 & -3.2 & -4.6 & -1.7 & -2.4 & -3.9 \\
\bottomrule
\end{tabular}



The following is for Table 10.

In [11]:
def group_differences(index1, index2):
    """Compute paired group differences for one panel of Table 10.

    Reads the module-level ``table8`` and ``table9`` frames. Row ``index1[i]``
    is paired with row ``index2[i]`` and the result is ``index1 - index2``.
    Each pair must differ in the same single word of its group name (for
    example ``'Efficient'`` vs ``'Inefficient'``); that word is dropped from
    the resulting column labels.

    Parameters
    ----------
    index1, index2 : positional row indexers
        Row positions into ``table8`` / ``table9`` (which share row order).

    Returns
    -------
    pandas.DataFrame
        Rows ``'Spread + Bias'``, ``'Position'`` and ``'ROI'``; two-level
        columns ``(remaining group name, letter)``.

    Raises
    ------
    AssertionError
        If the two selections differ in length or the pairs do not all differ
        in the same attribute.

    Notes
    -----
    Columns are addressed by position: ROI is taken from the last two columns
    of ``table8`` and Spread/Bias/Position from columns ``2:-2`` of ``table9``,
    so both tables must keep their sorted column layout.
    """
    names1 = table9.iloc[index1].index
    names2 = table9.iloc[index2].index
    assert len(names1) == len(names2), 'index1 and index2 must select the same number of rows'

    # check that the group indexes are correct
    # the following should all be the same
    diff = set()
    for g1, g2 in zip(names1, names2):
        s1 = set(g1.split())
        s2 = set(g2.split())
        diff.add((tuple(s1 - s2), tuple(s2 - s1)))
    assert len(diff) == 1, (
        'row pairs do not all differ in the same attribute: {0}'.format(sorted(diff)))

    # Each side must differ by exactly one word; the unpacking fails otherwise.
    (op1,), (op2,) = diff.pop()
    print('calculating {0} - {1}'.format(op1, op2))

    # Drop the differing word as a whole word. A plain str.replace leaves a
    # double space when that word sits in the middle of the name
    # ('Efficient Large Active' -> 'Efficient  Active').
    groupings = [' '.join(w for w in g.split() if w != op1) for g in names1]

    def paired_difference(frame):
        # Positional difference of the paired rows, relabelled by grouping and
        # reshaped to one row per metric with (grouping, letter) columns.
        delta = (frame.iloc[index1].reset_index(drop=True)
                 - frame.iloc[index2].reset_index(drop=True))
        delta.index = groupings
        return delta.T.unstack(1)

    diff1 = paired_difference(table8.iloc[:, -2:])
    diff2 = paired_difference(table9.iloc[:, 2:-2])

    # Rows of diff2 are Spread, Bias, Position; report Spread + Bias combined.
    diff2 = pd.concat([diff2.iloc[0] + diff2.iloc[1], diff2.iloc[2]], axis=1).T
    diff2.index = ['Spread + Bias', 'Position']

    return pd.concat([diff2, diff1], axis=0)

In [12]:
# Efficient - Inefficient
# Row positions follow the table8/table9 row order displayed above;
# index1[i] is paired with index2[i].
# index1: Efficient   {Large Active, Small Active, Large Inactive, Small Inactive}
# index2: Inefficient {same order}
index1 = [2, 0, 3, 1]
index2 = [6, 4, 7, 5]

out = group_differences(index1, index2)

out

calculating Efficient - Inefficient


Unnamed: 0_level_0,Large Active,Large Active,Small Active,Small Active,Large Inactive,Large Inactive,Small Inactive,Small Inactive
Unnamed: 0_level_1,D,R,D,R,D,R,D,R
Spread + Bias,12.8,12.0,11.2,4.6,6.0,9.8,11.1,9.9
Position,-8.5,-6.8,16.8,13.4,18.2,11.6,4.7,4.8
ROI,3.3,5.4,27.1,18.1,23.3,21.3,15.4,14.9


In [13]:
# Print the LaTeX source for the panel computed in the previous cell (Table 10).
print(remove_spacing(out.to_latex()))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & Large Active & & Small Active & & Large Inactive & & Small Inactive & \\
{} & D & R & D & R & D & R & D & R \\
\midrule
Spread + Bias & 12.8 & 12.0 & 11.2 & 4.6 & 6.0 & 9.8 & 11.1 & 9.9 \\
Position & -8.5 & -6.8 & 16.8 & 13.4 & 18.2 & 11.6 & 4.7 & 4.8 \\
ROI & 3.3 & 5.4 & 27.1 & 18.1 & 23.3 & 21.3 & 15.4 & 14.9 \\
\bottomrule
\end{tabular}



In [14]:
# Active - Inactive
# Row positions follow the table8/table9 row order displayed above;
# index1[i] is paired with index2[i].
# index1: Active   {Efficient Large, Efficient Small, Inefficient Large, Inefficient Small}
# index2: Inactive {same order}
index1 = [2, 0, 6, 4]
index2 = [3, 1, 7, 5]

out = group_differences(index1, index2)

out

calculating Active - Inactive


Unnamed: 0_level_0,Efficient Large,Efficient Large,Efficient Small,Efficient Small,Inefficient Large,Inefficient Large,Inefficient Small,Inefficient Small
Unnamed: 0_level_1,D,R,D,R,D,R,D,R
Spread + Bias,9.5,9.2,5.0,3.8,2.7,7.0,4.9,9.1
Position,-16.2,-19.6,15.3,16.6,10.5,-1.2,3.1,8.1
ROI,-7.1,-11.1,19.4,19.4,12.9,4.8,7.7,16.3


In [15]:
# Print the LaTeX source for the panel computed in the previous cell (Table 10).
print(remove_spacing(out.to_latex()))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & Efficient Large & & Efficient Small & & Inefficient Large & & Inefficient Small & \\
{} & D & R & D & R & D & R & D & R \\
\midrule
Spread + Bias & 9.5 & 9.2 & 5.0 & 3.8 & 2.7 & 7.0 & 4.9 & 9.1 \\
Position & -16.2 & -19.6 & 15.3 & 16.6 & 10.5 & -1.2 & 3.1 & 8.1 \\
ROI & -7.1 & -11.1 & 19.4 & 19.4 & 12.9 & 4.8 & 7.7 & 16.3 \\
\bottomrule
\end{tabular}



In [16]:
# Large - Small
# Row positions follow the table8/table9 row order displayed above;
# index1[i] is paired with index2[i].
# index1: Large {Efficient Active, Efficient Inactive, Inefficient Active, Inefficient Inactive}
# index2: Small {same order}
index1 = [2, 3, 6, 7]
index2 = [0, 1, 4, 5]

out = group_differences(index1, index2)

out

calculating Large - Small


Unnamed: 0_level_0,Efficient Active,Efficient Active,Efficient Inactive,Efficient Inactive,Inefficient Active,Inefficient Active,Inefficient Inactive,Inefficient Inactive
Unnamed: 0_level_1,D,R,D,R,D,R,D,R
Spread + Bias,3.2,12.7,-1.4,7.3,1.5,5.2,3.7,7.3
Position,-0.5,-36.7,31.0,-0.5,24.9,-16.5,17.4,-7.2
ROI,2.1,-23.7,28.6,6.8,25.9,-11.1,20.7,0.4


In [17]:
# Print the LaTeX source for the panel computed in the previous cell (Table 10).
print(remove_spacing(out.to_latex()))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & Efficient Active & & Efficient Inactive & & Inefficient Active & & Inefficient Inactive & \\
{} & D & R & D & R & D & R & D & R \\
\midrule
Spread + Bias & 3.2 & 12.7 & -1.4 & 7.3 & 1.5 & 5.2 & 3.7 & 7.3 \\
Position & -0.5 & -36.7 & 31.0 & -0.5 & 24.9 & -16.5 & 17.4 & -7.2 \\
ROI & 2.1 & -23.7 & 28.6 & 6.8 & 25.9 & -11.1 & 20.7 & 0.4 \\
\bottomrule
\end{tabular}

