# Trader Analysis Results Tables

This notebook produces Tables 8, 9, and 10 in the paper. Before running it, you must first run the two Trader Analysis notebooks to create the necessary data files.

In [None]:
import os
import sys
import re

from itertools import *
from operator import mul
from functools import reduce
import pickle

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Short alias for pandas' IndexSlice helper.
idx = pd.IndexSlice

from research_tools import storage

def _format_float(x):
    """Format a float for display: 4 decimals when |x| < 1, otherwise 2."""
    if abs(x) < 1:
        return '{:,.4f}'.format(x)
    return '{:,.2f}'.format(x)


# Install the formatter as the global pandas float display format.
pd.options.display.float_format = _format_float

# Load Data

First load the data we saved at the end of the Trader Analysis notebooks.

In [None]:
def load_pickle(filename):
    """Unpickle and return the object stored in ``data/<filename>``.

    The ``data`` directory is resolved relative to the current working
    directory at call time.
    """
    path = os.path.join('data', filename)
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [None]:
# Move up one directory (presumably so the relative 'data' folder used by
# load_pickle resolves -- verify against the repository layout).
# NOTE: re-running this cell moves the working directory up again.
os.chdir('..')

# Results saved under the 'dem.' prefix.
(
    dem_trader_classifications,
    dem_trader_stats_summary,
    dem_group_summary,
) = map(load_pickle, [
    'dem.trader_classifications.p',
    'dem.trader_stats_summary.p',
    'dem.group_summary.p',
])

# Results saved under the 'gop.' prefix (bound to rep_* names).
(
    rep_trader_classifications,
    rep_trader_stats_summary,
    rep_group_summary,
) = map(load_pickle, [
    'gop.trader_classifications.p',
    'gop.trader_stats_summary.p',
    'gop.group_summary.p',
])

In [None]:
# retrieve_all is handed a one-element list of names; the single-target
# unpacking below fails unless exactly one item comes back.
basename = 'dem'
[dem_behavior_analysis] = storage.retrieve_all([basename + '.behavior_analysis'])

basename = 'gop'
[rep_behavior_analysis] = storage.retrieve_all([basename + '.behavior_analysis'])

Below is Table 8.

In [None]:
def paper_group_summary_table(group_summary, letter):
    """Build the per-group summary columns (Table 8) for one set of traders.

    Parameters
    ----------
    group_summary : pandas.DataFrame
        One row per group; must provide the columns ``trader_count``,
        ``pnl_net_fee``, ``sum_max_in_pool`` and ``net_roi``.
    letter : str
        Label placed in the second level of the output column index
        (the notebook passes ``'D'`` or ``'R'``).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``group_summary`` with two-level columns
        ``(metric, letter)`` for the four metrics computed below.
    """
    # Only ``pd`` is imported in this notebook, so the constructor has to be
    # referenced through it (a bare ``DataFrame`` raises NameError).
    out = pd.DataFrame()

    # Share of all traders that fall in each group, as a percentage.
    out['% of Traders'] = 100 * group_summary.trader_count / group_summary.trader_count.sum()
    # Group totals divided by the number of traders in the group.
    out['Avg Profit ($)'] = group_summary.pnl_net_fee / group_summary.trader_count
    out['Avg Exposure ($)'] = group_summary.sum_max_in_pool / group_summary.trader_count
    # net_roi is scaled by 100 to express it as a percentage.
    out['ROI'] = 100 * group_summary.net_roi

    # Tag every metric with the letter so two tables can be concatenated
    # side by side without column-name clashes.
    out.columns = pd.MultiIndex.from_tuples([(x, letter) for x in out.columns])

    return out

dem = paper_group_summary_table(group_summary=dem_group_summary, letter='D')
rep = paper_group_summary_table(group_summary=rep_group_summary, letter='R')

# Place the two tables side by side, then sort the columns so each metric's
# 'D' and 'R' columns end up next to each other.
table8 = pd.concat([dem, rep], axis='columns')
table8 = table8.sort_index(axis='columns', level=0)

table8

In [None]:
# Print the LaTeX source of Table 8.
print(table8.to_latex())

In [None]:
# Total trader count across all 'dem' groups (the denominator behind
# '% of Traders' in Table 8).
dem_group_summary['trader_count'].sum()

In [None]:
# Total trader count across all 'gop' groups (the denominator behind
# '% of Traders' in Table 8).
rep_group_summary['trader_count'].sum()

Below is Table 9.

In [None]:
def paper_group_summary_exposure_table(group_summary, letter):
    """Build the profit-per-exposure columns (Table 9) for one set of traders.

    Each output column is a group-level total divided by the group's
    ``sum_max_in_pool`` and expressed as a percentage.

    Parameters
    ----------
    group_summary : pandas.DataFrame
        One row per group; must provide the columns ``pnl_net_fee``,
        ``spread_profit``, ``bias_profit``, ``position_profit``, ``fee`` and
        ``sum_max_in_pool``.
    letter : str
        Label placed in the second level of the output column index
        (the notebook passes ``'D'`` or ``'R'``).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``group_summary`` with two-level columns
        ``(metric, letter)``.
    """
    # Only ``pd`` is imported in this notebook, so the constructor has to be
    # referenced through it (a bare ``DataFrame`` raises NameError).
    out = pd.DataFrame()

    # number prefixes to simplify column ordering
    out['1-Net Profit'] = group_summary.eval('pnl_net_fee / sum_max_in_pool')
    out['2-Spread'] = group_summary.eval('spread_profit / sum_max_in_pool')
    out['3-Bias'] = group_summary.eval('bias_profit / sum_max_in_pool')
    out['4-Position'] = group_summary.eval('position_profit / sum_max_in_pool')
    # The fee ratio is negated before being reported.
    out['5-Fee'] = -group_summary.eval('fee / sum_max_in_pool')

    # Convert the ratios to percentages.
    out *= 100

    # Tag every metric with the letter so two tables can be concatenated
    # side by side without column-name clashes.
    out.columns = pd.MultiIndex.from_tuples([(x, letter) for x in out.columns])

    return out

# Switch float display to one decimal place; this replaces whatever
# float_format was set earlier in the session.
pd.set_option('display.float_format', '{:,.1f}'.format)

dem = paper_group_summary_exposure_table(group_summary=dem_group_summary, letter='D')
rep = paper_group_summary_exposure_table(group_summary=rep_group_summary, letter='R')

# Place the two tables side by side, then sort the columns so each metric's
# 'D' and 'R' columns end up next to each other.
table9 = pd.concat([dem, rep], axis='columns')
table9 = table9.sort_index(axis='columns', level=0)

table9

In [None]:
def remove_spacing(latex):
    """Collapse every run of whitespace within each line to a single space.

    The text is split on ``'\\n'``, each line is processed independently,
    and the lines are re-joined with ``'\\n'``, so the line structure of the
    input is preserved. Leading or trailing whitespace on a line is reduced
    to one space rather than removed.

    Parameters
    ----------
    latex : str
        Text to compact (the notebook passes ``DataFrame.to_latex()`` output).

    Returns
    -------
    str
        The compacted text.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    return '\n'.join(re.sub(r'\s+', ' ', line) for line in latex.split('\n'))

# Print the LaTeX source of Table 9 with runs of whitespace collapsed.
print(remove_spacing(table9.to_latex()))

The following is for Table 10.

In [None]:
def group_differences(index1, index2):
    """Tabulate (first selection - second selection) differences for Table 10.

    Reads the notebook-level ``table8`` and ``table9`` frames. ``index1`` and
    ``index2`` are positional (``iloc``) row selections that are paired
    element by element; the row labels of every pair must differ by the same
    single word on each side, otherwise the assertion or the unpacking of the
    word pair fails.

    Returns a frame with rows ``'Spread + Bias'`` and ``'Position'`` (from
    columns ``2:-2`` of ``table9``) followed by the difference of the last
    two columns of ``table8`` (the ROI columns for ``table8`` as built in
    this notebook). Columns are two-level: (remaining group label, letter).
    """
    first_labels = table9.iloc[index1].index
    second_labels = table9.iloc[index2].index

    # check that the group indexes are correct: every pair of labels has to
    # yield the same (words only in first, words only in second) tuple
    word_swaps = set()
    for left, right in zip(first_labels, second_labels):
        left_words = set(left.split())
        right_words = set(right.split())
        word_swaps.add((tuple(left_words - right_words), tuple(right_words - left_words)))
    assert len(word_swaps) == 1

    (op1,), (op2,) = word_swaps.pop()
    print('calculating {0} - {1}'.format(op1, op2))
    # Row labels for the result: the first selection's labels minus op1.
    groupings = [label.replace(op1, '').strip() for label in first_labels]

    def positional_delta(table, columns):
        """Subtract the paired rows by position and pivot letters into columns."""
        upper = table.iloc[index1, columns].reset_index(drop=True)
        lower = table.iloc[index2, columns].reset_index(drop=True)
        delta = upper - lower
        delta.index = groupings
        return delta.T.unstack(1)

    roi_delta = positional_delta(table8, slice(-2, None))
    component_delta = positional_delta(table9, slice(2, -2))

    # Rows of component_delta are, in order, the three middle metrics of
    # table9; the first two are summed and the third kept on its own.
    combined = pd.concat(
        [component_delta.iloc[0] + component_delta.iloc[1], component_delta.iloc[2]],
        axis=1,
    ).T
    combined.index = ['Spread + Bias', 'Position']

    return pd.concat([combined, roi_delta], axis=0)

In [None]:
# Efficient - Inefficient
# Positional row numbers into table8/table9, paired element by element;
# group_differences asserts that each pair differs by a single label word.
index1, index2 = [2, 0, 3, 1], [6, 4, 7, 5]

out = group_differences(index1=index1, index2=index2)

out

In [None]:
# Print the LaTeX source of the current `out` table with whitespace collapsed.
print(remove_spacing(out.to_latex()))

In [None]:
# Active - Inactive
# Positional row numbers into table8/table9, paired element by element;
# group_differences asserts that each pair differs by a single label word.
index1, index2 = [2, 0, 6, 4], [3, 1, 7, 5]

out = group_differences(index1=index1, index2=index2)

out

In [None]:
# Print the LaTeX source of the current `out` table with whitespace collapsed.
print(remove_spacing(out.to_latex()))

In [None]:
# Large - Small
# Positional row numbers into table8/table9, paired element by element;
# group_differences asserts that each pair differs by a single label word.
index1, index2 = [2, 3, 6, 7], [0, 1, 4, 5]

out = group_differences(index1=index1, index2=index2)

out

In [None]:
# Print the LaTeX source of the current `out` table with whitespace collapsed.
print(remove_spacing(out.to_latex()))