In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import os

In [12]:
# helpers
def category_filter(df, cat, col_name='player_id'):
    """Return the rows of ``df`` whose ``col_name`` value equals ``cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    cat : scalar
        Value compared with ``==`` against every entry of ``col_name``.
    col_name : str, default 'player_id'
        Name of the column used for the comparison.

    Returns
    -------
    pandas.DataFrame
        The matching rows, selected with a boolean mask (index labels of the
        selected rows are those of ``df``).
    """
    return df[df[col_name] == cat]

def extract_session_ids(df):
    """Read the session identifiers from the first row of ``df``.

    Only positional row 0 is consulted. The result is the tuple
    ``(subsession_id, market_id)`` built from the columns of the same names.
    An empty frame makes ``iloc[0]`` raise ``IndexError``.
    """
    head = df.iloc[0]
    return tuple(head[key] for key in ('subsession_id', 'market_id'))

# Default set of trader_model_name values kept by df_processor.
SUBJECT_ROLES = [
    'automated',
    'out',
    'manual',
]
# Default columns that df_processor multiplies by K_SCALE.
SCALED_FIELDS = [
    'bid', 'offer',
    'staged_bid', 'staged_offer',
    'net_worth', 'cash',
    'implied_bid', 'implied_offer',
    'best_bid', 'best_offer',
    'reference_price',
]
K_SCALE = 1e-4
MIN_BID = 0
MAX_OFFER = 2 ** 31 - 1  # 2147483647, the largest signed 32-bit integer
# Output folder pattern, filled with (subsession_id, timestamp).
FOLDER_NAME_BASE = 'subsession_%s_%s'
# Figure path pattern: the two folder fields, then (market_id, player_id, title).
REPORT_FILENAME_BASE = os.path.join(FOLDER_NAME_BASE, 'market:%s_player_%s_%s.png')


def make_dir(subsession_id, ts):
    """Make sure the output folder for a subsession exists.

    The folder name is ``FOLDER_NAME_BASE`` filled with ``subsession_id`` and
    ``ts``; being a relative path, it is created under the current working
    directory.

    Parameters
    ----------
    subsession_id, ts
        Values substituted into the two ``%s`` fields of ``FOLDER_NAME_BASE``.

    Raises
    ------
    OSError
        If the folder cannot be created for any reason other than already
        existing as a directory (permission denied, a name the filesystem
        rejects, a regular file of the same name, ...).
    """
    # exist_ok=True tolerates only the "directory already there" case, so a
    # genuine failure surfaces here rather than later as a confusing error
    # when the figure is written into a folder that was never created.
    os.makedirs(FOLDER_NAME_BASE % (subsession_id, ts), exist_ok=True)
    

def extract_date(df, ts_col_name='timestamp'):
    """Return the first entry of the ``ts_col_name`` column, converted with ``str``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column.
    ts_col_name : str, default 'timestamp'
        Column to read from.

    Returns
    -------
    str
        ``str()`` of the value at positional row 0. An empty column makes
        ``iloc[0]`` raise ``IndexError``.
    """
    first_stamp = df[ts_col_name].iloc[0]
    return str(first_stamp)
    

def df_processor(df, ts_col_name='timestamp', roles_to_include=SUBJECT_ROLES, scaled_fields=SCALED_FIELDS, k=K_SCALE):
    """Filter, rescale and re-base the timestamps of a records frame.

    Steps, in order:

    1. keep only rows whose ``trader_model_name`` is in ``roles_to_include``;
    2. multiply every column listed in ``scaled_fields`` by ``k``
       (skipped when ``scaled_fields`` is empty or ``None``);
    3. parse ``ts_col_name`` with ``pd.to_datetime`` and shift it so that the
       first kept row sits at ``1970-01-01 00:00:00`` and later rows are
       offset from it by their elapsed time (skipped when no rows are kept,
       in which case the column is left as it was).

    Parameters
    ----------
    df : pandas.DataFrame
        Input records; it is not modified.
    ts_col_name : str, default 'timestamp'
        Column holding the timestamps.
    roles_to_include : list-like
        Accepted values of the ``trader_model_name`` column.
    scaled_fields : list of str
        Columns multiplied by ``k``.
    k : float
        Scale factor.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame with the processed rows.
    """
    # Filter first and copy the result: the assignments below then write into
    # a frame that owns its data instead of into a slice, which is what could
    # trigger pandas' SettingWithCopyWarning.
    is_subject_record = df.trader_model_name.isin(roles_to_include)
    df_copy = df[is_subject_record].copy()
    if scaled_fields:
        df_copy[scaled_fields] = k * df_copy[scaled_fields]
    if len(df_copy) != 0:
        stamps = pd.to_datetime(df_copy[ts_col_name])
        elapsed = stamps - stamps.iloc[0]
        # The elapsed time is stored as datetimes counted from the epoch.
        # Passing a timedelta Series to pd.to_datetime raises TypeError on
        # pandas >= 1.0, so the epoch anchor is added explicitly; the
        # resulting values match what the implicit conversion used to give.
        df_copy[ts_col_name] = pd.Timestamp(0) + elapsed
    return df_copy

In [15]:
# plotting
# Switch matplotlib to its bundled 'fivethirtyeight' style sheet; this is a
# global setting and affects every figure created after this cell runs.
plt.style.use('fivethirtyeight')

def ts_plotter(dataframe, column_names: list, legend_labels, y_axis_limit, y_axis_title,
        figsize=(36, 9), ts_col_name='timestamp', save_fig=True, fig_name='figure'):
    """Plot several columns of ``dataframe`` against its time column on one axes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source of both the x values (``ts_col_name``) and the y series.
    column_names : list
        Columns to draw, one line each.
    legend_labels : sequence
        Legend text per line; ``legend_labels[i]`` labels ``column_names[i]``.
    y_axis_limit : pair
        ``(bottom, top)`` limits of the y axis.
    y_axis_title : str
        Text used as the axes title.
    figsize : tuple, default (36, 9)
        Figure size passed to ``plt.subplots``.
    ts_col_name : str, default 'timestamp'
        Column used for the x axis; its first value becomes the left x limit.
    save_fig : bool, default True
        Write the figure to ``fig_name`` when true; skip saving otherwise.
    fig_name : str, default 'figure'
        Target path handed to ``savefig``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    for ix, name in enumerate(column_names):
        ax.plot(dataframe[ts_col_name], dataframe[name], linewidth=2, label=legend_labels[ix])

    # The axis styling does not depend on the individual series, so it is
    # applied once after all lines are drawn. It is skipped when nothing was
    # plotted, which leaves the axes untouched exactly as an empty
    # ``column_names`` did before.
    if ax.lines:
        ax.legend(fontsize='medium')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%M:%S'))
        ax.set_xlim(left=dataframe[ts_col_name].iloc[0])
        ax.set_xlabel('Time')
        bot_y_lim, top_y_lim = y_axis_limit
        ax.set_ylim(bottom=bot_y_lim, top=top_y_lim)
        ax.set_title(y_axis_title, fontsize=16)
        ax.tick_params(axis='x', which='both', labelsize=8)
        ax.tick_params(axis='y', which='both', labelsize=16)
        ax.xaxis.set_major_locator(plt.MaxNLocator(60))

    # Honour the save_fig flag (it used to be ignored) and save through the
    # figure object rather than whatever pyplot considers the current figure.
    if save_fig:
        fig.savefig(fig_name)
    plt.show()

def hft_report(source_filename, plots_meta):
    """Produce one saved figure per player and per plot group from a CSV file.

    The CSV is read with ``pd.read_csv`` and split by ``player_id``. Each
    player's rows are passed through ``df_processor``; players left with no
    rows are skipped. For every entry of ``plots_meta['y_groups']`` a figure
    is drawn with ``ts_plotter`` and written to the path built from
    ``REPORT_FILENAME_BASE``.

    Parameters
    ----------
    source_filename : str
        Path of the CSV file to read.
    plots_meta : dict
        Parallel lists under the keys ``'y_groups'`` (columns per figure),
        ``'y_titles'``, ``'legend_labels'`` and ``'y_limits'``; entry ``i`` of
        each list describes figure ``i``.
    """
    records = pd.read_csv(source_filename)
    for player_id in records['player_id'].unique():
        player_rows = category_filter(records, player_id)
        if player_rows.empty:
            continue

        # Identifiers are read before processing, from the unfiltered rows.
        subsession_id, market_id = extract_session_ids(player_rows)
        player_rows = df_processor(player_rows)
        if player_rows.empty:
            continue

        for ix, columns_set in enumerate(plots_meta['y_groups']):
            plt.close('all')
            title = plots_meta['y_titles'][ix]
            # The folder timestamp comes from the first row of the whole
            # file, not from this player's rows.
            session_time = extract_date(records)
            filename = REPORT_FILENAME_BASE % (
                subsession_id, session_time, market_id, player_id, title)
            make_dir(subsession_id, session_time)
            ts_plotter(
                player_rows,
                column_names=columns_set,
                legend_labels=plots_meta['legend_labels'][ix],
                y_axis_limit=plots_meta['y_limits'][ix],
                y_axis_title=title,
                fig_name=filename,
            )