# Process data config

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import json
import joblib
from collections import defaultdict
from tqdm.auto import tqdm

import lightgbm as lgb

from eli5.sklearn import PermutationImportance
from shaphypetune import BoostBoruta

from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold, train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import log_loss, mean_squared_error, precision_score

import torch

from colorama import Style, Fore


class CFG:
    """Notebook-wide switches read by the cells below."""
    # signal side; selects the training pickle and the names of saved artifacts ('buy', anything else is treated as sell)
    ttype = 'buy'
    # when True, run lgbm_tuning and write a fresh feature-importance CSV
    select_features = False
    # when True, train the PyTorch network and plot its loss curves
    train_NN = False
    # when True, run the LightGBM train / test / inference cell
    train_LGBM = True
    # number of times lgbm_tuning repeats the whole cross-validation
    n_repeats = 1
    # number of CV folds used by lgbm_tuning
    n_folds = 5

# Load data and add indicators

In [2]:
# Number of hours before the signal point covered by the collected history;
# it is also part of the training pickle's file name.
last = 96

# Anything other than 'buy' falls back to the sell dataset.
side = 'buy' if CFG.ttype == 'buy' else 'sell'
df = pd.read_pickle(f'signal_stat/train_{side}_{last}.pkl')

# Optional filter kept for experiments:
# df = df[df['pattern'].isin(['MACD', 'STOCH_RSI'])]

display(df.head(), df.shape)


Unnamed: 0,time,open,high,low,close,volume,rsi,stoch_slowk,stoch_slowd,stoch_slowk_dir,...,linear_reg_angle_prev_96,macd_prev_96,macdsignal_prev_96,macdhist_prev_96,macd_dir_prev_96,macdsignal_dir_prev_96,atr_prev_96,close_smooth_prev_96,target,ttype
0,2022-10-26 01:00:00,24.15,24.15,23.76,23.84,13772.65,64.054892,83.517327,87.090858,-0.029809,...,-10.369665,-0.020052,-0.052992,0.03294,-0.201703,-0.103966,0.133742,22.383333,1,buy
1,2022-10-26 05:00:00,24.04,24.1,23.98,24.03,8515.96,68.293118,54.118435,61.22084,-0.108235,...,-11.869113,-0.017024,-0.033512,0.016488,0.0,-0.106127,0.121932,22.38125,1,buy
4,2022-12-27 09:00:00,0.3615,0.3691,0.3614,0.3688,19678900.0,69.6849,34.733219,35.353221,-0.096031,...,7.045257,0.001233,0.000785,0.000447,0.0,0.159713,0.002303,0.346717,0,buy
5,2022-12-27 10:00:00,0.1785,0.1794,0.1777,0.1785,2157869.0,71.380013,34.564017,34.117011,-0.041376,...,6.730134,0.000123,0.000277,-0.000155,-0.171348,-0.088223,0.002358,0.165029,1,buy
6,2022-12-27 11:00:00,116.1,116.7,115.3,115.5,3039.31,74.723751,81.51313,83.198644,-0.014239,...,0.832463,0.377508,0.098834,0.278674,0.0,-0.866341,0.978604,104.266667,0,buy


(6677, 505)

# Train_test_split by ticker group

In [3]:
from sklearn.model_selection import GroupShuffleSplit

# Hold out 10% of the ticker groups as a test set. Two splits are configured,
# but only the first one produced by the generator is consumed.
splitter = GroupShuffleSplit(test_size=0.1, n_splits=2, random_state = 7)

split = splitter.split(df, groups=df['ticker'])
train_inds, test_inds = next(split)

# Take explicit copies: a later cell adds an 'oof' column to train_df, and
# writing into a slice of df triggers pandas' SettingWithCopyWarning.
train_df = df.iloc[train_inds].copy()
test_df = df.iloc[test_inds].copy()

# A ticker must never be on both sides of the split.
assert set(train_df['ticker']).isdisjoint(test_df['ticker'])

# Pytorch

### Create dataset

In [4]:
test_size=0.2

# Feature matrix: everything except the label and the identifier/meta columns.
x_data = train_df.drop(['target', 'time', 'ticker', 'pattern', 'ttype'], axis=1)
y_data = train_df['target']

# Fixed seed so that Restart & Run All reproduces the same train/validation rows.
# NOTE(review): this is a row-level random split, so rows of one ticker can land
# on both sides - confirm that is acceptable for the NN validation score.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_data, y_data, test_size=test_size, shuffle=True, random_state=7
)
input_channels = x_data.shape[1]
output_size = len(set(y_data))

# Fit the scaler on the training rows only and reuse it for validation.
# The scaled arrays go straight into tensors instead of being written back
# into the split DataFrames (which are slices of x_data).
scaler = StandardScaler()
x_train = torch.tensor(scaler.fit_transform(x_train), dtype=torch.float32)
x_valid = torch.tensor(scaler.transform(x_valid), dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_valid = torch.tensor(y_valid.values, dtype=torch.float32)

display(type(x_train), type(y_train))

torch.Tensor

torch.Tensor

### Find available device

In [5]:
# use the first CUDA GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Build model

In [6]:
class SigModel(torch.nn.Module):
    """Fully connected binary classifier.

    Four hidden stages (Linear -> ReLU -> Dropout(0.25)) with widths
    64, 128, 96 and 32, followed by a single-unit Linear layer and a Sigmoid,
    so the forward pass returns one value in (0, 1) per input row.
    """

    def __init__(self, input_channels):
        super().__init__()
        widths = (input_channels, 64, 128, 96, 32)
        self.layers = torch.nn.Sequential()
        # Hidden stages are registered as lin<k>/relu<k>/do<k>, k = 1..4.
        for stage, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            self.layers.add_module(f'lin{stage}', torch.nn.Linear(n_in, n_out))
            self.layers.add_module(f'relu{stage}', torch.nn.ReLU())
            self.layers.add_module(f'do{stage}', torch.nn.Dropout(p=0.25))
        # Output head: one unit squashed by a sigmoid.
        self.layers.add_module('lin5', torch.nn.Linear(widths[-1], 1))
        self.layers.add_module('sigmoid', torch.nn.Sigmoid())

    def forward(self, input):
        """Run ``input`` through the sequential stack and return its output."""
        return self.layers(input)

# Train model

In [7]:
from torch import nn
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

# train function
def train_epoch(model, train_loader, criterion, optimizer, scheduler):
    """Run one full-batch training step followed by a validation pass.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; assumed to return one value per row, shape (N, 1).
    train_loader : tuple
        ``(x_train, x_valid, y_train, y_valid)`` tensors. Despite the name this
        is not a DataLoader: the whole training set is used as a single batch.
    criterion : callable
        Loss taking ``(predictions, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer holding the model parameters.
    scheduler :
        Scheduler whose ``step`` accepts the validation loss
        (e.g. ``ReduceLROnPlateau``).

    Returns
    -------
    tuple
        ``(train_loss, train_acc, val_loss, val_acc)`` as 0-dim tensors.
        The model is left in evaluation mode.
    """
    x_train, x_valid, y_train, y_valid = train_loader

    # put the model in train mode (enables dropout)
    model.train()

    # Drop only the trailing feature axis: a bare .squeeze() would also remove
    # the batch axis for a single-row batch and the loss would then reject the
    # size mismatch between predictions and targets.
    train_preds = model(x_train).squeeze(-1)
    train_loss = criterion(train_preds, y_train)
    train_acc = (train_preds.round() == y_train).float().mean()

    # reset gradients so they do not accumulate across steps
    optimizer.zero_grad() # ~ model.zero_grad()
    # calculate gradient
    train_loss.backward()
    # update weights
    optimizer.step()

    # put the model in evaluation mode (disables dropout)
    model.eval()

    with torch.no_grad():
        val_preds = model(x_valid).squeeze(-1)
        val_loss = criterion(val_preds, y_valid)
        val_acc = (val_preds.round() == y_valid).float().mean()

    # let the scheduler adapt the learning rate based on the validation loss
    scheduler.step(val_loss)

    return train_loss, train_acc, val_loss, val_acc

# Initialize model
if CFG.train_NN:
    model = SigModel(input_channels).to(device)

    # Number of epochs
    epochs = 100000
    # Report progress roughly 20 times per run. max(1, ...) keeps the modulus
    # non-zero when epochs < 20 (the plain epochs / 20 interval would be 0 and
    # the modulo below would raise ZeroDivisionError).
    log_every = max(1, epochs // 20)

    # Send data to the device
    x_train, x_valid = x_train.to(device), x_valid.to(device)
    y_train, y_valid = y_train.to(device), y_valid.to(device)
    # train_epoch expects this 4-tuple; every epoch uses the full data as one batch
    train_loader = x_train, x_valid, y_train, y_valid

    # Empty loss lists to track values
    epoch_count, train_loss_values, valid_loss_values = [], [], []

    criterion = nn.BCELoss()
    learning_rate = 1e-6
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    # Halve the learning rate when the validation loss stops improving
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2000, threshold=1e-2)

    # Loop through the data
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc, val_loss, val_acc = train_epoch(model, train_loader, criterion, optimizer, scheduler)

        # Print progress and record the losses for the loss-curve plot
        if epoch % log_every == 0:
            print(f'Epoch: {epoch:4.0f} | Train Loss: {train_loss:.5f}, Train Acc: {train_acc:.5f}\
                Validation Loss: {val_loss:.5f}, Val Acc: {val_acc:.5f}\
                    LR: {optimizer.state_dict()["param_groups"][0]["lr"]}')

            epoch_count.append(epoch)
            train_loss_values.append(train_loss.cpu().detach().numpy())
            valid_loss_values.append(val_loss.cpu().detach().numpy())


# Plot NN train results

In [8]:
import matplotlib.pyplot as plt

# Loss curves are only available when the network was trained in this session.
if CFG.train_NN:
    for curve, curve_label in ((train_loss_values, 'Training Loss'),
                               (valid_loss_values, 'Validation Loss')):
        plt.plot(epoch_count, curve, label=curve_label)
    plt.title('Training & Validation Loss Curves')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Select features

In [9]:
# Hex colour palette (not referenced by the other cells visible in this notebook).
palette = ['#302c36', '#037d97', '#E4591E', '#C09741',
           '#EC5B6D', '#90A6B1', '#6ca957', '#D8E3E2']

# colorama style/colour prefixes used to highlight values in printed progress lines
blk = Style.BRIGHT + Fore.BLACK
red = Style.BRIGHT + Fore.RED
blu = Style.BRIGHT + Fore.BLUE
# restores the default console style after a coloured fragment
res = Style.RESET_ALL

def lgbm_tuning(df, permut=False, boruta=False):
    """Cross-validate LightGBM grouped by ticker and accumulate feature importances.

    Reads the module-level ``task_type`` ('cls' for classification, anything
    else for regression), ``params`` (LightGBM keyword arguments) and ``CFG``
    (``n_repeats``, ``n_folds``) at call time.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with 'target', 'ticker' and (regression only) 'close' columns.
        Every column other than 'time', 'target', 'ticker', 'pattern' and
        'ttype' is used as a feature.
    permut : bool
        Also compute eli5 permutation importance on each validation fold.
    boruta : bool
        Also run BoostBoruta (SHAP importances) on each fold.

    Returns
    -------
    tuple
        ``(perm_df_, feature_importances_, boruta_df_, mean_score)``: the
        importances summed over all folds and repeats (the permutation/Boruta
        frames stay empty when the option is off or, for Boruta, when every fit
        failed) and the mean out-of-fold score over repeats (log-loss for
        classification, RMSE for regression).
    """
    features = [c for c in df.columns if c not in ['time', 'target', 'ticker', 'pattern', 'ttype']]
    groups = df['ticker']
    is_cls = task_type == 'cls'

    outer_cv_score = [] # out-of-fold score of every repeat

    perm_df_ = pd.DataFrame()
    feature_importances_ = pd.DataFrame()
    boruta_df_ = pd.DataFrame()

    for i in range(CFG.n_repeats):
        print(f'Repeat {blu}#{i+1}{res}')

        if is_cls:
            y_fold = df['target']
            # Offset the seed by the repeat index: with a constant seed every
            # repeat would rebuild exactly the same folds.
            kf = StratifiedGroupKFold(n_splits=CFG.n_folds, shuffle=True, random_state=180820231 + i)
            eval_metric = 'logloss'
        else:
            # regression target: relative distance between target and close price
            y_fold = (df['target'] - df['close']) / df['close']
            kf = GroupKFold(n_splits=CFG.n_folds)
            eval_metric = 'mse'

        X, y = df[features], y_fold
        oof = np.zeros(len(df))

        # Folds keep all rows of a ticker together (stratified by target for classification)
        for fold, (train_idx, val_idx) in enumerate(kf.split(X, y, groups)):
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]

            if is_cls:
                clf = lgb.LGBMClassifier(**params)
            else:
                clf = lgb.LGBMRegressor(**params)
            clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)],
                    eval_metric=eval_metric,
                    callbacks=[lgb.log_evaluation(100)])

            if is_cls:
                val_preds = clf.predict_proba(X_val)[:,1]
                val_score = log_loss(y_val, val_preds)
            else:
                val_preds = clf.predict(X_val)
                # RMSE via an explicit sqrt; the `squared=False` argument is
                # deprecated in recent scikit-learn releases.
                val_score = np.sqrt(mean_squared_error(y_val, val_preds))

            oof[val_idx] = val_preds
            # may be None when no early stopping is configured, hence the !s conversion below
            best_iter = clf.best_iteration_

            print(f'Fold: {blu}{fold + 1:>3}{res}| loss: {blu}{val_score:.5f}{res}| Best iteration: {blu}{best_iter!s:>4}{res}')

            # permutation importance
            if permut:
                perm = PermutationImportance(clf, scoring=None, n_iter=1,
                                             random_state=42, cv=None, refit=False).fit(X_val, y_val)

                perm_importance_df = pd.DataFrame({'importance': perm.feature_importances_},
                                                    index=X_val.columns).sort_index()

                if perm_df_.shape[0] == 0:
                    perm_df_ = perm_importance_df.copy()
                else:
                    perm_df_ += perm_importance_df

            # gboost feature importance; rows are ordered by feature name so the
            # positional sum across folds lines up
            f_i = pd.DataFrame(sorted(zip(clf.feature_importances_, X.columns),
                                      reverse=True, key=lambda x: x[1]),
                                columns=['Value','Feature'])

            if feature_importances_.shape[0] == 0:
                feature_importances_ = f_i.copy()
            else:
                feature_importances_['Value'] += f_i['Value']

            # BORUTA importance
            if boruta:
                model = BoostBoruta(clf, importance_type='shap_importances', train_importance=False)
                try:
                    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)],
                            eval_metric=eval_metric,
                            callbacks=[lgb.log_evaluation(100)])
                except RuntimeError:
                    # best effort: a fold whose Boruta fit fails contributes no ranking
                    continue

                boruta_importance_df = pd.DataFrame({'importance': model.ranking_},
                                                        index=X_train.columns).sort_index()
                if boruta_df_.shape[0] == 0:
                    boruta_df_ = boruta_importance_df.copy()
                else:
                    boruta_df_ += boruta_importance_df

        if is_cls:
            outer_cv = log_loss(y, oof)
        else:
            outer_cv = np.sqrt(mean_squared_error(y, oof))

        outer_cv_score.append(outer_cv)

    print(f'{red} Outer Holdout avg score: {res} log_loss: {red}{np.mean(outer_cv_score):.5f}{res}')
    print(f'{"*" * 50}\n')

    if permut:
        perm_df_ = perm_df_.sort_values('importance', ascending=False)
        perm_df_ = perm_df_.reset_index().rename({'index': 'Feature'}, axis=1)

    if boruta and boruta_df_.shape[0] > 0:
        # Boruta rankings: lower is better, so sort ascending
        boruta_df_ = boruta_df_.sort_values('importance')
        boruta_df_ = boruta_df_.reset_index().rename({'index': 'Feature'}, axis=1)

    feature_importances_ = feature_importances_.sort_values('Value', ascending=False).reset_index(drop=True)

    return perm_df_, feature_importances_, boruta_df_, np.mean(outer_cv_score)


# LightGBM keyword arguments read by lgbm_tuning at call time.
# ('early_stopping_round': 100 is intentionally left disabled.)
params = dict(
    n_estimators=2000,
    learning_rate=0.02,
    max_depth=10,
    subsample=0.7,
    colsample_bytree=0.85,
    num_leaves=24,
    verbosity=-1,
    importance_type='gain',
    max_bin=255,
    reg_alpha=1e-6,
    reg_lambda=1e-8,
)

# 'cls' -> binary classification; any other value -> regression
task_type = 'cls'

# Booster type and objective follow the task type.
params.update(
    boosting_type='dart' if task_type == 'cls' else 'gbdt',
    objective='binary' if task_type == 'cls' else 'regression',
)

# Feature selection is expensive, so it only runs when explicitly enabled.
if CFG.select_features:
    perm_df_, feature_importances_, boruta_df_, outer_cv_score = lgbm_tuning(df, permut=True, boruta=True)

# Combine importances and save them

In [10]:
if CFG.select_features:
    # Rank every importance source so that rank 1 is the most important feature.
    perm_df_['rank'] = perm_df_['importance'].rank(ascending=False)
    feature_importances_['rank'] = feature_importances_['Value'].rank(ascending=False)
    rank_frames = [perm_df_[['Feature','rank']], feature_importances_[['Feature','rank']]]

    # lgbm_tuning returns an empty frame when every Boruta fit failed; in that
    # case there is no 'importance' column to rank, so Boruta is left out.
    if 'importance' in boruta_df_.columns:
        boruta_df_['rank'] = boruta_df_['importance'].rank()
        rank_frames.append(boruta_df_[['Feature','rank']])

    # Sum the ranks per feature: the smaller the total, the better the feature.
    fi = pd.concat(rank_frames)
    fi = fi.groupby('Feature')['rank'].sum().sort_values().reset_index()
    fi.to_csv(f'feature_importance_{CFG.ttype}.csv')
else:
    # Reuse the ranking saved by a previous feature-selection run for this
    # signal type - the same file the branch above writes.
    fi = pd.read_csv(f'feature_importance_{CFG.ttype}.csv')

# Visualize feature distribution

# Train LightGBM model (train/test split based on ticker groups)

In [11]:
def model_train(df, train_df, test_df, features, task_type, how, n_folds, low_bound, high_bound, train_test):
    """Train a signal classifier in one of three modes.

    The feature matrix is ``features`` plus one-hot columns of 'pattern'
    (first level dropped). LightGBM models are built from the module-level
    ``params`` dict.

    Parameters
    ----------
    df, train_df, test_df : pandas.DataFrame
        Full data, training part and held-out part; each needs the feature
        columns plus 'pattern' and 'target' (``train_df`` also 'ticker').
    features : list of str
        Feature column names.
    task_type :
        Accepted for interface compatibility; not used here.
    how : str
        'lgbm' or 'lreg' (logistic regression); only consulted in 'fold' mode.
    n_folds : int
        Number of CV folds in 'fold' mode.
    low_bound, high_bound : float
        Confidence bounds forwarded to ``confident_score`` in 'fold' mode.
    train_test : str
        'fold' - grouped stratified CV on ``train_df``;
        'full' - fit LightGBM on all of ``df``;
        anything else - fit LightGBM on ``train_df`` and predict ``test_df``.

    Returns
    -------
    tuple
        ``(predictions, model)``. Predictions have shape (n, 1): out-of-fold
        probabilities ('fold', together with the model of the last fold),
        zeros ('full'), or test-set probabilities (test mode).

    Raises
    ------
    ValueError
        In 'fold' mode when ``how`` is neither 'lgbm' nor 'lreg'.
    """
    X, groups = train_df[features], train_df['ticker']
    X = pd.concat([X, pd.get_dummies(train_df[['pattern']], drop_first=True)], axis=1)
    y = train_df['target']

    if train_test == 'fold':
        # Fail early with a clear message instead of an unbound `model` later on.
        if how not in ('lgbm', 'lreg'):
            raise ValueError(f"Unknown model type {how!r}; expected 'lgbm' or 'lreg'")

        oof = np.zeros([train_df['target'].shape[0], 1])

        kf = StratifiedGroupKFold(n_splits=n_folds, shuffle=True, random_state=24082023)

        # Encode tickers as numbers; flatten so the splitter gets a 1-D group array.
        oe_enc = OrdinalEncoder()
        groups = oe_enc.fit_transform(groups.values.reshape(-1, 1)).ravel()

        print(f"Training with {len(features)} features")

        for fold, (fit_idx, val_idx) in enumerate(kf.split(X, y, groups)):
            print(f'Fold #{fold + 1}')
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[fit_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[fit_idx]
            y_val = y.iloc[val_idx]

            if how == 'lgbm':
                model = lgb.LGBMClassifier(**params)
                model.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                        eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
            else:
                model = LogisticRegression(C=0.1, max_iter=100000)#, class_weight='balanced')
                model.fit(X_train, y_train)

            val_preds = model.predict_proba(X_val)
            val_score = log_loss(y_val, val_preds)
            prec_score, prec_obj_pct = confident_score(y_val, val_preds[:,1], low_bound, high_bound)
            print(f'Logloss: {val_score}, Confident objects precision: {prec_score}, % of confident objects: {prec_obj_pct}')
            oof[val_idx, 0] = val_preds[:,1]

        # note: only the model fitted on the last fold is returned
        return oof, model
    elif train_test == 'full':
        print('Train on full data')
        X, y = df[features], df['target']
        X = pd.concat([X, pd.get_dummies(df[['pattern']], drop_first=True)], axis=1)
        model = lgb.LGBMClassifier(**params)
        model.fit(X, y, eval_set=[(X, y)], eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
        # no held-out predictions exist in this mode; return a placeholder
        return np.zeros([df.shape[0], 1]), model
    else:
        # fit model on the training part and predict on test
        print("Test fold")
        X_test, y_test = test_df[features], test_df['target']
        X_test = pd.concat([X_test, pd.get_dummies(test_df[['pattern']], drop_first=True)], axis=1)
        # The dummies are built separately for train and test, so the two sets
        # of pattern columns can differ; align test to the training layout
        # (missing columns are filled with 0, unseen ones are dropped).
        X_test = X_test.reindex(columns=X.columns, fill_value=0)
        model = lgb.LGBMClassifier(**params)
        model.fit(X, y, eval_set=[(X_test, y_test)], eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
        oof_test = np.zeros([test_df['target'].shape[0], 1])
        oof_test[:,0] = model.predict_proba(X_test)[:,1]
        return oof_test, model

def prepare_features(fi, feature_num):
    ''' Take the top ``feature_num`` names from ``fi['Feature']`` and order them by look-back period.

    Names of the form ``<base>_prev_<N>`` (N > 0) are grouped under period N;
    every other name goes to period 0. Only names that can be rebuilt exactly
    from base and period are treated as lagged, so a name that merely ends in
    digits (e.g. ``sma_24``) is kept unchanged in period 0.

    Returns ``(features, feature_dict)``: ``features`` lists the selected names
    ordered by ascending period; ``feature_dict`` maps each period to its base
    names (period 0: the full names) and has an extra ``'features'`` key holding
    ``features`` plus the fixed names 'Pattern_Trend' and 'STOCH_RSI'.
    '''
    selected = fi['Feature'].head(feature_num)
    feature_dict = defaultdict(list)

    for name in selected:
        base, sep, suffix = name.rpartition('_prev_')
        is_lagged = (
            bool(sep)
            and suffix.isdecimal()
            and int(suffix) > 0
            # the name must be reproducible from (base, period)
            and f'{base}_prev_{int(suffix)}' == name
        )
        if is_lagged:
            feature_dict[int(suffix)].append(base)
        else:
            feature_dict[0].append(name)

    # all keys are ints at this point, so they sort by period
    feature_dict = dict(sorted(feature_dict.items()))

    features = list()
    for period, names in feature_dict.items():
        if period > 0:
            features.extend(f'{n}_prev_{period}' for n in names)
        else:
            features.extend(names)

    feature_dict['features'] = features + ['Pattern_Trend', 'STOCH_RSI']

    return features, feature_dict

def confident_score(y, oof, low_bound, high_bound):
    ''' Precision over confidently positive predictions and their share.

        Rows with a predicted probability above ``high_bound`` are labelled 1;
        precision is measured on those rows only. The second return value is
        the number of such rows divided by the number of rows in ``y``. '''
    confident_pos = oof > high_bound

    labels = np.zeros_like(oof)
    labels[confident_pos] = 1
    labels[oof < low_bound] = 0

    selected_pred = labels[confident_pos]
    selected_true = y.values.reshape(-1,1)[confident_pos]

    precision = precision_score(selected_true, selected_pred)
    return precision, selected_true.shape[0]/y.shape[0]

train_test = 'full' # fold, test, full, inference
feature_num = 160

# LightGBM settings for the buy model; the sell model overrides a few values
# below. Disabled experiments: 'early_stopping_round': 50, 'is_unbalance': True,
# 'class_weight': 'balanced'.
params = {
    'boosting_type': 'dart',
    'n_estimators': 2000,
    'learning_rate': 0.02,
    'max_depth': 10,
    'colsample_bytree': 0.7,
    'subsample': 0.85,
    'subsample_freq': 1,
    'num_leaves': 24,
    'verbosity': -1,
    'max_bin': 255,
    'reg_alpha': 1e-6,
    'reg_lambda': 1e-8,
    'objective': 'binary',
    'metric': 'average_precision'
    }

# Confidence bounds (and, for sell, the differing hyper-parameters) per signal type.
if CFG.ttype == 'buy':
    low_bound, high_bound = 0.31, 0.69
else:
    low_bound, high_bound = 0.3, 0.7
    params.update({'n_estimators': 2200, 'colsample_bytree': 0.75, 'num_leaves': 25})

if CFG.train_LGBM:
    fi = pd.read_csv(f'feature_importance_{CFG.ttype}.csv')
    features, feature_dict = prepare_features(fi, feature_num)

    # 'inference' reuses a previously saved model instead of training one
    if train_test != 'inference':
        oof, model = model_train(
            df, train_df, test_df, features,
            task_type=task_type, how='lgbm', n_folds=5,
            low_bound=low_bound, high_bound=high_bound,
            train_test=train_test,
        )

    if train_test == 'fold':
        # score the out-of-fold predictions of the training part
        y = train_df['target']
        oof_val_score = log_loss(y, oof)
        oof_conf_prec_score, oof_conf_obj_pct = confident_score(y, oof, low_bound, high_bound)
        print(f'Total fold Logloss: {oof_val_score}, Total confident objects precision: {oof_conf_prec_score}, Total % of confident objects: {oof_conf_obj_pct}')
    elif train_test == 'test':
        # score the held-out tickers, then persist the fitted model
        y_test = test_df['target']
        test_val_score = log_loss(y_test, oof)
        test_conf_prec_score, test_conf_obj_pct = confident_score(y_test, oof, low_bound, high_bound)
        print(f'Total test Logloss: {test_val_score}, Total test confident objects precision: {test_conf_prec_score}, Total % of test confident objects: {test_conf_obj_pct}')
        joblib.dump(model, f'lgbm_{CFG.ttype}.pkl')
    elif train_test == 'full':
        # persist the model together with the feature dictionary needed at inference time
        joblib.dump(model, f'lgbm_{CFG.ttype}.pkl')
        with open(f'features_{CFG.ttype}.json', 'w') as f:
            json.dump(feature_dict, f)
    elif train_test == 'inference':
        # load the saved model and score it on the held-out tickers
        model = joblib.load(f'lgbm_{CFG.ttype}.pkl')
        X_test, y_test = test_df[features], test_df['target']
        X_test = pd.concat([X_test, pd.get_dummies(test_df[['pattern']], drop_first=True)], axis=1)
        oof = np.nan_to_num(model.predict_proba(X_test)[:,1])
        test_val_score = log_loss(y_test, oof)
        test_conf_prec_score, test_conf_obj_pct = confident_score(y_test, oof, low_bound, high_bound)
        print(f'Loaded model test Logloss: {test_val_score}, Loaded model test confident objects precision: {test_conf_prec_score}, Total % of test confident objects: {test_conf_obj_pct}')
    

Train on full data
[100]	training's binary_logloss: 0.643439	training's average_precision: 0.846616
[200]	training's binary_logloss: 0.620303	training's average_precision: 0.868642
[300]	training's binary_logloss: 0.596127	training's average_precision: 0.901756
[400]	training's binary_logloss: 0.573402	training's average_precision: 0.926093
[500]	training's binary_logloss: 0.550141	training's average_precision: 0.946718
[600]	training's binary_logloss: 0.5406	training's average_precision: 0.959144
[700]	training's binary_logloss: 0.522762	training's average_precision: 0.970621
[800]	training's binary_logloss: 0.51051	training's average_precision: 0.976859
[900]	training's binary_logloss: 0.493934	training's average_precision: 0.982936
[1000]	training's binary_logloss: 0.48207	training's average_precision: 0.987618
[1100]	training's binary_logloss: 0.46784	training's average_precision: 0.990962
[1200]	training's binary_logloss: 0.459573	training's average_precision: 0.992904
[1300]	trai

Buy

Total fold Logloss: 0.6702365645667008, Total confident objects precision: 0.7347767253044655, Total % of confident objects: 0.12053498613603002

Total test Logloss: 0.667474687137724, Total test confident objects precision: 0.7719298245614035, Total % of test confident objects: 0.1043956043956044

Sell

Total fold Logloss: 0.6309185725054093, Total confident objects precision: 0.7591888466413181, Total % of confident objects: 0.26529926025554806

Total test Logloss: 0.6428094828596689, Total test confident objects precision: 0.7267441860465116, Total % of test confident objects: 0.23243243243243245

# Count predictions according to pattern

In [12]:
# class balance of the label in the training part (rendered as the cell output)
train_df['target'].value_counts()

1    3189
0    2942
Name: target, dtype: int64

In [13]:
# Out-of-fold predictions only exist after a 'fold' run.
if train_test == 'fold':
    # True where the out-of-fold probability reaches the upper confidence bound
    train_df.loc[:,'oof'] = (oof >= high_bound)
    # per pattern: share of confident positives and number of rows
    pattern_stats = train_df.groupby('pattern')['oof'].agg(['mean', 'count'])
    display(pattern_stats)