In [1]:
import itertools
import pickle

import pandas as pd

import context_engineering_functions as cef

In [2]:
# Directory passed to the feature builders in `context_engineering_functions`.
data = '../data/clean/'

# Build each (picks, vetos) pair once per reward function. The original cell
# called the builder four times with only two distinct argument sets.
# NOTE(review): assumes create_basic_pick_veto_triples returns the same frames
# for identical arguments -- confirm.
basic_picks, basic_vetos = cef.create_basic_pick_veto_triples(data)
proportion_picks, proportion_vetos = cef.create_basic_pick_veto_triples(
    data, pick_reward_function=cef.get_proportion_rewards)

# The pick-only datasets are independent copies, so a later in-place edit of
# one frame cannot leak into its veto-augmented counterpart.
only_basic_picks = basic_picks.copy()
only_proportion_picks = proportion_picks.copy()

In [None]:
# (rows, columns) of the two pick-only datasets.
tuple(frame.shape for frame in (only_basic_picks, only_proportion_picks))

In [None]:
# (rows, columns) of the basic-reward pick and veto frames.
tuple(frame.shape for frame in (basic_picks, basic_vetos))

In [None]:
# (rows, columns) of the proportion-reward pick and veto frames.
tuple(frame.shape for frame in (proportion_picks, proportion_vetos))

In [None]:
# Preview the first five rows of the proportion-reward pick frame.
proportion_picks.head(n=5)

In [3]:
from bandit import Bandit, VetoBandit, ComboBandit, EpisodicBandit

In [10]:
# Experiment grid shared by the sweep cells below.
# Bandit variants imported for those sweeps: Bandit alone, Bandit + VetoBandit,
# ComboBandit and EpisodicBandit.

# name -> (pick frame, veto frame); the veto frame is None for pick-only datasets.
datasets = dict(
    basic=(only_basic_picks, None),
    proportion=(only_proportion_picks, None),
    basic_veto=(basic_picks, basic_vetos),
    proportion_veto=(proportion_picks, proportion_vetos),
)

# Feature-set names understood by get_cols.
contexts = ['maps_only', 'winprob', 'map_winprob', 'both']

# Passed as the `baseline` argument of each bandit constructor.
baselines = [True, False]

# Passed as the `step_size` argument of each bandit constructor.
step_sizes = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

# Number of passes over the training data to attempt.
epochs = [1, 3, 5, 10]

def get_cols(x, columns=None):
    """Return the feature column names for context `x`.

    Every context includes the seven ``de_*_is_available`` columns.

    Parameters
    ----------
    x : str
        Context name. ``'winprob'`` and ``'both'`` add the two team
        win-percent columns; ``'map_winprob'`` and ``'both'`` add every
        column whose name contains ``'WinPct'``. Any other value (for
        example ``'maps_only'``) yields only the availability columns.
    columns : iterable of str, optional
        Column names to scan for ``'WinPct'``. Defaults to
        ``proportion_picks.columns`` (the notebook-level frame), which is
        only looked up when a ``'WinPct'`` scan is actually needed.

    Returns
    -------
    list of str
        A new list of column names in a fixed order.
    """
    cols = ['de_dust2_is_available',
            'de_inferno_is_available',
            'de_mirage_is_available',
            'de_nuke_is_available',
            'de_overpass_is_available',
            'de_train_is_available',
            'de_vertigo_is_available',
            ]

    if x in ('winprob', 'both'):
        cols.extend(['DecisionTeam_WinPercent',
                     'OtherTeam_WinPercent'])
    if x in ('map_winprob', 'both'):
        if columns is None:
            # Backward-compatible default: fall back to the global frame.
            columns = proportion_picks.columns
        cols.extend(col for col in columns if 'WinPct' in col)
    return cols

In [11]:
# simple bandit
#
# Trains a pick-only `Bandit` for every context / baseline / step size / epoch
# combination on the pick-only datasets, scores it on a held-out split with
# `evaluate`, and appends one dict per configuration to `results`.

from logging_policy import LoggingPolicy
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
from evaluation import train_value_estimator, evaluate

results = []

for dataset, (pick_df, veto_df) in datasets.items():

    # Skip vetoes here: this sweep trains on pick rows only.
    if dataset in ('basic_veto', 'proportion_veto'):
        continue

    # train test split (fixed random_state so every run uses the same partition)
    X_train, X_test, A_train, A_test, Y_train, Y_test = train_test_split(
        pick_df, pick_df['X_Action'].values, pick_df['Y_reward'].values,
        test_size=0.2, random_state=13)

    lp = LoggingPolicy(X_train, X_train['X_Action'])

    for context in contexts:
        # The feature columns depend only on the context, so they are selected
        # once per context instead of once per hyperparameter combination.
        cols = get_cols(context)
        n_features = len(cols)
        X = X_train[cols].values  # numpy array for bandit

        for baseline, step_size, n_epochs in itertools.product(baselines, step_sizes, epochs):
            print(f'Training simple bandit for {n_epochs} epochs, {"with" if baseline else "without"} '
                  f'baseline, step size {step_size}, context {context}, dataset {dataset}')

            bandit = Bandit(n_features,
                            n_arms=7,
                            step_size=step_size,
                            baseline=baseline)

            # train bandit: one update per logged (context, action, reward) row
            actual_epochs_trained = 0

            for _ in range(n_epochs):
                try:
                    for i in range(X.shape[0]):
                        bandit.update_theta(X[i].reshape(1, -1), A_train[i], Y_train[i])
                    actual_epochs_trained += 1
                except ValueError:  # we encountered an overflow
                    break

            # eval on test set
            try:
                action_to_model_dict = train_value_estimator(X,
                                                             X_train,
                                                             A_train,
                                                             Y_train,
                                                             log_policy=lp,
                                                             target_bandit=bandit,
                                                             )

                result = evaluate(X_test[cols].values,
                                  X_test,
                                  A_test,
                                  Y_test,
                                  log_policy=lp,
                                  target_bandit=bandit,
                                  action_to_model_dict=action_to_model_dict,
                                  )
            except ValueError:
                # Keep a placeholder row so failed configurations stay visible.
                result = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

            result['dataset'] = dataset
            result['context'] = context
            result['baseline'] = baseline
            result['step_size'] = step_size
            # Requested epochs are recorded too: n_epochs_actual alone cannot
            # tell configurations apart once training stops early.
            result['n_epochs'] = n_epochs
            result['n_epochs_actual'] = actual_epochs_trained
            result['bandit_type'] = 'simplebandit'

            results.append(result)

Training simple bandit for 1 epochs, with baseline, step size 0.001, context maps_only, dataset basic
Training simple bandit for 3 epochs, with baseline, step size 0.001, context maps_only, dataset basic
Training simple bandit for 5 epochs, with baseline, step size 0.001, context maps_only, dataset basic
Training simple bandit for 10 epochs, with baseline, step size 0.001, context maps_only, dataset basic


KeyboardInterrupt: 

In [None]:
# Save the results gathered so far (simple-bandit checkpoint).
with open('results-simple.pckl', mode='wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# Restore the simple-bandit checkpoint. pickle.load executes arbitrary code,
# so only use it on files written by this notebook.
with open('results-simple.pckl', mode='rb') as results_file:
    results = pickle.load(results_file)

In [13]:
# split bandit
#
# Trains a `Bandit` on picks and, for datasets that carry a veto frame, a
# separate `VetoBandit` on vetoes. Each is evaluated on its own held-out split
# and one result dict per bandit is appended to `results`.

for dataset, (pick_df, veto_df) in datasets.items():
    # In `datasets` only the '*_veto' entries carry a veto frame.
    has_veto = veto_df is not None

    # train test split
    X_train, X_test, A_train, A_test, Y_train, Y_test = train_test_split(
        pick_df, pick_df['X_Action'].values, pick_df['Y_reward'].values,
        test_size=0.2, random_state=13)

    if has_veto:
        (X_veto_train, X_veto_test,
         A_veto_train, A_veto_test,
         Y_veto_train, Y_veto_test) = train_test_split(
            veto_df, veto_df['X_Action'].values, veto_df['Y_reward'].values,
            test_size=0.2, random_state=13)

        # Positional row numbers of each training match's veto rows. The numpy
        # arrays below are indexed by position, so the shuffled DataFrame index
        # labels must not be used, and only matches present in the training
        # split are visited.
        veto_match_ids = X_veto_train['MatchId'].reset_index(drop=True)
        veto_rows_by_match = [group.index.values
                              for _, group in veto_match_ids.groupby(veto_match_ids, sort=False)]

        # LP takes vetoes too
        lp = LoggingPolicy(X_train, X_train['X_Action'], X_veto_train, X_veto_train['X_Action'])
    else:
        lp = LoggingPolicy(X_train, X_train['X_Action'])

    for context in contexts:
        # Feature selection depends only on the context; do it once per context.
        cols = get_cols(context)
        n_features = len(cols)
        X = X_train[cols].values
        if has_veto:
            X_veto = X_veto_train[cols].values

        for baseline, step_size, n_epochs in itertools.product(baselines, step_sizes, epochs):
            print(f'Training split bandit for {n_epochs} epochs, {"with" if baseline else "without"} '
                  f'baseline, step size {step_size}, context {context}, dataset {dataset}')

            bandit = Bandit(n_features,
                            n_arms=7,
                            step_size=step_size,
                            baseline=baseline)

            if has_veto:
                veto_bandit = VetoBandit(n_features,
                                         n_arms=7,
                                         step_size=step_size,
                                         baseline=baseline)

            # train bandit
            actual_epochs_trained = 0

            for _ in range(n_epochs):
                try:
                    for i in range(X.shape[0]):
                        bandit.update_theta(X[i].reshape(1, -1), A_train[i], Y_train[i])
                    if has_veto:
                        for rows in veto_rows_by_match:
                            # NOTE(review): reshape(1, -1) flattens all of a
                            # match's veto rows into one row, as the original
                            # did -- confirm this is what VetoBandit expects.
                            veto_bandit.update_theta(X_veto[rows].reshape(1, -1),
                                                     A_veto_train[rows],
                                                     Y_veto_train[rows])
                    actual_epochs_trained += 1
                except ValueError:  # we encountered an overflow
                    break

            # eval on test set -- picks
            try:
                action_to_model_dict = train_value_estimator(X,
                                                             X_train,
                                                             A_train,
                                                             Y_train,
                                                             log_policy=lp,
                                                             target_bandit=bandit,
                                                             )

                result_pick = evaluate(X_test[cols].values,
                                       X_test,
                                       A_test,
                                       Y_test,
                                       log_policy=lp,
                                       target_bandit=bandit,
                                       action_to_model_dict=action_to_model_dict,
                                       )
            except ValueError:
                result_pick = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

            # eval on test set -- vetoes. Handled separately so a veto failure
            # no longer discards an already computed pick result.
            if has_veto:
                try:
                    veto_action_to_model_dict = train_value_estimator(
                        X_veto,
                        X_veto_train,
                        A_veto_train,
                        Y_veto_train,
                        log_policy=lp,
                        target_bandit=veto_bandit,
                        veto_flags=pd.Series(['veto']*X_veto_train.shape[0])
                        )

                    result_veto = evaluate(X_veto_test[cols].values,
                                           X_veto_test,
                                           A_veto_test,
                                           Y_veto_test,
                                           log_policy=lp,
                                           target_bandit=veto_bandit,
                                           action_to_model_dict=veto_action_to_model_dict,
                                           veto_flags=pd.Series(['veto']*X_veto_test.shape[0])
                                           )
                except ValueError:
                    result_veto = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

            # Metadata shared by the pick and veto rows of this configuration.
            run_info = {'dataset': dataset,
                        'context': context,
                        'baseline': baseline,
                        'step_size': step_size,
                        'n_epochs': n_epochs,  # requested; may exceed n_epochs_actual
                        'n_epochs_actual': actual_epochs_trained,
                        }

            result_pick.update(run_info)
            result_pick['bandit_type'] = 'splitbandit-pick'
            results.append(result_pick)

            if has_veto:
                result_veto.update(run_info)
                result_veto['bandit_type'] = 'splitbandit-veto'
                results.append(result_veto)

Training split bandit for 1 epochs, with baseline, step size 0.001, context maps_only, dataset basic
Training split bandit for 3 epochs, with baseline, step size 0.001, context maps_only, dataset basic
Training split bandit for 5 epochs, with baseline, step size 0.001, context maps_only, dataset basic


KeyboardInterrupt: 

In [None]:
# Save the results gathered so far (after the split-bandit sweep).
with open('results-split.pckl', mode='wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# Restore the split-bandit checkpoint (only unpickle files this notebook wrote).
with open('results-split.pckl', mode='rb') as results_file:
    results = pickle.load(results_file)

In [None]:
# combo bandit
#
# Trains a single `ComboBandit` on pick rows and, for datasets that carry a
# veto frame, veto rows as well. Each row is tagged with an `action_type` of
# 'pick' or 'veto', which is passed to the bandit on every update.

for dataset, (pick_df, veto_df) in datasets.items():
    # In `datasets` only the '*_veto' entries carry a veto frame.
    has_veto = veto_df is not None

    # Tag rows on copies (assign) so the frames shared through `datasets` are
    # not mutated and re-running this cell stays idempotent.
    combined_df = pick_df.assign(action_type='pick')
    if has_veto:
        combined_df = pd.concat([combined_df, veto_df.assign(action_type='veto')],
                                axis=0, ignore_index=True)

    # train test split
    X_train, X_test, A_train, A_test, Y_train, Y_test = train_test_split(
        combined_df, combined_df['X_Action'].values, combined_df['Y_reward'].values,
        test_size=0.2, random_state=13)

    # Per-row action types aligned positionally with X / A_train / Y_train.
    action_types_train = X_train['action_type'].values

    # LoggingPolicy either includes veto data or not
    if has_veto:
        picks_train = X_train[X_train['action_type'] == 'pick']
        vetos_train = X_train[X_train['action_type'] == 'veto']
        lp = LoggingPolicy(picks_train, picks_train['X_Action'],
                           vetos_train, vetos_train['X_Action'])
    else:
        lp = LoggingPolicy(X_train, X_train['X_Action'])

    for context in contexts:
        # Feature selection depends only on the context; do it once per context.
        cols = get_cols(context)
        n_features = len(cols)
        X = X_train[cols].values

        for baseline, step_size, n_epochs in itertools.product(baselines, step_sizes, epochs):
            print(f'Training combo bandit for {n_epochs} epochs, {"with" if baseline else "without"} '
                  f'baseline, step size {step_size}, context {context}, dataset {dataset}')

            bandit = ComboBandit(n_features,
                                 n_arms=7,
                                 step_size=step_size,
                                 baseline=baseline)

            # train bandit
            actual_epochs_trained = 0

            for _ in range(n_epochs):
                try:
                    for i in range(X.shape[0]):
                        # The original indexed `.values.reshape(1, -1)[i]`,
                        # which has shape (1, n): it returned the whole array
                        # for i == 0 and raised IndexError for i >= 1.
                        bandit.update_theta(X[i].reshape(1, -1), A_train[i],
                                            Y_train[i],
                                            action_type=action_types_train[i])
                    actual_epochs_trained += 1
                except ValueError:  # we encountered an overflow
                    break

            # eval on test set
            try:
                action_to_model_dict = train_value_estimator(X,
                                                             X_train,
                                                             A_train,
                                                             Y_train,
                                                             log_policy=lp,
                                                             target_bandit=bandit,
                                                             veto_flags=X_train['action_type']
                                                             )

                result = evaluate(X_test[cols].values,
                                  X_test,
                                  A_test,
                                  Y_test,
                                  log_policy=lp,
                                  target_bandit=bandit,
                                  action_to_model_dict=action_to_model_dict,
                                  veto_flags=X_test['action_type']
                                  )
            except ValueError:
                result = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

            result['dataset'] = dataset
            result['context'] = context
            result['baseline'] = baseline
            result['step_size'] = step_size
            result['n_epochs'] = n_epochs  # requested; may exceed n_epochs_actual
            result['n_epochs_actual'] = actual_epochs_trained
            result['bandit_type'] = 'combobandit'

            results.append(result)

Training split bandit for 1 epochs, with baseline, step size 0.001, context maps_only, dataset basic


In [None]:
# Save the results gathered so far (after the combo-bandit sweep).
with open('results-combo.pckl', mode='wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# Restore the combo-bandit checkpoint (only unpickle files this notebook wrote).
with open('results-combo.pckl', mode='rb') as results_file:
    results = pickle.load(results_file)

In [None]:
# episodic bandit
#
# Trains an `EpisodicBandit` one match at a time: every update receives all
# rows of a single MatchId together with their 'pick' / 'veto' action types.

import pandas as pd

for dataset, (pick_df, veto_df) in datasets.items():
    # In `datasets` only the '*_veto' entries carry a veto frame.
    has_veto = veto_df is not None

    # Tag rows on copies (assign) so the frames shared through `datasets` are
    # not mutated and re-running this cell stays idempotent.
    episodes_df = pick_df.assign(action_type='pick')
    if has_veto:
        episodes_df = pd.concat([episodes_df, veto_df.assign(action_type='veto')],
                                axis=0, ignore_index=True)

    # train test split - match-based since episodic.
    # Split the *unique* match ids: splitting the per-row MatchId column lets
    # one match land in both partitions, so train and test rows would overlap.
    train_matchids, test_matchids = train_test_split(episodes_df['MatchId'].unique(),
                                                     test_size=0.2, random_state=13)

    in_train = episodes_df['MatchId'].isin(train_matchids)
    in_test = episodes_df['MatchId'].isin(test_matchids)

    X_train = episodes_df[in_train]
    A_train = episodes_df.loc[in_train, 'X_Action'].values
    Y_train = episodes_df.loc[in_train, 'Y_reward'].values

    X_test = episodes_df[in_test]
    A_test = episodes_df.loc[in_test, 'X_Action'].values
    Y_test = episodes_df.loc[in_test, 'Y_reward'].values

    # Positional row numbers of each training match, in order of first
    # appearance. X / A_train / Y_train are numpy arrays indexed by position,
    # so the DataFrame index labels of X_train must not be used on them.
    train_match_ids = X_train['MatchId'].reset_index(drop=True)
    rows_by_match = [group.index.values
                     for _, group in train_match_ids.groupby(train_match_ids, sort=False)]
    action_types_train = X_train['action_type'].values

    # LoggingPolicy either includes veto data or not
    if has_veto:
        picks_train = X_train[X_train['action_type'] == 'pick']
        vetos_train = X_train[X_train['action_type'] == 'veto']
        lp = LoggingPolicy(picks_train, picks_train['X_Action'],
                           vetos_train, vetos_train['X_Action'])
    else:
        lp = LoggingPolicy(X_train, X_train['X_Action'])

    for context in contexts:
        # Feature selection depends only on the context; do it once per context.
        cols = get_cols(context)
        n_features = len(cols)
        X = X_train[cols].values

        for baseline, step_size, n_epochs in itertools.product(baselines, step_sizes, epochs):
            print(f'Training episodic bandit for {n_epochs} epochs, {"with" if baseline else "without"} '
                  f'baseline, step size {step_size}, context {context}, dataset {dataset}')

            bandit = EpisodicBandit(n_features,
                                    n_arms=7,
                                    step_size=step_size,
                                    baseline=baseline)

            # train bandit: one update per match (episode)
            actual_epochs_trained = 0

            for _ in range(n_epochs):
                try:
                    for rows in rows_by_match:
                        bandit.update_theta(X[rows], A_train[rows],
                                            Y_train[rows],
                                            action_types=action_types_train[rows])
                    actual_epochs_trained += 1
                except ValueError:  # we encountered an overflow
                    break

            # eval on test set
            try:
                action_to_model_dict = train_value_estimator(X,
                                                             X_train,
                                                             A_train,
                                                             Y_train,
                                                             log_policy=lp,
                                                             target_bandit=bandit,
                                                             veto_flags=X_train['action_type']
                                                             )

                result = evaluate(X_test[cols].values,
                                  X_test,
                                  A_test,
                                  Y_test,
                                  log_policy=lp,
                                  target_bandit=bandit,
                                  action_to_model_dict=action_to_model_dict,
                                  veto_flags=X_test['action_type']
                                  )
            except ValueError:
                result = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

            result['dataset'] = dataset
            result['context'] = context
            result['baseline'] = baseline
            result['step_size'] = step_size
            result['n_epochs'] = n_epochs  # requested; may exceed n_epochs_actual
            result['n_epochs_actual'] = actual_epochs_trained
            result['bandit_type'] = 'episodicbandit'

            results.append(result)

In [None]:
# Save the results gathered so far (after the episodic-bandit sweep).
with open('results-episodic.pckl', mode='wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# Restore the episodic-bandit checkpoint (only unpickle files this notebook wrote).
with open('results-episodic.pckl', mode='rb') as results_file:
    results = pickle.load(results_file)

In [None]:
# Write the current `results` list to the combined results file.
with open('results-full.pckl', mode='wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# from logging_policy import LoggingPolicy
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import RidgeCV
# from evaluation import train_value_estimator, evaluate

# results = []

# for dataset, (pick_df, veto_df) in datasets.items():
    
#     if dataset in ('basic_veto', 'proportion_veto'):
#         continue
    
#     X = pick_df
#     A = pick_df['X_Action']
#     Y = pick_df['Y_reward']
    
#     # train test split
#     X_train, X_test, A_train, A_test, Y_train, Y_test = train_test_split(X, A.values, Y.values, test_size=0.2, random_state=13)
    
#     lp = LoggingPolicy(X_train, X_train['X_Action'])

#     for context in contexts:
#         for baseline in baselines:
#             for step_size in step_sizes:
#                 for n_epochs in epochs:
#                     print(f'Training simple bandit for {n_epochs} epochs, {"with" if baseline else "without"} '
#                           f'baseline, step size {step_size}, context {context}, dataset {dataset}')
                    
#                     cols = get_cols(context)
                    
#                     n_features = len(cols)
#                     X = X_train[cols].values

#                     bandit = Bandit(n_features,
#                                     n_arms=7,
#                                     step_size=step_size,
#                                     baseline=baseline)

#                     # veto_bandit = veto_bandit_type(n_features,
#                     #                                n_arms=7,
#                     #                                step_size=step_size,
#                     #                                baseline=baseline)

#                     # train bandit
                    
#                     actual_epochs_trained = 0
                    
#                     for _ in range(n_epochs):
#                         try:
#                             for i in range(X.shape[0]):
#                                 bandit.update_theta(X[i].reshape(1, -1), A_train[i], Y_train[i])
#                             if veto_df is not None:
#                                 pass
#                                 # if veto_bandit_type is not None and type(bandit_type) != Bandit:
#                                 #     bandit.update_theta(X_train[i].reshape(1, -1), A_train[i], Y_train[i])
#                                 # else:
#                                 #     veto_bandit.update_theta(X_train[i].reshape(1, -1), A_train[i], Y_train[i])
#                             actual_epochs_trained += 1
#                         except ValueError:  # we encountered an overflow
#                             break


#                     # eval on test set
#                     try:
#                         action_to_model_dict = train_value_estimator(X,
#                                                                      X_train,
#                                                                      A_train,
#                                                                      Y_train,
#                                                                      log_policy=lp,
#                                                                      target_bandit=bandit,
#                                                                      )

#                         result = evaluate(X_test[cols].values,
#                                           X_test,
#                                           A_test,
#                                           Y_test,
#                                           log_policy=lp,
#                                           target_bandit=bandit,
#                                           action_to_model_dict=action_to_model_dict,
#                                           )
#                     except ValueError:
#                         result = {'mean': None, 'IW': None, 'SN_IW': None, 'Direct_Method_IW': None}

#                     result['dataset'] = dataset
#                     result['context'] = context
#                     result['baseline'] = baseline
#                     result['step_size'] = step_size
#                     result['n_epochs_actual'] = actual_epochs_trained
#                     result['bandit_type'] = 'simplebandit'

#                     results.append(result)

In [None]:
import pandas as pd

# One row per result dict; the dict keys become the columns.
df_results = pd.DataFrame.from_records(results)
df_results.head(n=5)

In [None]:
# Rank configurations by the `SN_IW` column, best first. Failed runs keep their
# missing values and sort last; the original zero-filled every column first,
# which presented failures as genuine 0.0 estimates and would rank them above
# any negative estimate.
df_results.sort_values(by='SN_IW', ascending=False, na_position='last')

In [None]:
# Distribution of the `Direct_Method_IW` values across all configurations.
direct_method_estimates = df_results['Direct_Method_IW']
direct_method_estimates.hist(bins=100)