In [3]:
import gc
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import gridspec
from time import time
from tqdm.auto import tqdm
from collections import Counter, defaultdict
from scipy import stats
import lightgbm as lgb
from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from pprint import pformat, pprint
import warnings
warnings.filterwarnings("ignore")


This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [5]:
# Read the competition data.
# Every CSV lives in the same input directory, so the location is declared once;
# point DATA_DIR elsewhere when running outside the Kaggle file layout.
DATA_DIR = '../input/data-science-bowl-2019'

train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')
train_labels = pd.read_csv(f'{DATA_DIR}/train_labels.csv')
specs = pd.read_csv(f'{DATA_DIR}/specs.csv')
sample_submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')


FileNotFoundError: [Errno 2] File b'../input/data-science-bowl-2019/train.csv' does not exist: b'../input/data-science-bowl-2019/train.csv'

In [None]:
# Vocabularies: every distinct value seen in either train or test, sorted so the
# integer encodings below are deterministic.
list_of_user_activities = sorted(set(train['title'].unique()) | set(test['title'].unique()))
list_of_event_code = sorted(set(train['event_code'].unique()) | set(test['event_code'].unique()))
list_of_event_id = sorted(set(train['event_id'].unique()) | set(test['event_id'].unique()))
list_of_worlds = sorted(set(train['world'].unique()) | set(test['world'].unique()))

# Lookup tables: title text -> code, code -> title text, world text -> code.
activities_map = dict(zip(list_of_user_activities, np.arange(len(list_of_user_activities))))
activities_labels = {code: title for title, code in activities_map.items()}
activities_world = dict(zip(list_of_worlds, np.arange(len(list_of_worlds))))

# Titles (still as text at this point) of all sessions whose type is 'Assessment'.
assess_titles = sorted(
    set(train.loc[train['type'] == 'Assessment', 'title'].unique())
    | set(test.loc[test['type'] == 'Assessment', 'title'].unique())
)

# Swap the text columns for their integer codes.
train['title'] = train['title'].map(activities_map)
train['world'] = train['world'].map(activities_world)
test['title'] = test['title'].map(activities_map)
test['world'] = test['world'].map(activities_world)
train_labels['title'] = train_labels['title'].map(activities_map)

# Event code that marks an assessment attempt, per title code: 4100 everywhere...
win_code = dict(zip(activities_map.values(), np.full(len(activities_map), 4100, dtype='int')))
# ...except for 'Bird Measurer (Assessment)', which uses 4110.
win_code[activities_map['Bird Measurer (Assessment)']] = 4110

# Parse the timestamp strings into datetimes.
train['timestamp'] = pd.to_datetime(train['timestamp'])
test['timestamp'] = pd.to_datetime(test['timestamp'])

In [None]:
def eval_qwk_lgb_regr(y_true, y_pred):
    """
    Quadratic weighted kappa for continuous (regression) predictions.

    The predictions are binned into the four accuracy groups with percentile
    cut points chosen so that the share of each predicted group matches the
    share of that group in the module-level ``reduce_train['accuracy_group']``.

    Parameters
    ----------
    y_true : array-like
        True accuracy groups; must expose ``.shape`` (ndarray or Series).
    y_pred : array-like
        Continuous predictions, one per element of ``y_true``.

    Returns
    -------
    tuple
        ``('cappa', score, True)`` where ``score`` is the quadratic weighted
        Cohen kappa. The trailing ``True`` is presumably LightGBM's
        "higher is better" flag -- confirm against the caller.

    Notes
    -----
    This function used to draw a histogram of the training target on every
    call; a metric must not have plotting side effects, so that was removed.
    """
    # Share of each accuracy group in the training target (missing groups -> 0).
    group_counts = Counter(reduce_train['accuracy_group'])
    n_rows = len(reduce_train)

    # Cumulative shares of groups 0, 1 and 2 give the three percentile cut points.
    cum_share = 0
    cut_points = []
    for group in range(3):
        cum_share += group_counts[group] / n_rows
        # Clamp: floating point accumulation may overshoot 1.0 by an ulp, which
        # np.percentile rejects.
        cut_points.append(np.percentile(y_pred, min(cum_share, 1.0) * 100))

    # side='left' assigns x to the first i with x <= cut_points[i] (3 if none),
    # i.e. exactly the "<= bound" if/elif chain, but vectorized.
    y_class = np.searchsorted(cut_points, np.asarray(y_pred), side='left').reshape(y_true.shape)
    return 'cappa', cohen_kappa_score(y_true, y_class, weights='quadratic'), True

def pretty_json(data):
    """Parse a JSON string and re-serialize it with a 2-space indent, keeping key order."""
    parsed = json.loads(data)
    return json.dumps(parsed, indent=2, sort_keys=False)


def pretty_display(df):
    """
    Show a DataFrame as an HTML table in the notebook output.

    Every literal backslash-n sequence in the generated HTML is replaced by a
    ``<br>`` tag. Returns whatever ``display`` returns.
    """
    # Imported here because the notebook's import cell never brings in HTML;
    # without this the function raised NameError on first use.
    from IPython.display import HTML, display

    # NOTE(review): this string contains only whitespace; it looks like a CSS
    # block was lost at some point -- confirm.
    style = """

    """
    return display(HTML(df.to_html().replace('\\n', '<br>') + style))

def get_installation_sample(df):
    """
    Draw one row per ``installation_id`` with a fixed seed (28).

    The result comes from ``groupby(...).apply``, so the group key is prepended
    to the original row index.
    """
    def _pick_one(group):
        return group.sample(random_state=28)

    by_installation = df.groupby('installation_id')
    return by_installation.apply(_pick_one)

In [None]:
# this is the function that convert the raw data into processed features
def get_data(user_sample, test_set=False):
    '''
    Convert the raw events of ONE installation_id into per-assessment feature rows.

    The sessions of ``user_sample`` are visited in order of appearance. Each
    assessment session produces one feature dict that describes the history
    *before* that session (all running counters are snapshotted first and only
    updated afterwards) together with the outcome of the assessment itself
    (``accuracy`` and ``accuracy_group``).

    Uses the module-level lookups built earlier in the notebook:
    ``assess_titles``, ``list_of_event_code``, ``list_of_event_id``,
    ``activities_labels`` and ``win_code``.

    Parameters
    ----------
    user_sample : DataFrame
        Rows of train or test filtered to a single installation_id.
    test_set : bool, default False
        When False, only assessment sessions with more than one row and at
        least one attempt are kept. When True, every assessment session is
        kept and only the last one is returned.

    Returns
    -------
    list of dict
        When ``test_set`` is False: one dict per kept assessment.
    dict
        When ``test_set`` is True: the features of the last assessment
        (IndexError if the sample contains no assessment session).
    '''
    # ---- running state, updated after every session -----------------------
    last_activity = 0
    user_activities_count = {'Clip': 0, 'Activity': 0, 'Assessment': 0, 'Game': 0}

    accuracy_groups = {0: 0, 1: 0, 2: 0, 3: 0}
    all_assessments = []
    accumulated_accuracy_group = 0
    accumulated_accuracy = 0
    accumulated_correct_attempts = 0
    accumulated_uncorrect_attempts = 0
    accumulated_actions = 0
    counter = 0          # number of assessment sessions processed so far
    durations = []       # duration (seconds) of each processed assessment session
    last_accuracy_title = {'acc_' + title: -1 for title in assess_titles}
    event_code_count = {ev: 0 for ev in list_of_event_code}
    event_id_count = {eve: 0 for eve in list_of_event_id}
    title_count = {eve: 0 for eve in activities_labels.values()}

    # Per-title round statistics collected from 'Game' sessions.
    round_stats = {
        'round_time': defaultdict(list),
        'round_event_count': defaultdict(list),
        'round_miss': defaultdict(list),
        'round_count': defaultdict(list),
        'incomplete_round': defaultdict(list),
        'incomplete_last_round_time': [],
        'no_round_count': 0,
    }

    # Per-title session statistics collected from 'Game' and 'Assessment' sessions.
    session_stats = {
        'session_time': defaultdict(list),
        'session_event_count': defaultdict(list),
    }

    sessions_count = 0

    # (feature prefix, aggregation, source statistic). The order is the order in
    # which the keys are inserted into the feature dict, so keep it stable.
    round_aggregations = [
        ('round_time_mean_', np.mean, 'round_time'),
        ('round_time_max_', np.max, 'round_time'),
        ('round_time_min_', np.min, 'round_time'),
        ('round_event_count_mean_', np.mean, 'round_event_count'),
        ('round_event_count_max_', np.max, 'round_event_count'),
        ('round_event_count_min_', np.min, 'round_event_count'),
        ('round_miss_mean_', np.mean, 'round_miss'),
        ('round_miss_max_', np.max, 'round_miss'),
        ('round_miss_min_', np.min, 'round_miss'),
        ('incomplete_round_mean_', np.mean, 'incomplete_round'),
        ('round_count_max_', np.max, 'round_count'),
    ]
    session_aggregations = [
        ('session_time_mean_', np.mean, 'session_time'),
        ('session_time_max_', np.max, 'session_time'),
        ('session_time_min_', np.min, 'session_time'),
        ('session_event_count_mean_', np.mean, 'session_event_count'),
        ('session_event_count_max_', np.max, 'session_event_count'),
        ('session_event_count_min_', np.min, 'session_event_count'),
    ]
    variety_features = [('var_event_code', event_code_count),
                        ('var_event_id', event_id_count),
                        ('var_title', title_count)]

    def update_counters(counts, session, col):
        """Add the value counts of ``session[col]`` to ``counts`` (in place) and return it."""
        for value, n_events in Counter(session[col]).items():
            # title codes are stored under their text label
            key = activities_labels[value] if col == 'title' else value
            counts[key] += n_events
        return counts

    # iterate over the sessions of this installation_id, in order of appearance
    for i, session in user_sample.groupby('game_session', sort=False):
        # i is the game_session id; session holds the rows of that one session
        session_type = session['type'].iloc[0]
        session_title = session['title'].iloc[0]
        session_title_text = activities_labels[session_title]
        installation_id = session['installation_id'].iloc[-1]

        # Only assessment sessions generate a feature row.
        if session_type == 'Assessment' and (test_set or len(session) > 1):
            # rows carrying the attempt event code of this title (4100 or 4110)
            all_attempts = session.query(f'event_code == {win_code[session_title]}')
            # number of successful and failed attempts
            true_attempts = all_attempts['event_data'].str.contains('true').sum()
            false_attempts = all_attempts['event_data'].str.contains('false').sum()

            # Snapshot of the history so far; starts from the session-type counts.
            features = user_activities_count.copy()
            features.update(event_code_count)
            features.update(event_id_count)
            features.update(title_count)
            features.update(last_accuracy_title)
            features['installation_session_count'] = sessions_count

            for prefix, aggregate, stat_name in round_aggregations:
                for title, values in round_stats[stat_name].items():
                    features[prefix + title] = aggregate(values)
            features['incomplete_round_count'] = sum(
                len(rounds) for rounds in round_stats['incomplete_round'].values())
            last_round_times = round_stats['incomplete_last_round_time']
            # NaN when no incomplete round has been seen yet (what np.mean([]) yields)
            features['incomplete_last_round_time'] = np.mean(last_round_times) if last_round_times else np.nan
            features['no_round_count'] = round_stats['no_round_count']

            for prefix, aggregate, stat_name in session_aggregations:
                for title, values in session_stats[stat_name].items():
                    features[prefix + title] = aggregate(values)

            # number of distinct event codes / event ids / titles seen so far
            for name, dict_counts in variety_features:
                features[name] = np.count_nonzero(np.array(list(dict_counts.values())))

            # installation_id is kept for grouping in cross-validation
            features['installation_id'] = installation_id

            # the (encoded) title of the assessment being predicted
            features['session_title'] = session_title

            # attempts history up to, but excluding, this assessment
            features['accumulated_correct_attempts'] = accumulated_correct_attempts
            features['accumulated_uncorrect_attempts'] = accumulated_uncorrect_attempts
            accumulated_correct_attempts += true_attempts
            accumulated_uncorrect_attempts += false_attempts

            # duration statistics of the previous assessment sessions
            if durations == []:
                features['duration_mean'] = 0
                features['duration_std'] = 0
            else:
                features['duration_mean'] = np.mean(durations)
                features['duration_std'] = np.std(durations)
            # Column addressed by name instead of position 2, so a different
            # column order cannot silently pick another column.
            session_times = session['timestamp']
            durations.append((session_times.iloc[-1] - session_times.iloc[0]).seconds)

            # mean accuracy of the previous assessments
            features['accumulated_accuracy'] = accumulated_accuracy/counter if counter > 0 else 0
            total_attempts = true_attempts + false_attempts
            accuracy = true_attempts/total_attempts if total_attempts != 0 else 0
            accumulated_accuracy += accuracy
            last_accuracy_title['acc_' + session_title_text] = accuracy

            # accuracy of this assessment bucketed into groups 0..3
            if accuracy == 0:
                features['accuracy_group'] = 0
            elif accuracy == 1:
                features['accuracy_group'] = 3
            elif accuracy == 0.5:
                features['accuracy_group'] = 2
            else:
                features['accuracy_group'] = 1
            features['accuracy'] = accuracy
            # how many previous assessments fell into each group (keys 0..3)
            features.update(accuracy_groups)
            accuracy_groups[features['accuracy_group']] += 1

            # mean accuracy group of the previous assessments
            features['accumulated_accuracy_group'] = accumulated_accuracy_group/counter if counter > 0 else 0
            accumulated_accuracy_group += features['accuracy_group']

            # number of events so far; updated at the end of each loop iteration
            features['accumulated_actions'] = accumulated_actions

            # test: keep every assessment; train: only those with at least one attempt
            if test_set or total_attempts > 0:
                all_assessments.append(features)

            counter += 1

        if session_type == 'Game':
            event_data_dict = {}       # round number -> events recorded for that round
            completed_event_data = []  # events with event_code 2030
            current_round = -1
            for line in session['event_data'].apply(json.loads):
                if 'round' in line and line['round'] not in event_data_dict:
                    event_data_dict[line['round']] = [line]
                    current_round = line['round']
                # For rounds > 0 the opening event is therefore stored twice;
                # only the first and last entries are read below.
                if current_round > 0:
                    event_data_dict[current_round].append(line)
                if line['event_code'] == 2030:
                    completed_event_data.append(line)

            if event_data_dict:
                round_stats['round_count'][session_title_text].append(max(event_data_dict.keys()))
                for completed in completed_event_data:
                    r = completed['round']
                    first_event = event_data_dict[r][0]
                    round_stats['round_time'][session_title_text].append((completed['game_time'] - first_event['game_time'])/1000)
                    round_stats['round_event_count'][session_title_text].append(completed['event_count'] - first_event['event_count'])
                    if 'misses' in completed:
                        round_stats['round_miss'][session_title_text].append(completed['misses'])
                completed_rounds = [c['round'] for c in completed_event_data]
                for incompleted in set(event_data_dict.keys()) - set(completed_rounds):
                    round_events = event_data_dict[incompleted]
                    round_stats['incomplete_round'][session_title_text].append(incompleted)
                    round_stats['incomplete_last_round_time'].append((round_events[-1]['game_time'] - round_events[0]['game_time'])/1000)
            else:
                round_stats['no_round_count'] += 1

        if session_type in ['Game', 'Assessment']:
            last_row = session.iloc[-1]
            session_stats['session_time'][session_title_text].append(last_row['game_time']/1000)
            session_stats['session_event_count'][session_title_text].append(last_row['event_count'])

        sessions_count += 1

        # add this session's events to the running per-value counters
        event_code_count = update_counters(event_code_count, session, 'event_code')
        event_id_count = update_counters(event_id_count, session, 'event_id')
        title_count = update_counters(title_count, session, 'title')

        # total number of events so far
        accumulated_actions += len(session)

        # Count a session type only when it differs from the previous session's
        # type. The assignment below used to be misspelled ('last_activitiy'),
        # so the guard was always true and every session was counted; this
        # changes the values of the 'Clip'/'Activity'/'Assessment'/'Game' features.
        if last_activity != session_type:
            user_activities_count[session_type] += 1
            last_activity = session_type

    if test_set:
        # only the last assessment of a test installation is the one to predict
        return all_assessments[-1]
    # in the train set every kept assessment becomes a row
    return all_assessments

def get_data_test(user_sample):
    """Run ``get_data`` in test mode for one installation's rows."""
    return get_data(user_sample, test_set=True)

def get_train_and_test(train, test):
    """
    Build the model-ready train and test frames.

    Each frame is split by ``installation_id`` (in order of appearance); the
    train groups go through ``get_data`` and contribute a list of rows each,
    the test groups go through ``get_data_test`` and contribute one row each.

    Returns ``(reduce_train, reduce_test, categoricals)`` where ``categoricals``
    is ``['session_title']``.
    """
    train_groups = [group for _, group in train.groupby('installation_id', sort = False)]
    test_rows = [get_data_test(group) for _, group in test.groupby('installation_id', sort = False)]

    train_rows = []
    for group in train_groups:
        train_rows.extend(get_data(group))

    reduce_train = pd.DataFrame(train_rows)
    reduce_test = pd.DataFrame(test_rows)
    return reduce_train, reduce_test, ['session_title']

In [None]:
# Build the per-assessment feature frames for train and test.
reduce_train, reduce_test, categoricals = get_train_and_test(train, test)

# Column names become strings in which every non-alphanumeric character is an
# underscore (presumably so LightGBM accepts them as feature names -- confirm).
reduce_train.columns = [
    "".join(ch if ch.isalnum() else "_" for ch in str(col))
    for col in reduce_train.columns
]
reduce_test.columns = [
    "".join(ch if ch.isalnum() else "_" for ch in str(col))
    for col in reduce_test.columns
]
# Hand-written list of the (sanitized) column names handed to the models as
# inputs. The names must match the columns of reduce_train / reduce_test after
# the renaming above.
features = [ '2b058fe3', '3393b68b', '363c86c9', '0', '070a5291', '0db6d71d', '12_Monkeys', '1325467d', '2010','37db1c2f', '37ee8496', '392e14df', '3ee399c3', '4022', '4090', '45d01abe', '47026d5f', '4901243f', '499edb7c', '5290eab1', '5348fd84', '562cec5f', '565a3990', '587b5989', '5a848010', '6bf9e3e1', '7372e1a5', '74e5f8a7', '7da34a02', '84538528', '86c924c4', '87d743c1', '884228c8', '8b757ab8', '8fee50e2', '91561152', '9ee1c98c', 'Bug_Measurer__Activity_', 'Crystal_Caves___Level_1', 'Crystal_Caves___Level_2', 'Crystal_Caves___Level_3', 'Ordering_Spheres', 'Rulers', 'Scrub_A_Dub', 'Tree_Top_City___Level_1', 'Tree_Top_City___Level_2', 'Tree_Top_City___Level_3', 'Welcome_to_Lost_Lagoon_', 'a2df0760', 'a5be6304', 'a8876db3', 'a8efe47b', 'acc_Bird_Measurer__Assessment_', 'acc_Cart_Balancer__Assessment_', 'acc_Cauldron_Filler__Assessment_', 'acc_Chest_Sorter__Assessment_', 'acc_Mushroom_Sorter__Assessment_', 'accumulated_accuracy', 'accumulated_accuracy_group', 'accumulated_uncorrect_attempts', 'acf5c23f', 'ad2fc29c', 'b120f2ac', 'b74258a0', 'c51d8688', 'c58186bf', 'c7f7f0e1', 'd185d3ea', 'e4f1efe6', 'e79f3763', 'f54238ee', 'f6947f54', 'f806dc10', 'incomplete_round_count', 'incomplete_round_mean_All_Star_Sorting', 'incomplete_round_mean_Chow_Time', 'incomplete_round_mean_Leaf_Leader', 'incomplete_round_mean_Pan_Balance', 'round_count_max_Air_Show', 'round_count_max_All_Star_Sorting', 'round_count_max_Dino_Dive', 'round_count_max_Dino_Drink', 'round_count_max_Happy_Camel', 'round_count_max_Pan_Balance', 'round_count_max_Scrub_A_Dub', 'round_event_count_max_Air_Show', 'round_event_count_mean_Air_Show', 'round_event_count_mean_All_Star_Sorting', 'round_event_count_mean_Pan_Balance', 'round_event_count_mean_Scrub_A_Dub', 'round_event_count_min_Dino_Dive', 'round_miss_max_All_Star_Sorting', 'round_miss_max_Crystals_Rule', 'round_miss_mean_All_Star_Sorting', 'round_miss_mean_Bubble_Bath', 'round_miss_mean_Crystals_Rule', 'round_miss_mean_Happy_Camel', 
'round_miss_mean_Pan_Balance', 'round_miss_mean_Scrub_A_Dub', 'round_miss_min_Chow_Time', 'round_time_max_All_Star_Sorting', 'round_time_mean_All_Star_Sorting', 'round_time_mean_Pan_Balance', 'round_time_min_All_Star_Sorting', 'round_time_min_Pan_Balance', 'session_event_count_max_Bird_Measurer__Assessment_', 'session_event_count_max_Cauldron_Filler__Assessment_', 'session_event_count_max_Mushroom_Sorter__Assessment_', 'session_event_count_mean_Bird_Measurer__Assessment_', 'session_event_count_mean_Bubble_Bath', 'session_event_count_mean_Cauldron_Filler__Assessment_', 'session_event_count_mean_Happy_Camel', 'session_event_count_mean_Mushroom_Sorter__Assessment_', 'session_event_count_min_Cart_Balancer__Assessment_', 'session_event_count_min_Scrub_A_Dub', 'session_time_max_Cauldron_Filler__Assessment_', 'session_time_max_Chest_Sorter__Assessment_', 'session_time_max_Dino_Drink', 'session_time_mean_Bubble_Bath', 'session_time_mean_Scrub_A_Dub', 'session_time_min_Scrub_A_Dub', 'session_title']
# Names that must never be used as model inputs (identifiers, the target, and
# some other columns). As written, none of them appears in the list above, so
# the filter below only acts as a safety net if the list is edited.
cols_to_drop = ['session_id', 'installation_id','accuracy_group',
                'installation_session_count',
                'installation_duration_mean',
                'installation_title_nunique',
                'installation_event_code_count_mean',
                "4070",

               ]
features = [f for f in features if f not in cols_to_drop]

In [None]:
class Base_Model(object):
    """
    Cross-validated training loop shared by the concrete models.

    Construction runs the whole pipeline: the training frame is shuffled
    (seed 28), ``fit`` trains one model per GroupKFold split grouped by
    ``installation_id``, and the results are stored on the instance:

    * ``y_pred`` -- test predictions averaged over the folds
    * ``score``  -- out-of-fold quadratic kappa
    * ``model``  -- the model trained on the last fold

    Subclasses must provide ``get_params``, ``convert_dataset`` and
    ``train_model``; ``convert_x`` may be overridden to transform the feature
    frame before ``predict``.

    Parameters
    ----------
    train_df, test_df : DataFrame
        Feature frames; ``train_df`` must also hold 'installation_id',
        'accuracy' and 'accuracy_group'.
    features : list of str
        Columns used as model inputs.
    categoricals : list of str, optional
        Columns treated as categorical (default: none).
    n_splits : int
        Number of GroupKFold splits.
    verbose : bool
        Whether to print the out-of-fold score (subclasses also use it to
        control training logs).
    """

    def __init__(self, train_df, test_df, features, categoricals=None, n_splits=5, verbose=True):
        self.train_df = train_df.sample(frac=1, random_state=28).reset_index(drop=True)
        self.test_df = test_df
        self.features = features
        self.n_splits = n_splits
        # None instead of a shared mutable [] default
        self.categoricals = [] if categoricals is None else categoricals
        self.target = 'accuracy'
        self.verbose = verbose
        self.params = self.get_params()
        self.feat_imp_list = np.zeros(len(features))
        self.y_pred, self.score, self.model = self.fit()

    def get_params(self):
        """Return the parameter dict stored in ``self.params`` (subclass hook)."""
        raise NotImplementedError

    def convert_dataset(self, x_train, y_train, x_val, y_val):
        """Return ``(train_set, val_set)`` in the form ``train_model`` expects (subclass hook)."""
        raise NotImplementedError

    def train_model(self, train_set, val_set):
        """Train and return a model exposing ``predict`` (subclass hook)."""
        raise NotImplementedError

    def convert_x(self, x):
        """Transform a feature frame before prediction; identity by default."""
        return x

    def fit(self):
        """
        Train one model per fold and collect predictions.

        Validation uses a single sampled row per installation (via the
        module-level ``get_installation_sample``); scoring uses the
        module-level ``eval_qwk_lgb_regr``.

        Returns
        -------
        tuple
            ``(y_pred, loss_score, model)``: averaged test predictions, the
            out-of-fold kappa, and the model of the last fold.
        """
        # Buffers are sized from the frames this instance owns, not from the
        # notebook globals reduce_train / reduce_test.
        oof_pred = np.zeros((len(self.train_df), ))
        y_pred = np.zeros((len(self.test_df), ))
        val_idx_list = []
        cv = GroupKFold(n_splits=self.n_splits)
        for fold, (train_idx, val_idx) in enumerate(cv.split(self.train_df, None, self.train_df['installation_id'])):
            t, v = self.train_df.iloc[train_idx], self.train_df.iloc[val_idx]
            v = get_installation_sample(v)
            # v is indexed by (installation_id, original row label); the second
            # level is the row position in self.train_df (its index was reset).
            val_new_idx = [x[1] for x in v.index]
            val_idx_list += val_new_idx

            x_train, x_val = t[self.features], v[self.features]
            y_train, y_val = t[self.target], v[self.target]

            train_set, val_set = self.convert_dataset(x_train, y_train, x_val, y_val)
            model = self.train_model(train_set, val_set)
            conv_x_val = self.convert_x(x_val)
            oof_pred[val_new_idx] = model.predict(conv_x_val).reshape(oof_pred[val_new_idx].shape)
            x_test = self.convert_x(self.test_df[self.features])
            # each fold contributes 1/n_splits of the final test prediction
            y_pred += model.predict(x_test).reshape(y_pred.shape) / self.n_splits
            print('Partial score of fold {} is: {}'.format(fold, eval_qwk_lgb_regr(v['accuracy_group'], oof_pred[val_new_idx])[1]))
        _, loss_score, _ = eval_qwk_lgb_regr(self.train_df['accuracy_group'].iloc[val_idx_list], oof_pred[val_idx_list])
        if self.verbose:
            print('Our oof cohen kappa score is: ', loss_score)
        return y_pred, loss_score, model
    
class Lgb_Model(Base_Model):
    """LightGBM regressor (``l2`` objective and metric) driven by the Base_Model CV loop."""

    def train_model(self, train_set, val_set):
        """Train a booster on ``train_set`` validated on ``val_set``; accumulate its feature importance."""
        if self.verbose:
            log_period = 100
        else:
            log_period = 0
        booster = lgb.train(self.params, train_set, valid_sets=[val_set], verbose_eval=log_period)
        self.model = booster
        # running average of the importances over the folds
        self.feat_imp_list += booster.feature_importance() / self.n_splits
        return booster

    def convert_dataset(self, x_train, y_train, x_val, y_val):
        """Wrap the train and validation splits into ``lgb.Dataset`` objects."""
        cat_cols = self.categoricals
        return (
            lgb.Dataset(x_train, y_train, categorical_feature=cat_cols),
            lgb.Dataset(x_val, y_val, categorical_feature=cat_cols),
        )

    def get_params(self):
        """Return the LightGBM parameters of this model."""
        return {
            'n_estimators': 10000,
            'boosting_type': 'gbdt',
            'objective': 'l2',
            'metric': 'l2',
            'subsample': 0.85,
            'subsample_freq': 1,
            'learning_rate': 0.008,
            'feature_fraction': 0.85,
            'max_depth': 12,
            'early_stopping_rounds': 200,
            'seed': 866,
            'lambda_l1': 1,
            'lambda_l2': 1,
        }

class Lgb_Model2(Base_Model):
    """Second LightGBM regressor (``regression`` objective, ``rmse`` metric) with its own seed and tree settings."""

    def train_model(self, train_set, val_set):
        """Train a booster on ``train_set`` validated on ``val_set``; accumulate its feature importance."""
        if self.verbose:
            log_period = 100
        else:
            log_period = 0
        booster = lgb.train(self.params, train_set, valid_sets=[val_set], verbose_eval=log_period)
        self.model = booster
        # running average of the importances over the folds
        self.feat_imp_list += booster.feature_importance() / self.n_splits
        return booster

    def convert_dataset(self, x_train, y_train, x_val, y_val):
        """Wrap the train and validation splits into ``lgb.Dataset`` objects."""
        cat_cols = self.categoricals
        return (
            lgb.Dataset(x_train, y_train, categorical_feature=cat_cols),
            lgb.Dataset(x_val, y_val, categorical_feature=cat_cols),
        )

    def get_params(self):
        """Return the LightGBM parameters of this model."""
        return {
            'n_estimators': 10000,
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'metric': 'rmse',
            'subsample': 0.85,
            'subsample_freq': 1,
            'learning_rate': 0.005,
            'feature_fraction': 0.8,
            'max_depth': 10,
            'early_stopping_rounds': 200,
            'seed': 886,
            'lambda_l1': 1,
            'lambda_l2': 1,
        }


In [None]:
# Train both LightGBM variants on the same frames and feature list
# (constructing a model runs its full cross-validation).
lgb_model = Lgb_Model(
    train_df=reduce_train,
    test_df=reduce_test,
    features=features,
    categoricals=categoricals,
)
lgb_model2 = Lgb_Model2(
    train_df=reduce_train,
    test_df=reduce_test,
    features=features,
    categoricals=categoricals,
)

In [None]:
# Blend the two models with equal weights.
final_pred = lgb_model.y_pred*0.5 + lgb_model2.y_pred*0.5

# Share of each accuracy group in the training target.
dist = Counter(reduce_train['accuracy_group'])
for k in dist:
    dist[k] /= len(reduce_train)
reduce_train['accuracy_group'].hist()

# Percentile cut points on the blended predictions, chosen so the predicted
# groups have the same proportions as the training target.
acum = 0
bound = {}
for i in range(3):
    acum += dist[i]
    bound[i] = np.percentile(final_pred, acum * 100)
print(bound)

def classify(x):
    """Map a continuous prediction to an accuracy group (0..3) using ``bound``."""
    if x <= bound[0]:
        return 0
    elif x <= bound[1]:
        return 1
    elif x <= bound[2]:
        return 2
    else:
        return 3
    
final_pred = np.array(list(map(classify, final_pred)))

# NOTE(review): this assumes the rows of reduce_test are in the same order as
# the rows of sample_submission -- confirm.
sample_submission['accuracy_group'] = final_pred.astype(int)
sample_submission.to_csv('submission.csv', index=False)

# Only the last expression of a cell is rendered, so the class shares have to
# be printed explicitly; previously this value was computed and discarded.
print(sample_submission['accuracy_group'].value_counts(normalize=True))
sample_submission.head()