In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [18]:
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from google.colab import drive
from tqdm import tqdm
# Mount Google Drive at /content/drive (requires the google.colab runtime imported above)
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Set Parameters

To run the ensemble training model, first set the correct parameters in the next cell. To train an ensemble, run all the cells up to and including Section 4. To tune the hyperparameters, run all cells except those within Section 4.

**IMPORTANT**: if you are missing the files needed to run this notebook, check the README file for how to access them.

In [19]:
# Path to the transactions_train data
TRANSACTIONS_DATA_PATH = './data/transactions_train.csv'

# Directory where the model prediction CSV files can be found
# (the ensemble rank file is also read from and written to this directory)
PATH = '../predictions/'

# The ENSEMBLE_MODELS constant selects which set of model prediction files the ensemble is trained on.
# Assign it one of the values below depending on which models you want to train the ensemble on:

# level-1 medium scoring ensemble training -> 0
# level-2 ensemble training -> 1
# Level-2 with Cosine Similarity -> 2
# Level-2 with Ensemble_blending_sub_models -> 3
# level-2 ensemble with (age clustering + cosine similarity level_1 ensemble) + level-0 models -> 4

ENSEMBLE_MODELS = 0

In [20]:
# Pick the prediction files to ensemble according to ENSEMBLE_MODELS.
# Every valid option assigns model_list; an unrecognised value fails fast here
# instead of surfacing later as a NameError on model_list.
if ENSEMBLE_MODELS == 0:
  # level-1 ensemble
  model_list = ['SVD_ReRanking_0.0225_correct.csv', 'trending_products_weekly_quotient_mixture_0.0226.csv',
              'Exponential_Decay_0.0217_correct.csv', 'time_is_our_best_friend_0.0220_correct.csv',
              'EDA_Clustering_0.0224.csv', 'Rule_Base_Age_0.0227.csv', 'lstm_sequential_fix_0.0223_correct.csv',
              'trending submission 0.0231.csv']

elif ENSEMBLE_MODELS == 1:
  # level-2 models
  model_list = ['SVD_ReRanking_0.0225_correct.csv', 'trending_products_weekly_quotient_mixture_0.0226.csv',
              'time_is_our_best_friend_0.0220_correct.csv',
              'EDA_Clustering_0.0224.csv', 'Rule_Base_Age_0.0227.csv', 'lstm_sequential_fix_0.0223_correct.csv',
              'trending submission 0.0231.csv', 'ensemble_all_3_0.02376.csv']

elif ENSEMBLE_MODELS == 2:
  # Level-2 with Cosine Sim
  model_list = ['SVD_ReRanking_0.0225_correct.csv', 'trending_products_weekly_quotient_mixture_0.0226.csv',
                'cosineSimilarity_train.csv',
                'EDA_Clustering_0.0224.csv', 'Rule_Base_Age_0.0227.csv', 'lstm_sequential_fix_0.0223_correct.csv',
                'trending submission 0.0231.csv', 'ensemble_all_3_0.02376.csv']

elif ENSEMBLE_MODELS == 3:
  # Level 2 with Ensemble_blending_sub_models
  model_list = ['hm_solution_00224.csv', 'hm_solution_00231.csv',
                'h-m-trending-products-weekly-add-test.csv',
                'hnm-exponential-decay-with-alternate-items.csv', 'trending_products_weekly_quotient_mixture_0.0226.csv', 'lstm-sequential-modelwith-item-features-tutorial.csv',
                'time_is_our_best_friend_submission_0.0220.csv', 'Rule_Base_Age_submission_0.0227.csv']

elif ENSEMBLE_MODELS == 4:
  # simple models + rest age_cosine_0.02239.csv
  model_list = ['SVD_ReRanking_0.0225_correct.csv', 'trending_products_weekly_quotient_mixture_0.0226.csv',
              'age_cosine_0.02239.csv', 'time_is_our_best_friend_0.0220_correct.csv',
              'EDA_Clustering_0.0224.csv', 'Rule_Base_Age_0.0227.csv', 'lstm_sequential_fix_0.0223_correct.csv',
              'trending submission 0.0231.csv']

else:
  raise ValueError('ENSEMBLE_MODELS must be one of 0, 1, 2, 3 or 4, got ' + repr(ENSEMBLE_MODELS))

In [22]:
# Each of the 12 prediction slots is weighted by the reciprocal of its 1-based rank
position_weights = [1 / rank for rank in range(1, 13)]

Set the training hyperparameters here.

In [23]:
# Cutoff date for training data: purchases made after this date, up to and
# including VALIDATION_CUTOFF_DATE, form the training set
TRAINING_CUTOFF_DATE = '2020-09-08'

# Cutoff date for validation data: purchases made after this date form the validation set
VALIDATION_CUTOFF_DATE = '2020-09-16'

# Learning rate for ensemble model
LEARNING_RATE = 0.0001
# Regularization constant, passed to EnsembleModel as `regularizer`
REG_CONST = 0.00007
# Delta constant, passed to EnsembleModel as `delta`
DELTA_CONST = 0.0004

## 2. Define Functions

### 2.1 General Functions

In [24]:
from re import split
# Loads one model's submission csv file, ordered by customer
def read_model(model_name, path='./'):
  """Read a submission CSV and return it sorted by customer_id.

  The file is located at `path + model_name` (plain string concatenation, so
  `path` must end with a separator). The returned frame has a fresh 0..n-1
  index after sorting.
  """
  submission_df = pd.read_csv(path + model_name)
  submission_df = submission_df.sort_values('customer_id')
  return submission_df.reset_index(drop=True)

# Creates a df with all the predictions from the given models list
# Necessary conditions:
  # model_list should not be empty
  # every model file must list exactly the same customers
def read_models(model_list, path='./'):
  """Combine the predictions of several models into one DataFrame.

  Args:
    model_list: file names of the submission CSVs; must not be empty.
    path: prefix prepended to every file name.

  Returns:
    DataFrame indexed by customer_id with one column per model
    ('prediction_0', 'prediction_1', ...), each cell holding the list of
    whitespace-separated tokens from that model's prediction string.

  Raises:
    ValueError: if model_list is empty, or if a model file does not contain
      exactly the same customer_ids as the first one.
  """
  if len(model_list) == 0:
    raise ValueError('model_list should not be empty')

  predictions_df = read_model(model_list[0], path)
  predictions_df.columns = ['customer_id', 'prediction_0']
  predictions_df['prediction_0'] = predictions_df['prediction_0'].apply(lambda x: x.split())
  for i in range(1, len(model_list)):
    col_name = 'prediction_' + str(i)
    curr_prediction_df = read_model(model_list[i], path)
    # Columns are attached by row position (both frames are sorted by
    # customer_id with a fresh index), so a file with different customers would
    # silently pair predictions with the wrong customer. Refuse it instead.
    if not curr_prediction_df['customer_id'].equals(predictions_df['customer_id']):
      raise ValueError('customer_id values in ' + str(model_list[i]) +
                       ' do not match those in ' + str(model_list[0]))
    predictions_df[col_name] = curr_prediction_df['prediction'].apply(lambda x: x.split())
    del curr_prediction_df
  predictions_df = predictions_df.set_index('customer_id')
  return predictions_df


def calculate_prediction_ratings(prediction_row, position_weights, model_weights):
  """Score every article predicted for one customer.

  Args:
    prediction_row: pandas Series with entries 'prediction_0', 'prediction_1',
      ... each holding a list of article ids. Other entries (for example a
      'customer_id' value) are ignored.
    position_weights: weight of each list position; predictions are truncated
      to len(position_weights) items.
    model_weights: one weight per model, indexed by the model number.

  Returns:
    dict mapping article id to the sum, over every model that predicted it, of
    model_weights[m] * position_weights[position in that model's list].
  """
  # Count the model columns by name. The previous `shape[0] - 1` assumed the row
  # also carried a customer_id entry; when customer_id is the index (as in the
  # frame built by read_models) that dropped the last model from the score.
  num_models = sum(1 for label in prediction_row.index if str(label).startswith('prediction_'))
  predictions = {}
  # Loop over prediction columns
  for m in range(num_models):
    col_name = 'prediction_' + str(m)
    prediction = prediction_row[col_name][:len(position_weights)]
    # Loop over all the articles in the prediction
    for pos, article_id in enumerate(prediction):
      predictions[article_id] = predictions.get(article_id, 0) + model_weights[m] * position_weights[pos]
  return predictions
  

# Builds the final prediction for one row of model predictions: the 12 best-rated articles
def get_final_prediction(prediction_row, position_weights, model_weights):
  """Return up to 12 article ids ordered from highest to lowest rating.

  Ratings come from calculate_prediction_ratings. Articles with equal ratings
  keep the order in which they were first rated (the sort is stable).
  """
  ratings = calculate_prediction_ratings(prediction_row, position_weights, model_weights)
  # Rank article ids by descending rating, then keep the first 12
  ranked_articles = sorted(ratings, key=lambda article_id: -ratings[article_id])
  return ranked_articles[:12]

# Ensemble all the model predictions into a singular final prediction for each customer
def ensemble_models(predictions_df, position_weights, model_weights):
  """Compute the ensembled prediction for every customer.

  Args:
    predictions_df: one row per customer with 'prediction_<m>' list columns.
      customer_id may be either a regular column or the (named) index.
    position_weights: weight of each position in a prediction list.
    model_weights: one weight per model.

  Returns:
    A new DataFrame with columns ['customer_id', 'prediction']. The input
    frame is left unmodified, so no extra 'prediction' column leaks into the
    rows later handed to the rating functions.

  Raises:
    KeyError: if customer_id is neither a column nor the name of the index.
  """
  final_predictions = predictions_df.apply(
      get_final_prediction,
      position_weights=position_weights,
      model_weights=model_weights,
      axis=1)

  if 'customer_id' in predictions_df.columns:
    # Both Series share predictions_df's index, so they align row by row
    return pd.DataFrame({'customer_id': predictions_df['customer_id'],
                         'prediction': final_predictions})

  if predictions_df.index.name == 'customer_id':
    # customer_id lives in the index: turn it back into a column
    return pd.DataFrame({'customer_id': list(predictions_df.index),
                         'prediction': list(final_predictions)})

  raise KeyError('customer_id')

### 2.2 Mean Average Precision functions

In [25]:
# Snippet taken from https://www.kaggle.com/code/kaerunantoka/h-m-how-to-calculate-map-12
def apk(actual, predicted, k=12):
    """Average precision at k for a single list of predictions.

    Only the first k predictions are scored. Each prediction found in `actual`
    adds (hits so far / its 1-based rank) to the score; a prediction that
    already appeared earlier in the list is not credited again. The score is
    divided by min(len(actual), k). Returns 0.0 when `actual` is empty.
    """
    if len(predicted) > k:
        predicted = predicted[:k]

    precision_sum = 0.0
    hit_count = 0.0

    for rank, candidate in enumerate(predicted, start=1):
        if candidate not in actual:
            continue
        # Skip repeats of a prediction that appeared at an earlier rank
        if candidate in predicted[:rank - 1]:
            continue
        hit_count += 1.0
        precision_sum += hit_count / rank

    if not actual:
        return 0.0

    return precision_sum / min(len(actual), k)

def mapk(actual, predicted, k=12):
    """Mean of apk over the paired entries of `actual` and `predicted`."""
    per_customer_scores = []
    for true_items, predicted_items in zip(actual, predicted):
        per_customer_scores.append(apk(true_items, predicted_items, k))
    return np.mean(per_customer_scores)

def calc_map_rank(validation_df, predictions, k=12):
  """Score predictions against the validation purchases.

  Args:
    validation_df: frame with 'customer_id' and 'articles' (list of purchased
      article ids) columns.
    predictions: Series of predicted article lists indexed by customer_id; it
      must contain every customer in validation_df.
    k: number of leading predictions scored per customer (now forwarded to
      mapk; it was previously accepted but ignored).

  Returns:
    Twice the mean average precision at k over the validation customers.
  """
  actual_list = list(validation_df['articles'])
  predicted = list(predictions[validation_df['customer_id']])
  # NOTE(review): the MAP value is doubled; the reason is not visible in this
  # notebook. Kept as-is so reported scores stay comparable -- confirm intent.
  return mapk(actual_list, predicted, k) * 2

def get_val_items(validation_df):
  """Return the validation customers and their purchased articles.

  Args:
    validation_df: frame with 'customer_id' and 'articles' columns.

  Returns:
    Tuple (val_users, val_items): the customer ids and the matching article
    lists, both as plain Python lists in row order. (Previously the lists were
    built and then discarded, so the function always returned None.)
  """
  val_users = list(validation_df['customer_id'])
  val_items = list(validation_df['articles'])
  return val_users, val_items

### 2.3 Training Functions

In [26]:
# Random weight initialization (referred to as xavier initialization in this notebook)
def xavier_initialization(num_models):
  """Return num_models random weights.

  Values are drawn with np.random.uniform (default range [0, 1)) and scaled by
  sqrt(6 / num_models).
  """
  scale = np.sqrt(6 / num_models)
  return np.random.uniform(size=num_models) * scale

# Sigmoid function, clamped away from 0 and 1
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), limited to [epsilon, 1 - epsilon].

    epsilon is 1e-7; the clamp keeps the result from ever being exactly 0 or 1,
    to avoid issues with cost and gradient.
    """
    epsilon = 0.0000001
    raw = 1 / (1 + np.exp(-x))
    return np.minimum(np.maximum(raw, 0 + epsilon), 1 - epsilon)

# Line plot of a single data series
def plot_graph(data, xlabel, ylabel, title):
    """Plot `data` with the given axis labels and title, then show the figure."""
    plt.plot(data)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

class EnsembleModel():
  """Learns one weight per base model from customers' actual purchases.

  Each training row (a customer and the articles they bought) nudges every
  model's weight: up when that model predicted a bought article, down (scaled
  by `delta`) when it did not. After every epoch the weighted ensemble is
  scored on the validation set.
  """

  def __init__(self, learning_rate, predictions_df, training_df, position_weights, validation_df,
               num_models, delta=1/130, regularizer=0, model_weights=None, verbose=True):
    """Store the data and hyper-parameters and set the starting weights.

    Args:
      learning_rate: step size of each weight update.
      predictions_df: frame indexed by customer_id with 'prediction_<m>' list columns.
      training_df: frame with 'customer_id' and 'articles' columns used for updates.
      position_weights: weight of each position in a prediction list.
      validation_df: frame with 'customer_id' and 'articles' columns used for scoring.
      num_models: number of weights to create when model_weights is not given.
      delta: magnitude of the penalty term applied when a model missed a bought article.
      regularizer: coefficient of the weight-decay term in each update.
      model_weights: optional starting weights (any sequence or array); copied,
        so the caller's object is never modified by training.
      verbose: show the tqdm progress bar and plot the learning curve.
    """
    # `if not model_weights` raised ValueError for multi-element numpy arrays,
    # so previously trained weights could not be passed back in. Test for
    # None / empty explicitly instead.
    if model_weights is None or len(model_weights) == 0:
      model_weights = EnsembleModel.xavier_initialization(num_models)
    else:
      # Private float copy: the weights are updated in place during training
      model_weights = np.array(model_weights, dtype=float)

    self.model_weights = model_weights
    self.predictions_df = predictions_df
    self.training_df = training_df
    self.position_weights = position_weights
    self.validation_df = validation_df
    self.learning_rate = learning_rate
    self.delta = delta
    self.regularizer = regularizer
    self.verbose = verbose

  # Initialize weights using xavier Initialization
  @staticmethod
  def xavier_initialization(num_models):
    """Return num_models weights drawn from np.random.uniform and scaled by sqrt(6 / num_models)."""
    weights = np.random.uniform(size=num_models)
    weights = weights * np.sqrt(6 / num_models)
    return weights

  # Train Model
  def train(self, epochs):
    """Run `epochs` training epochs.

    Returns:
      Tuple (model_weights, map_ranks): the trained weights and the list of
      validation scores, one per epoch.
    """
    map_ranks = []
    for i in tqdm(range(epochs), disable=not self.verbose):
      map_rank = self.train_epoch()
      print('Epoch ' + str(i) + ' rank: ' + str(map_rank))
      print('Weights: ', self.model_weights)
      map_ranks.append(map_rank)

    if self.verbose:
      # plot map ranks
      plot_graph(map_ranks, 'Epoch', 'MAP Score', 'Learning Curve (MAP Score)')
    return self.model_weights, map_ranks

  # Train Epoch
  def train_epoch(self):
    """Update the weights once per training row, then score on the validation set."""
    # randomly sort the training_df
    self.training_df = self.training_df.sample(frac=1)

    # readjust the weights of all models for every row in training_df
    self.training_df.apply(self.model_backprop, axis=1)
    # calculate final predictions for every customer with the updated weights
    epoch_pred = self.predictions_df.apply(
        get_final_prediction,
        position_weights=self.position_weights,
        model_weights=self.model_weights,
        axis=1)
    # calculate map rank
    return calc_map_rank(self.validation_df, epoch_pred)

  # Readjust the weights for each model using SGD
  def model_backprop(self, validation_row):
    """Apply one update to every model weight for a single customer.

    Args:
      validation_row: row with 'customer_id' and 'articles' (the articles the
        customer actually bought). The customer must be present in the index
        of predictions_df.
    """
    customer_id = validation_row['customer_id']
    actual_articles = validation_row['articles']

    # combined rating of every article predicted for this customer
    predictions = self.predictions_df.loc[customer_id]
    values = calculate_prediction_ratings(predictions, self.position_weights, self.model_weights)

    for m in range(len(self.model_weights)):
      # Read once per model: the regularization term below uses the weight as
      # it was before this customer's articles were processed.
      weight = self.model_weights[m]
      col_name = 'prediction_' + str(m)
      prediction = predictions[col_name][:len(self.position_weights)]
      for article_id in actual_articles:
        # rating is 0 when no model predicted the article
        val = values.get(article_id, 0)

        try:
          # the model predicted the bought article: use the weight of its position
          pred_index = prediction.index(article_id)
          article_weight = self.position_weights[pred_index]
        except ValueError:
          # the model missed the bought article: use the (negative) delta penalty
          article_weight = - self.delta

        # sigmoid(val) lies in (0, 1), so (sigmoid(val) - 1.5) is always
        # negative: a positive article_weight raises the model weight and the
        # negative delta lowers it.
        # NOTE(review): the origin of the 1.5 offset is not documented in this notebook.
        self.model_weights[m] -= self.learning_rate * ((sigmoid(val) - 1.5) * article_weight + self.regularizer * weight)

    

### 2.4 Hyper Param Tuning Functions

In [27]:
# Plot the scores obtained for each tried hyper-parameter value
def plot_graph_tune(data1, data2, xlabel, ylabel, title):
    """Plot `data1` against evenly spaced positions labelled with values from `data2`.

    Every second position gets a tick showing the matching entry of `data2`.
    The line is labelled 'Test' in the legend.
    """
    # one x position per entry of data2
    tick_positions = list(range(len(data2)))
    plt.plot(tick_positions, data1, label='Test')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(tick_positions[::2], data2[::2])
    plt.legend()
    plt.show()

# Tune the learning rate, delta and regularizer params
def tune_hyper_params():
  """Placeholder: not implemented yet; does nothing and returns None."""
  pass

# Give each trial its own copy of the caller's starting weights (or None)
def _copy_initial_weights(model_weights):
  if model_weights is None:
    return None
  return list(model_weights)


# Shared loop of the tune_* functions: train one ensemble per tried value
def _sweep_hyper_param(build_model, initial_value, epochs, iterations, factor,
                       print_label, axis_label, final_title, max_title):
  value = initial_value
  final_scores = []
  max_scores = []
  values = []

  for _ in tqdm(range(iterations)):
    # build and train ensemble
    ensemble_model = build_model(value)
    _, map_ranks = ensemble_model.train(epochs)
    max_rank = max(map_ranks)
    # Add scores to plot later
    final_scores.append(map_ranks[-1])
    max_scores.append(max_rank)
    values.append(value)
    print(print_label + ': ' + str(value) + ' => ' + str(max_rank))

    value *= factor

  plot_graph_tune(final_scores, values, axis_label, 'MAP Score', final_title)
  plot_graph_tune(max_scores, values, axis_label, 'Max MAP Score', max_title)
  return final_scores, max_scores, values


def tune_learning_rate(predictions_df, training_df, validation_df, position_weights, num_models, delta=1/130,
                       regularizer=0, initial_learning_rate=0.0000001, epochs=30, iterations=20, model_weights=None, verbose=False,
                       factor=3):
  """Train one ensemble per learning rate and plot the resulting scores.

  The learning rate starts at initial_learning_rate and is multiplied by
  `factor` after each of the `iterations` trials. Every trial starts from its
  own copy of model_weights (or from random weights when it is None).

  Returns:
    Tuple (final_scores, max_scores, learning_rates): per trial, the
    last-epoch score, the best score over all epochs, and the rate used.
  """
  def build_model(learning_rate):
    return EnsembleModel(learning_rate, predictions_df, training_df, position_weights, validation_df,
                         num_models, delta, regularizer, _copy_initial_weights(model_weights), verbose)

  return _sweep_hyper_param(build_model, initial_learning_rate, epochs, iterations, factor,
                            'Learning Rate', 'Learning Rates',
                            'Learning Rates Tuning', 'Learning Rates Max Tuning')


def tune_regularizer(predictions_df, training_df, validation_df, position_weights, num_models, delta=1/130, learning_rate=0.0001,
                       initial_regularizer=0.00000001, epochs=30, iterations=20, model_weights=None, verbose=False,
                       factor=3):
  """Train one ensemble per regularization constant and plot the resulting scores.

  The constant starts at initial_regularizer and is multiplied by `factor`
  after each of the `iterations` trials. Every trial starts from its own copy
  of model_weights (or from random weights when it is None).

  Returns:
    Tuple (final_scores, max_scores, regularizers): per trial, the
    last-epoch score, the best score over all epochs, and the constant used.
  """
  def build_model(regularizer):
    return EnsembleModel(learning_rate, predictions_df, training_df, position_weights, validation_df,
                         num_models, delta, regularizer, _copy_initial_weights(model_weights), verbose)

  return _sweep_hyper_param(build_model, initial_regularizer, epochs, iterations, factor,
                            'Regularizer', 'Regularizer',
                            'Regularization Constant Tuning', 'Regularization Constant Max Tuning')


def tune_delta(predictions_df, training_df, validation_df, position_weights, num_models, regularizer=0, learning_rate=0.0001,
                      initial_delta=0.00000001, epochs=30, iterations=20, model_weights=None, verbose=False,
                      factor=3):
  """Train one ensemble per delta constant and plot the resulting scores.

  Delta starts at initial_delta and is multiplied by `factor` after each of
  the `iterations` trials. Every trial starts from its own copy of
  model_weights (or from random weights when it is None).

  Returns:
    Tuple (final_scores, max_scores, deltas): per trial, the last-epoch
    score, the best score over all epochs, and the delta used.
  """
  def build_model(delta):
    return EnsembleModel(learning_rate, predictions_df, training_df, position_weights, validation_df,
                         num_models, delta, regularizer, _copy_initial_weights(model_weights), verbose)

  return _sweep_hyper_param(build_model, initial_delta, epochs, iterations, factor,
                            'Delta', 'Delta',
                            'Delta Constant Tuning', 'Delta Constant Max Tuning')

## 3. Load Data

### 3.1 Load training and validation data

In [28]:
# Load all the transactions data. article_id is read as a string so the ids keep
# their leading zeros (the grouped lists below show ids such as 0624486001).
# At this point training_df holds every transaction; a later cell narrows it
# to the training window.
training_df = pd.read_csv(TRANSACTIONS_DATA_PATH, dtype={'article_id': str})

In [29]:
# Select validation data as purchases made after the validation cutoff date.
# t_dat is compared as a string; the YYYY-MM-DD format makes that ordering chronological.
# This cell needs training_df to still hold all transactions, so it must run
# before the cell that narrows training_df to the training window.
validation_df = training_df[training_df.t_dat > VALIDATION_CUTOFF_DATE]
validation_df.sort_values(['t_dat']).head()

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id
31575041,2020-09-17,00077dbd5c4a4991e092e63893ccf29294a9d5c46e8501...,903762001,0.06778,2
31602183,2020-09-17,ab136197d5c4a45122a1ab03e56ac2dcf873c7f03997f2...,870989001,0.101678,1
31602184,2020-09-17,ab136197d5c4a45122a1ab03e56ac2dcf873c7f03997f2...,399061008,0.059305,1
31602185,2020-09-17,ab136197d5c4a45122a1ab03e56ac2dcf873c7f03997f2...,882899003,0.06778,1
31602186,2020-09-17,ab136197d5c4a45122a1ab03e56ac2dcf873c7f03997f2...,898107003,0.025407,1


In [30]:
# Collapse the validation purchases into one row per customer holding the list of bought articles
validation_df = (
    validation_df.groupby('customer_id')['article_id']
    .apply(list)
    .reset_index(name='articles')
)
validation_df.head()

Unnamed: 0,customer_id,articles
0,00039306476aaf41a07fed942884f16b30abfa83a2a8be...,[0624486001]
1,0003e867a930d0d6842f923d6ba7c9b77aba33fe2a0fbf...,[0827487003]
2,000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed...,"[0757926001, 0788575004, 0640021019]"
3,000525e3fe01600d717da8423643a8303390a055c578ed...,[0874110016]
4,00077dbd5c4a4991e092e63893ccf29294a9d5c46e8501...,"[0903762001, 0879189005, 0158340001, 086796600..."


In [31]:
# Select training data as purchases made after the training cutoff date and up to
# (and including) the validation cutoff date.
# This rebinds training_df from "all transactions" to the training window, so the
# validation selection above has to run first.
training_df = training_df[(training_df.t_dat > TRAINING_CUTOFF_DATE) & (training_df.t_dat <= VALIDATION_CUTOFF_DATE)]
training_df.sort_values(['t_dat']).head()

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id
31292772,2020-09-09,000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed...,399136061,0.08339,2
31320518,2020-09-09,a9177440b3d4ec2a7d76b602205c657063cc0ed6144f5a...,797988002,0.024475,2
31320519,2020-09-09,a928d55708c1d48e8736bbd79d7f904d90991576ca91ef...,733386002,0.06778,1
31320520,2020-09-09,a928d55708c1d48e8736bbd79d7f904d90991576ca91ef...,686564013,0.033881,1
31320521,2020-09-09,a928d55708c1d48e8736bbd79d7f904d90991576ca91ef...,564358060,0.033881,1


In [32]:
# Collapse the training purchases into one row per customer holding the list of bought articles
training_df = (
    training_df.groupby('customer_id')['article_id']
    .apply(list)
    .reset_index(name='articles')
)
training_df.head()

Unnamed: 0,customer_id,articles
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,[0794321007]
1,0000757967448a6cb83efb3ea7a3fb9d418ac7adf2379d...,"[0719530003, 0448509014]"
2,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,[0734592001]
3,0003e867a930d0d6842f923d6ba7c9b77aba33fe2a0fbf...,"[0640021012, 0621381012, 0880017001, 088001700..."
4,00040239317e877c77ac6e79df42eb2633ad38fcac09fc...,"[0875272011, 0875272012, 0875272011, 0875272012]"


### 3.2 Load prediction models

In [33]:
# Build a df with all the predictions from each model: one row per customer
# (customer_id is the index) and one list-valued column per file in model_list
predictions_df = read_models(model_list, PATH)
predictions_df.head()

Unnamed: 0_level_0,prediction_0,prediction_1,prediction_2,prediction_3,prediction_4,prediction_5,prediction_6,prediction_7
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
00000dbacae5abe5e23885899a1fa44253a17956c6d1c3d25f88aa139fdfc657,"[0568601043, 0751471001, 0915526001, 070601600...","[0568601043, 0568601006, 0706016001, 044850901...","[0568601043, 0751471001, 0915526001, 070601600...","[0568601043, 0915526001, 0751471001, 075147104...","[0568601043, 0568601006, 0841260003, 074523200...","[0568601043, 0568601006, 0745232001, 084126000...","[0568601043, 0568601006, 0751471001, 070601600...","[0568601043, 0568601006, 0568601006, 056859700..."
0000423b00ade91418cceaf3b26c6af3dd342b51fd051eec9c12fb36984420fa,"[0751471001, 0915526001, 0706016001, 075147104...","[0826211002, 0739590027, 0811835004, 059958008...","[0751471001, 0915526001, 0706016001, 075147104...","[0915526001, 0751471001, 0751471043, 070601600...","[0826211002, 0811835004, 0739590027, 059958008...","[0826211002, 0811835004, 0599580083, 073959002...","[0826211002, 0751471001, 0811835004, 073959002...","[0826211002, 0811835004, 0599580083, 073959002..."
000058a12d5b43e67d225668fa1f8d618c13dc232df0cad8ffe7ad4a1091e318,"[0751471001, 0915526001, 0706016001, 075147104...","[0852643003, 0852643001, 0858883002, 070601600...","[0751471001, 0915526001, 0706016001, 075147104...","[0915526001, 0751471001, 0751471043, 070601600...","[0852643003, 0858883002, 0351484002, 072352900...","[0852643003, 0858883002, 0852643001, 035148400...","[0852643003, 0751471001, 0852643001, 085888300...","[0852643003, 0852643001, 0858883002, 075042401..."
00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2c5feb1ca5dff07c43e,"[0751471001, 0915526001, 0706016001, 075147104...","[0706016001, 0448509014, 0751471001, 057954100...","[0751471001, 0915526001, 0706016001, 075147104...","[0915526001, 0751471001, 0751471043, 070601600...","[0579541001, 0751471001, 0678942001, 078334600...","[0751471001, 0579541001, 0568601006, 067894200...","[0706016001, 0751471001, 0448509014, 057954100...","[0706016001, 0448509014, 0751471001, 057954100..."
00006413d8573cd20ed7128e53b7b13819fe5cfc2d801fe7fc0f26dd8d65a85a,"[0896152002, 0730683050, 0791587015, 092753000...","[0896152002, 0730683050, 0791587015, 092753000...","[0896152002, 0730683050, 0927530004, 079158701...","[0915526001, 0751471001, 0751471043, 070601600...","[0730683050, 0818320001, 0791587015, 089615200...","[0730683050, 0896152002, 0791587015, 092753000...","[0896152002, 0730683050, 0791587015, 092753000...","[0896152002, 0730683050, 0791587015, 092753000..."


## 4. Train Ensemble Model

In [None]:
# Build the ensemble model (no starting weights are passed, so they are
# initialized randomly) and train it for 15 epochs.
# model_weights holds the trained weight of each model; map_ranks the validation score after each epoch.
ensemble_model = EnsembleModel(LEARNING_RATE, predictions_df, training_df, position_weights, validation_df, len(model_list), regularizer=REG_CONST, delta=DELTA_CONST)
model_weights, map_ranks = ensemble_model.train(15)

In [None]:
# Show the trained weights returned by ensemble_model.train
print(model_weights)

## 5. Save Weight and Rank

In [None]:
# Save a submission frame as a CSV file
def write_submission(submission_df, fname, path='./'):
  """Write submission_df to `path + fname` without the index column."""
  destination = path + fname
  submission_df.to_csv(destination, index=False)

# Append ensemble model info to rank file
def save_weight_rank(model_list, weights, rank, fname='ensemble_ranks.csv', path='./'):
  """Append one ensemble run (models, weights, score) to the rank CSV.

  Args:
    model_list: names of the ensembled model files.
    weights: trained weight of each model.
    rank: score achieved by the ensemble.
    fname: name of the rank file.
    path: prefix prepended to fname.

  Returns:
    DataFrame with the full content of the rank file after the append.
  """
  import os  # local import so this cell does not depend on the notebook's import cell

  path = path + fname
  row = {
    'models': ' '.join(model_list),
    # Use the `weights` argument; the global `model_weights` was read before,
    # which ignored the caller's value and failed when that global was unset.
    'weights': ' '.join([str(x) for x in weights]),
    'rank': rank
  }
  new_row_df = pd.DataFrame([row])
  if os.path.exists(path):
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    rank_df = pd.concat([pd.read_csv(path), new_row_df], ignore_index=True)
  else:
    # First recorded run: start a new rank file instead of failing on read
    rank_df = new_row_df
  rank_df.to_csv(path, index=False)
  return rank_df

In [None]:
# Record the trained weights and the last epoch's score into the rank file under PATH
# (requires model_weights and map_ranks from the training cell in Section 4)
save_weight_rank(model_list, model_weights, map_ranks[-1], path=PATH)

## 6. Tune Hyperparameters

### 6.1 Tune Learning Rate

In [None]:
# Try 5 learning rates starting at 0.0081 (each 3x the previous), 10 epochs each.
# learning_rate_res is (final_scores, max_scores, learning_rates).
learning_rate_res = tune_learning_rate(predictions_df, training_df, validation_df, position_weights,
                                       len(model_list), delta=1/130, regularizer=0, initial_learning_rate=0.0081,
                                       epochs=10, iterations=5, model_weights=None, verbose=False)

In [None]:
# Re-draw the two plots that tune_learning_rate already produced, from the stored results
plot_graph_tune(learning_rate_res[0], learning_rate_res[2], 'Learning Rates', 'MAP Score', 'Learning Rates Tuning')
plot_graph_tune(learning_rate_res[1], learning_rate_res[2], 'Learning Rates', 'Max MAP Score', 'Learning Rates Max Tuning')

In [None]:
# Last-epoch score of each learning-rate trial
learning_rate_res[0]

In [None]:
# Best score over all epochs of each learning-rate trial
learning_rate_res[1]

In [None]:
# Learning rates that were tried
learning_rate_res[2]

### 6.2 Tune Regularization Constant

In [None]:
# Try 10 regularization constants starting at 0.00001 (each 3x the previous), 15 epochs each.
# regularizer_res is (final_scores, max_scores, regularizers).
regularizer_res = tune_regularizer(predictions_df, training_df, validation_df, position_weights, len(model_list),
                                   delta=1/130, learning_rate=0.0007, initial_regularizer=0.00001, epochs=15,
                                   iterations=10, model_weights=None, verbose=False)

### 6.3 Tune Delta Constant

In [None]:
# Try 4 delta constants starting at 8.51e-07 (each 3x the previous), 15 epochs each.
# delta_res is (final_scores, max_scores, deltas).
delta_res = tune_delta(predictions_df, training_df, validation_df, position_weights, len(model_list),
                       regularizer=0.00007, learning_rate=0.0007, initial_delta=8.51e-07, epochs=15,
                       iterations=4, model_weights=None, verbose=False)

In [None]:
# Last-epoch score of each delta trial
delta_res[0]

In [None]:
# Best score over all epochs of each delta trial
delta_res[1]

In [None]:
# Delta values that were tried
delta_res[2]