In [14]:
%matplotlib inline
from matplotlib import pyplot as plt
from collections import defaultdict
import json
import numpy as np
import pandas as pd
import re
import seaborn as sb
sb.set()

In [15]:
def is_experiment_start(line):
    return line.startswith('evaluating for')

def skip_n_experiments(input_file, experiment_num):
    current_experiment = 0
    while current_experiment < experiment_num:
        line = input_file.readline()
        if is_experiment_start(line):
            current_experiment += 1
            
def get_metrics(line):
    regexp = re.compile(r'[a-zA-Z0-9_]+\: [0-9\.]+')
    result = {}
    for metric_str in regexp.findall(line):
        metric, value = metric_str.split(': ')
        result[metric] = float(value)
    return result
    
            
def parse_experiment(experiment_log):
    current_recommender = None
    result = []
    cnt =0
    metrics = []
    experiment_finished = True
    for line in experiment_log:
            if line.startswith('evaluating '):
                current_recommender = line.split(' ')[1]
                metrics = []
                experiment_finished = False
            if 'val_ndcg_at_40' in line:
                    metrics.append(get_metrics(line))
            try:
                experiment_results = json.loads(line)
                experiment_results['model_name'] =  current_recommender
                experiment_results['metrics_history'] = metrics
                result.append(experiment_results)
                experiment_finished = True
            except:
                pass
    if not experiment_finished:
        experiment_results = {}
        experiment_results['model_name'] =  current_recommender
        experiment_results['metrics_history'] = metrics
        result.append(experiment_results)
    return result

def get_data_from_logs(logfile, experiment_num):
    current_experiment = 0
    with open(logfile) as input_file:
        skip_n_experiments(input_file, experiment_num)
        experiment_log = []
        for line in input_file:
            if is_experiment_start(line):
                break
            else:
                experiment_log.append(line.strip())
        return parse_experiment(experiment_log)

In [16]:
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

In [17]:
experiment_logs = './results/booking_config_ltr_2021_02_01T13_49_15/stdout'
data = get_data_from_logs(experiment_logs, 0)

In [18]:
df = pd.DataFrame(data).set_index('model_name').sort_values('SPS@4')
df.sort_values('ndcg@4')

df['objective'] = [x.split('-')[-1] for x in df.index]
df['booster'] = [x.split('-')[-2] for x in df.index]


In [19]:
good_df = df[df.booster != 'rf']

In [20]:
good_df.groupby('objective').mean().sort_values('ndcg@40')

Unnamed: 0_level_0,precision@4,SPS@4,ndcg@4,ndcg@40,model_build_time,model_inference_time
objective,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mape,0.07979,0.31916,0.235208,0.368449,414.962271,46.714759
rank_xendcg,0.109867,0.43947,0.335215,0.391287,177.777356,44.113322
regression_l1,0.091108,0.364432,0.295119,0.407463,328.861916,48.308441
Neural,0.114995,0.45998,0.348868,0.423816,4689.558168,149.008629
quantile,0.119216,0.476863,0.366006,0.445621,367.461559,47.948216
poisson,0.117809,0.471236,0.360153,0.44656,415.057848,49.387735
fair,0.121436,0.485743,0.379631,0.456816,249.578643,47.422878
huber,0.119529,0.478114,0.378881,0.459465,288.788421,47.901412
regression,0.124031,0.496123,0.386923,0.46507,276.585576,48.263079
lambdarank,0.122718,0.49087,0.382891,0.465405,436.775603,50.535018


In [21]:
df.groupby('booster').mean().sort_values('ndcg@40')

Unnamed: 0_level_0,precision@4,SPS@4,ndcg@4,ndcg@40,model_build_time,model_inference_time
booster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
APREC,0.114995,0.45998,0.348868,0.423816,4689.558168,149.008629
rf,0.112925,0.451701,0.343712,0.43148,174.82992,47.215277
dart,0.113082,0.452326,0.351424,0.434347,517.330149,50.706113
gbdt,0.112806,0.451226,0.350411,0.440754,173.028886,45.910992


In [22]:
good_df.sort_values('ndcg@40')

Unnamed: 0_level_0,precision@4,SPS@4,ndcg@4,ndcg@40,model_build_time,model_inference_time,model_metadata,metrics_history,objective,booster
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Lightgbm-dart-rank_xendcg,0.103114,0.412456,0.312533,0.341543,199.243688,44.128229,{},[],rank_xendcg,dart
Lightgbm-gbdt-mape,0.07979,0.31916,0.235216,0.368349,178.065018,44.276613,{},[],mape,gbdt
Lightgbm-dart-mape,0.07979,0.31916,0.235199,0.36855,651.859524,49.152905,{},[],mape,dart
Lightgbm-gbdt-regression_l1,0.085605,0.342421,0.271751,0.396109,158.577517,45.518018,{},[],regression_l1,gbdt
Lightgbm-dart-regression_l1,0.096611,0.386443,0.318486,0.418816,499.146315,51.098865,{},[],regression_l1,dart
APREC-Neural,0.114995,0.45998,0.348868,0.423816,4689.558168,149.008629,{},"[{'loss': 0.9074, 'ndcg_at_40': 0.0247, 'val_l...",Neural,APREC
Lightgbm-gbdt-poisson,0.114182,0.456728,0.347499,0.43631,163.943451,44.389229,{},[],poisson,gbdt
Lightgbm-gbdt-rank_xendcg,0.116621,0.466483,0.357898,0.441032,156.311024,44.098415,{},[],rank_xendcg,gbdt
Lightgbm-gbdt-quantile,0.118684,0.474737,0.362834,0.443134,160.153684,45.547329,{},[],quantile,gbdt
Lightgbm-dart-quantile,0.119747,0.478989,0.369177,0.448107,574.769434,50.349103,{},[],quantile,dart


In [23]:
def get_metric(metric_history, metric_name):
    result = []
    for item in metric_history:
        try:
            result.append(item[metric_name])
        except:
            print(item)
    return result

plt.figure(figsize=(16, 9))
for recommender in data:
    if len(recommender['metrics_history']) > 0:
        history = recommender['metrics_history']
        ndcg = get_metric(history, 'ndcg_at_40')
        val_ndcg = moving_average(get_metric(history, 'val_ndcg_at_40'), 1)
        
        success = get_metric(history, 'Success_at_4')
        val_success = moving_average(get_metric(history, 'val_Success_at_4'), 5)
        
        #plt.plot(val_success, label=recommender['model_name'] + "val_sps")
        #plt.plot(success, label=recommender['model_name'] + "sps")
        
        plt.plot(val_ndcg, label=recommender['model_name'] + "val_ndcg")
        plt.plot(ndcg, label=recommender['model_name'] + "ndcg")
        #plt.plot(val_success, label=recommender['model_name'] + "_val_sps")
plt.legend()


{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{

ValueError: v cannot be empty

<Figure size 1152x648 with 0 Axes>

In [None]:
np.argmax(val_ndcg)

In [None]:
plt.scatter(df['SPS@4'], df['ndcg@40'])
