Tune learning rate (-r) and regularization strength (-b).
Do randomized grid search.

In [1]:
import numpy as np
import os
import re
import subprocess

## Results
- 'mid' w/ categorical_features = ['banner_pos', 'platform_id', 'platform_domain', 'platform_category', 'user', 'device_conn_type', 'C14','C17','C20','C21', 'user_count', 'hourly_user_count'].
    + `train_site_mid` 01h.
    (lr, reg) = (0.290019974611113, 2.051767014532633e-05, 0.448956),
 (0.18840479455932513, 9.153173826539158e-06, 0.449061),
 (0.15771034111193996, 7.338765445403715e-06, 0.449214)
    + `train_app_mid` 32m.
    (lr, reg) = (0.5518474237040196, 0.00010336592954287885, 0.340482),
 (1.6906957278948453, 0.00013600060002962432, 0.340784),
 (0.7834994600028368, 3.7867217328877597e-06, 0.341407)
- w/ categorical_features = ['banner_pos', 'platform_id', 'platform_domain', 'platform_category', 'user', 'device_conn_type', 'C14','C17','C20','C21', 'user_count', 'hourly_user_count'].
    + `train_site_small` 13m50s.
    (lr, reg, score): (0.1157877514591988, 0.00010297066722417676, 0.449593),
 (0.17276795944769952, 0.00026624089656912645, 0.449598),
 (0.06292291117472452, 1.1952337794472005e-05, 0.449744)
    + `train_app_small` 07m.
    (0.3281065401364318, 7.611745290054109e-05, 0.343128),
 (0.5070861837562135, 5.456547188985162e-05, 0.343247),
 (0.404193004199436, 0.0007289347911615925, 0.34375)
- w/ device_* cols
    + `train_site_small`: 26m.
        (lr, reg, score): (0.27036067125394514, 0.0002883992671365484, 0.446741),
 (0.13684364782198308, 7.794451026230775e-06, 0.446769),
 (0.0706468934861553, 8.967904921004762e-06, 0.446839)
    + `train_app_small`: 14m.
        (lr, reg, score): (0.164879982368552, 5.998052351848339e-05, 0.340261),
 (0.11218764413671876, 2.5561112811476586e-05, 0.340286),
 (0.40504999930569235, 0.00012167825036020641, 0.34044)

- w/o device_* cols
     + `train_site_small`: learning_rate in (1e-3, 1e1), reg_param in (1e-2, 1e1).
    Try (lr, reg, score) = (1.5055668655636434, 0.14551999446480063), (0.10494282632180114, 1.299328348582395e-06, 0.447807)
     + `train_app_small`: (lr, reg, score) = (0.00166189386987065, 0.48305136367563084), (0.3836766261280591, 9.207841986673778e-05, 0.341098)

In [2]:
def parse_best_loss(run_result):
    lines = run_result.stdout.decode().split('\n')
    best_loss_line = [l for l in lines if 'best loss' in l]
    if best_loss_line:
        best_loss_line = best_loss_line[0]
        best_loss = re.search(r'best loss: (\d+.\d+)', best_loss_line).group(1)
        return float(best_loss)
    else:
        # No line matches 'best loss' if early stopping wasn't necessary.
        # Given the fact that FFM is very likely to overfit, this result
        # is most likely not of our interest.
        return np.nan

def eval_param(train_set, validation_set, learning_rate=0.2, reg_param=1e-5):
    xlearn_train = '~/code/xlearn/build/xlearn_train'
    command = [xlearn_train, train_set,
          '-s 2',
         '-v', validation_set,
          '-r', str(learning_rate),
               '-b', str(reg_param)]
    command = ' '.join(command)
    result = subprocess.run(command, stdout=subprocess.PIPE, shell=True)
    return parse_best_loss(result)

def log_uniform(low=0, high=1, size=None):
    return np.power(10, np.random.uniform(low, high, size))

def eval_param_ls(train_set, validation_set, learning_rates, reg_params):
    scores = [eval_param(train_set, validation_set, lr, reg)
              for lr, reg in zip(learning_rates, reg_params)]
    
    minidx = np.nanargmin(np.array(scores))
    min_score = scores[minidx]
    best_lr = learning_rates[minidx]
    best_reg = reg_params[minidx]
    return min_score, best_lr, best_reg, scores

In [3]:
def best_scores(scores, k):
    top_k = sorted([s for s in scores if s is not np.nan])[:k]
    params = []
    for s in top_k:
        best_idx = scores.index(s)
        params.append((learning_rates[best_idx], reg_params[best_idx], s))
    return params

In [10]:
size=100
learning_rates = log_uniform(-2,1,size) # default = 0.2
reg_params = log_uniform(-6,3,size) # default = 1e-5

In [11]:
data_type='app'
train_size='mid'
project_path = '~/code/avazu-ctr/'
xlearn_train = '~/code/xlearn/build/xlearn_train'
train_set = os.path.join(project_path, f'ffm-data/train_{data_type}_{train_size}.ffm')
validation_set = os.path.join(project_path, f'ffm-data/validate_{data_type}_{train_size}.ffm')

In [12]:
%%time
best_score, best_lr, best_reg, scores = eval_param_ls(train_set, validation_set, learning_rates, reg_params)

CPU times: user 102 ms, sys: 288 ms, total: 391 ms
Wall time: 32min 35s


In [13]:
best_scores(scores, 20)

[(0.5518474237040196, 0.00010336592954287885, 0.340482),
 (1.6906957278948453, 0.00013600060002962432, 0.340784),
 (0.7834994600028368, 3.7867217328877597e-06, 0.341407),
 (0.4379858304490142, 7.480261940395756e-06, 0.341516),
 (0.20168796462154398, 0.0005943687526229239, 0.341568),
 (0.7838804139595561, 4.298613823045098e-05, 0.34162),
 (0.22996115904345563, 5.537495160394821e-05, 0.341785),
 (2.523318768393591, 0.00038462618212701426, 0.342033),
 (0.18764578566646006, 3.069478419855302e-05, 0.342046),
 (2.0853642413843936, 6.321479670549764e-06, 0.342304),
 (0.07796877731223423, 2.4024102937187476e-05, 0.342515),
 (0.20722886918181235, 2.0667795302402616e-05, 0.342626),
 (2.702891659818984, 0.0003031424921599347, 0.342988),
 (0.2704468128212507, 1.8059498387005228e-06, 0.343027),
 (0.11169324860716098, 1.1084375241968226e-05, 0.34304),
 (0.212276061307909, 2.062885265580685e-06, 0.34314),
 (0.11489686766444268, 3.460243711547926e-06, 0.343308),
 (0.5487963858875685, 0.002425562807129

In [8]:
raise

RuntimeError: No active exception to reraise

In [None]:
# find a score for a parameter pair that didn't overfit.
best_score = min([s for s in scores if s > 0.30])
best_idx = scores.index(best_score)
learning_rates[best_idx], reg_params[best_idx], best_score

In [None]:
max_score, best_lr, best_reg

In [None]:
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt

In [None]:
f, ax = plt.subplots(figsize=(7, 7))
ax.set(xscale="log", yscale="log", xlabel='lr', ylabel='reg')
points = [0.5 if s is np.nan or s > 1 else 1/s for s in scores]
sns.scatterplot(learning_rates, reg_params, s=[50*p for p in points])

In [None]:
f, ax = plt.subplots(figsize=(7, 7))
ax.set(yscale="log")
points = [1 if s is np.nan or s > 1 else 1/s for s in scores]
sns.scatterplot(range(size), learning_rates, s = [30*p for p in points])