# Load models and predict

In [7]:
from lilac.jobs.job_base import BasicSeedJob

In [8]:

# Keyword arguments for the job; a later cell adds "model_dir", "seed" and
# "model_str" per run and unpacks the dict into BasicSeedJob(**params).
params = {
    # Input data and feature cache locations.
    "train_path": "data/train_v5.csv",
    "test_path": "data/test_v5.csv",
    "features_dir": "data/features/v5_sub",
    # Column roles.
    "target_col": "price_log",
    "base_col": "area_log",
    # Model hyperparameters (presumably forwarded to the LightGBM trainer -- TODO confirm).
    "depth": 8,
    "min_child_samples": 200,
    # Fold generation and evaluation settings.
    "folds_gen_str": "group",
    "group_key_col": "ordered",
    "evaluator_str": "mae",
    "unused_cols": [
        "id",
        "ordered",
        "built_year",
        "area",
        "nearest_min",
        "age",
    ],
    "target_enc_cols": ["nearest_sta", "district"],
    "log_target_on_target_enc": False,
    # One entry per feature generator: a generator key plus its own params.
    "features_settings": [
        {
            "model_str": "group",
            "params": {
                "group_keys": ["city", "district", "nearest_sta"],
                "agg_func_list": ["mean"],
                "input_cols": [
                    "built_year_seireki",
                    "area",
                    "nearest_min",
                    "age",
                    "area_log",
                    "nearest_min_log",
                    "age_log",
                ],
            },
        },
        {
            "model_str": "cat_lda",
            "params": {
                "main_col": "city",
                "sub_col": "layout",
                "num_topics": 5,
            },
        },
        {
            "model_str": "diff_group",
            "params": {
                "group_key": "district",
                "input_cols": [
                    "built_year_seireki",
                    "area_log",
                    "nearest_min_log",
                ],
            },
        },
    ],
    # NOTE(review): presumably skips training and only predicts from saved models -- confirm in lilac.
    "pred_only": True,
}

In [9]:
# Each job has its own directory: data/models/<experiment_flag>/<job_name>.
data_path = "data/models"
experiment_flag = "submit"
job_names = ["lgbm_diff_mae_42", "lgbm_diff_mae_43", "lgbm_mae_42", "lgbm_mae_43"]
model_dirs = [f"{data_path}/{experiment_flag}/{job_name}" for job_name in job_names]

# Seed and model key for each job, in the same order as `job_names`.
seeds = [42, 43, 42, 43]
models = ["lgbm_diff_mae", "lgbm_diff_mae", "lgbm_mae", "lgbm_mae"]

# zip() stops at the shortest input without complaint, so a list edited out of
# sync would silently drop runs from the ensemble; fail loudly instead.
if not (len(model_dirs) == len(seeds) == len(models)):
    raise ValueError(
        "model_dirs, seeds and models must have the same length, got "
        f"{len(model_dirs)}, {len(seeds)} and {len(models)}"
    )

# Collect one prediction per (model_dir, seed, model_str) run.
preds = []
for model_dir, seed, model_str in zip(model_dirs, seeds, models):
    # Build a per-run copy instead of writing into the shared `params` dict,
    # so the config cell's dict is left untouched and this cell can be re-run
    # without carrying state over from the previous iteration or execution.
    run_params = {
        **params,
        "model_dir": model_dir,
        "seed": seed,
        "model_str": model_str,
    }
    output = BasicSeedJob(**run_params).run()
    preds.append(output["pred"])

Extracting required params in FeatureGeneratorsFactory.
Extracting required params in FeatureGeneratorsFactory.
Extracting required params in FeatureGeneratorsFactory.
Extracting required params in FoldsGeneratorFactory.
Extracting required params in TrainerFactory.
Extracting required params in EvaluatorFactory.
[1/3] GroupFeatures
Loading _GroupFeatures/430f5abfeca359abe0b7edd5a9ffa278 (train)...
Loading _GroupFeatures/430f5abfeca359abe0b7edd5a9ffa278 (test)...
Loading _GroupFeatures/d81ac306b4b857898d07605673306bd8 (train)...
Loading _GroupFeatures/d81ac306b4b857898d07605673306bd8 (test)...
Loading _GroupFeatures/451639cd3fe572fa2ec47f99d5ee5e03 (train)...
Loading _GroupFeatures/451639cd3fe572fa2ec47f99d5ee5e03 (test)...
Loading _GroupFeatures/486a343bdae1287fefe39d7a562144ec (train)...
Loading _GroupFeatures/486a343bdae1287fefe39d7a562144ec (test)...
Loading _GroupFeatures/c6a68002c5d26ce7edf81fb999e58546 (train)...
Loading _GroupFeatures/c6a68002c5d26ce7edf81fb999e58546 (test)...


In [10]:
import numpy as np

# Ensemble: element-wise mean over the per-run predictions (axis 0 indexes the runs).
pred = np.mean(np.asarray(preds), axis=0)

In [11]:
# Load the prize-winning submission file
import pandas as pd

# CSV of the earlier submission that the reproduced prediction is checked against.
submission_csv = "data/subs/063.csv"
subs = pd.read_csv(submission_csv)

In [13]:
# Compare subs with pred: store the prediction as a new column first.
subs["pred"] = pred

# List the rows where the two columns are not exactly equal. No tolerance is
# applied, so values that differ only in the last floating-point bits are
# listed as well.
mismatch = subs["pred"].ne(subs["取引価格（総額）_log"])
subs.loc[mismatch]

Unnamed: 0,ID,取引価格（総額）_log,pred
1,1000119,7.353071,7.353071
3,1000382,6.941047,6.941047
6,1000531,7.639745,7.639745
8,1000537,7.339947,7.339947
11,1000751,7.439750,7.439750
...,...,...,...
20999,47003869,7.138563,7.138563
21000,47003951,7.147557,7.147557
21001,47006086,7.147468,7.147468
21005,47006712,7.514760,7.514760


In [16]:
# Mean squared difference between the submitted column and the prediction column.
residual = subs["取引価格（総額）_log"].sub(subs["pred"])
residual.pow(2).mean()

6.875072810390005e-31