In [None]:
import os, sys
from pathlib import Path
import pandas as pd
import numpy as np


# Make the project's ``src`` directory importable from this notebook.
# The path is resolved relative to the kernel's current working directory.
src_dir = os.path.abspath('../src')
# Skip the append when the entry is already present, so re-running this cell
# does not keep adding duplicate entries to sys.path.
if os.path.exists(src_dir) and src_dir not in sys.path:
    sys.path.append(src_dir)

from feat_build import main
from model_build import train

In [None]:
# Directory layout: the kernel's working directory is used as the
# investigation directory, and its parent as the project root.
# NOTE(review): this assumes the kernel was started in the notebook's own folder.
inv_dir = Path.cwd()
proj_dir = inv_dir.parent

# Inputs handed to the feature builder: the sample-GUID parquet file name and
# the list of source directory names to build features from.
sample_guids_parquet = 'sample_guid_10000_china_us.parquet'
directories = [
    "frgnd_backgrnd_apps_v4_hist",
    "web_cat_usage_v2",
    "power_acdc_usage_v4_hist",
    "os_c_state",
    "hw_pack_run_avg_pwr",
]

# Only build features when the cached table is absent. Path.exists() simply
# returns False when the 'out' directory itself is missing, whereas listing
# that directory would raise FileNotFoundError.
# NOTE(review): presumably generate_features writes out/feat.parquet — confirm.
feat_path = inv_dir / 'out' / 'feat.parquet'
if not feat_path.exists():
    main.generate_features(sample_guids_parquet, inv_dir, directories)

In [None]:
# Build the synthetic dataset, handing the generator the project's
# 'dummy_data' directory as its target location.
main.generate_synthetic_data(proj_dir.joinpath('dummy_data'))

In [None]:
# Read the synthetic feature table from <project root>/dummy_data.
syn_feat = pd.read_parquet(proj_dir.joinpath("dummy_data", "synthetic_data.parquet"))

In [None]:
# Show the dimensions of the synthetic feature table as a (rows, columns) tuple.
syn_feat.shape

In [None]:
# Load the real feature table from the same location that the
# feature-generation cell checks (inv_dir / 'out'), instead of a path that
# depends on whatever the working directory happens to be at this point.
feat = pd.read_parquet(inv_dir / 'out' / 'feat.parquet')

In [None]:
# Show the dimensions of the real feature table as a (rows, columns) tuple.
feat.shape

In [None]:
# Sweep the `l` parameter on the real feature table and record the first value
# returned by train.train (bound to test_mse*) for three modes: "lasso"
# (column "coord_desc"), "fw-lasso-lap" ("fw_lap") and "fw-lasso-exp" ("fw_exp").
sweep_values = [0.25, 0.5, .9, 1, 5, 10, 25, 100]
mse_by_value = {}
for lam in sweep_values:
    print("parameter: ", lam)
    # NOTE(review): the "lasso" baseline receives the reciprocal 1/lam while the
    # two fw-* modes receive lam itself — presumably a penalty-weight versus
    # constraint-radius convention; confirm against train.train.
    test_mse1, feat_dict, r2 = train.train(feat, "lasso", tol=1e-4, l=1/lam)
    test_mse2, feat_dict, r2 = train.train(feat, "fw-lasso-lap", tol=1e-4, l=lam, max_iter=2500)
    test_mse3, feat_dict, r2 = train.train(feat, "fw-lasso-exp", tol=1e-4, l=lam, max_iter=2500)
    # feat_dict and r2 are overwritten by every call; only the MSEs are kept.
    mse_by_value[lam] = [test_mse1, test_mse2, test_mse3]

# Build the table once from the collected rows instead of enlarging a frame
# one row at a time; the sweep values become the index.
results = pd.DataFrame.from_dict(
    mse_by_value,
    orient="index",
    columns=["coord_desc", "fw_lap", "fw_exp"],
)

In [None]:
# Display the sweep table: one row per swept parameter value, one column per mode.
results

In [None]:
# Line plot of the recorded test MSE against the swept parameter, one line per
# column of `results`. Axis labels and a title are set so the figure can be
# read on its own.
ax = results.plot(kind='line')
ax.set(xlabel="l (swept parameter)", ylabel="test MSE", title="Test MSE by mode on feat");

In [None]:
# One run of the "fw-lasso" mode on the real feature table; every option other
# than tol and l is left at whatever train.train defaults to.
test_mse, feat_dict, r2 = train.train(
    feat,
    "fw-lasso",
    tol=1e-4,
    l=10,
)

In [None]:
# Same "fw-lasso" mode on the real feature table, this time with an explicit
# epsilon and a larger iteration cap.
test_mse, feat_dict, r2 = train.train(
    feat,
    "fw-lasso",
    tol=1e-4,
    l=10,
    epsilon=10,
    max_iter=5000,
)

In [None]:
# Sweep epsilon on the synthetic feature table with l fixed at 10, recording
# the first value returned by train.train (bound to test_mse*) for the
# "fw-lasso-lap" ("fw_lap") and "fw-lasso-exp" ("fw_exp") modes.
epss = [0.25, 0.5, 1, 5, 10, 100, 10_000]
mse_by_eps = {}
for eps in epss:
    print("parameter: ", eps)
    test_mse1, feat_dict, r2 = train.train(syn_feat, "fw-lasso-lap", tol=1e-4, l=10, epsilon=eps, max_iter=2500)
    test_mse2, feat_dict, r2 = train.train(syn_feat, "fw-lasso-exp", tol=1e-4, l=10, epsilon=eps, max_iter=2500)
    # feat_dict and r2 are overwritten by every call; only the MSEs are kept.
    mse_by_eps[eps] = [test_mse1, test_mse2]

# Build the table once from the collected rows instead of enlarging a frame
# one row at a time; the epsilon values become the index.
epsresults = pd.DataFrame.from_dict(
    mse_by_eps,
    orient="index",
    columns=["fw_lap", "fw_exp"],
)

In [None]:
# Display the epsilon sweep table: one row per epsilon, one column per mode.
epsresults

In [None]:
# Line plot of the recorded test MSE against epsilon on a logarithmic x-axis,
# one line per column of `epsresults`. Axis labels and a title are set so the
# figure can be read on its own.
ax = epsresults.plot(kind='line', logx=True)
ax.set(xlabel="epsilon", ylabel="test MSE", title="Test MSE by epsilon on syn_feat (l=10)");