In [0]:
# Change directory to VSCode workspace root so that relative path loads work correctly. Turn this addition off with the DataScience.changeDirOnImportExport setting
# NOTE: the target is computed relative to the *current* directory, so re-running
# this cell in the same kernel moves up two more levels each time.
import os
try:
    os.chdir(os.path.join(os.getcwd(), '../..'))
    print(os.getcwd())
except OSError as err:
    # Best effort: carry on from the current directory, but report the failure
    # instead of silently swallowing every exception (a bare `except:` would
    # also hide KeyboardInterrupt and genuine programming errors).
    print(f'Could not change working directory: {err}')


In [0]:
from itertools import product
import pandas as pd
import numpy as np
import sys
sys.path.append('python')
from wifipricing.modeling_prepartions import get_lgb_data
from sklearn.externals import joblib


In [0]:
def get_price_grid(price_list, datacap_list):
    """Return every (price, datacap) combination as a two-column DataFrame.

    Parameters
    ----------
    price_list : iterable
        Candidate prices; becomes the ``price_usd`` column.
    datacap_list : iterable
        Candidate data caps; becomes the ``datacap_mb`` column.

    Returns
    -------
    pandas.DataFrame
        One row per element of the Cartesian product (prices vary slowest,
        data caps fastest), with columns ``price_usd`` and ``datacap_mb``.
        If either input is empty the frame has those two columns and no rows.
    """
    combos = list(product(price_list, datacap_list))
    if not combos:
        # zip(*[]) yields nothing, so the two-way unpacking below would raise
        # ValueError; return an empty grid with the expected columns instead.
        return pd.DataFrame({'price_usd': [], 'datacap_mb': []})

    prices, datacaps = zip(*combos)  # list of pairs -> two parallel tuples
    return pd.DataFrame({'price_usd': prices, 'datacap_mb': datacaps})


def label_encode_mapper(df, encoder_dict):
    """Encode columns of ``df`` in place using already-fitted encoders.

    ``encoder_dict`` maps a column name to an object exposing ``transform``
    (e.g. an sklearn label encoder). Each listed column of ``df`` is
    overwritten with the result of ``transform`` on that column.
    The frame is modified in place; nothing is returned.
    """
    for column in encoder_dict:
        fitted_encoder = encoder_dict[column]
        encoded_values = fitted_encoder.transform(df[column])
        df[column] = encoded_values
    return None


def label_encode_invmapper(df, encoder_dict):
    """Decode columns of ``df`` in place using already-fitted encoders.

    ``encoder_dict`` maps a column name to an object exposing
    ``inverse_transform`` (e.g. an sklearn label encoder). Each listed column
    of ``df`` is overwritten with the result of ``inverse_transform`` on that
    column. The frame is modified in place; nothing is returned.
    """
    for column in encoder_dict:
        fitted_encoder = encoder_dict[column]
        decoded_values = fitted_encoder.inverse_transform(df[column])
        df[column] = decoded_values
    return None


def newdata_transform(data, col_order, df_transformer, encoder_dict):
    """Return a transformed, column-reordered copy of ``data``.

    A copy of ``data`` is handed to ``df_transformer(copy, encoder_dict)``,
    which is expected to modify the copy in place (its return value is
    ignored). The copy is then indexed with ``col_order`` and returned, so
    the caller's ``data`` is left untouched.
    """
    transformed = data.copy()
    df_transformer(transformed, encoder_dict)
    reordered = transformed[col_order]
    return reordered


def prep_grids(fgrid, pgrid):
    """Prepare a feature grid and a price grid for a cross join, in place.

    Columns of ``pgrid`` that also exist in ``fgrid`` are dropped from
    ``fgrid`` (so the price grid supplies them after the join), and a constant
    ``dummy`` column equal to 1 is set on both frames to serve as the join key.

    Both frames are modified in place and nothing is returned. The function is
    safe to call more than once on the same frames: the ``dummy`` key is never
    dropped, and columns already removed from ``fgrid`` are skipped instead of
    raising ``KeyError``.
    """
    # Only drop what is actually present, and never the join key itself
    # (after a first call `pgrid.columns` contains 'dummy' as well).
    overlapping = [
        col for col in pgrid.columns
        if col != 'dummy' and col in fgrid.columns
    ]
    fgrid.drop(columns=overlapping, inplace=True)

    fgrid['dummy'] = 1
    pgrid['dummy'] = 1

    return None


def get_quantile_predictions(fit_dict, X_new):
    """Predict ``X_new`` with every fitted model and collect the results.

    ``fit_dict`` maps a label to a fit object whose ``'model'`` entry exposes
    ``predict``. The returned DataFrame has one column per label, named
    ``y_<label>``, holding that model's predictions for ``X_new``, in the
    iteration order of ``fit_dict``.
    """
    predictions_by_column = {
        f'y_{label}': fitted['model'].predict(X_new)
        for label, fitted in fit_dict.items()
    }
    return pd.DataFrame(predictions_by_column)




# Price grid settings

In [0]:
# Candidate price points: the integers 0 through 50 inclusive.
PRICES = [*range(51)]
# Candidate data caps: 0 through 150 in steps of 10 (numpy integer scalars).
DATACAPS = [*np.arange(0, 160, 10)]




# Loading model fit objects + grids

In [0]:
# List the serialized model files once. os.listdir returns entries in arbitrary
# order, so sort them to make the "first match" selection below reproducible.
alljoblibs = sorted(os.listdir('models'))
print(alljoblibs)

# subset name -> {quantile name -> loaded fit object}
subset = {'datacap':{}, 'timecap':{},'fulldata':{}}

print('loading files double loop:')
for sub in subset:
    # Files are matched to a subset by substring of the file name.
    joblibs = [file for file in alljoblibs if sub in file]

    for quantile in ['upper', 'median', 'lower']:
        # First file (in sorted order) whose name also contains the quantile.
        path = next((f'models/{x}' for x in joblibs if quantile in x), None)
        if path is None:
            # Fail with the missing combination spelled out instead of a bare
            # "list index out of range".
            raise FileNotFoundError(
                f"No file in 'models' matches subset {sub!r} and quantile {quantile!r}"
            )

        # NOTE: joblib.load unpickles the file; only load model files you trust.
        loaded = joblib.load(path)
        subset[sub][quantile] = loaded

        # Each fit object is indexed like a dict and carries its 'RMSE'.
        print(f'Loaded {sub}-{quantile} --- RMSE: {loaded["RMSE"]:.4f}')

# Fits used by the rest of the notebook: the 'datacap' subset, keyed by quantile.
datacap_fits = subset['datacap']



# Feather files holding the feature grid for each data subset. Only the
# 'datacap' grid is active; the other two entries are kept commented out.
grid_paths = {
    'datacap':'data/summarized_data/df_summarized_datacap_featgrid.feather',
    # 'timecap':'data/summarized_data/df_summarized_timecap_featgrid.feather',
    # 'fulldata':'data/summarized_data/df_summarized_all_featgrid.feather'
}

# Disabled sketch of a loop over every configured grid (not executed):
# for subset, path in grid_paths.items():
#     df = pd.read_feather(path)

# Feature grid read from disk, and the price grid built from every
# (price, datacap) combination of PRICES and DATACAPS.
featgrid = pd.read_feather(grid_paths['datacap'])
price_grid = get_price_grid(PRICES, DATACAPS)

# Mutates both frames in place: removes the price-grid columns from featgrid
# and adds a constant 'dummy' column to each, which later serves as the merge key.
prep_grids(featgrid, price_grid)


In [0]:
# Preview the first rows of the feature grid as left by prep_grids.
featgrid.head()

In [0]:
# Preview the first rows of the price grid (price_usd, datacap_mb, dummy).
price_grid.head()


## Original training data shape reference:
 We need to make sure that our generated data has the same dtypes and column order as the original training data.

In [0]:
# Sample of the training features stored with the median fit; its columns are
# the reference for the generated data.
datacap_fits['median']['X_sample'].head()



# Iterate over grids

In [0]:
%timeit
price_grid.head()
pgrid = price_grid.head(5)
model_col_order = datacap_fits['median']['X_sample'].columns

for i, df_row in featgrid.groupby(level=0):
    row_pricegrid = pd.merge(df_row, price_grid, on='dummy').drop(columns=['dummy'])
    X_new = newdata_transform(row_pricegrid, model_col_order, label_encode_mapper, encoder_dict)

    predictions = get_quantile_predictions(datacap_fits, X_new)
    print(f"\nRow {i}:")
    
    out = pd.concat([X_new, predictions], axis=1)
    print(out)

    if i > 5:
        break
