In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import plotly.graph_objs as go
import re



In [2]:
# Absolute folders of the propensity-model experiment:
#   data_dir - the experiment's data/ folder
#   res_dir  - the results/ folder whose entries are loaded as run results
data_dir, res_dir = (
    '/home/os/Projects/upwork/python-sdk/improveai/experiments/propensity_model/data/',
    '/home/os/Projects/upwork/python-sdk/improveai/experiments/propensity_model/results/',
)

In [3]:
def load_run_res_json(dirname):
    """Load the ``results_summary.json`` file stored inside a directory.

    Parameters
    ----------
    dirname : str
        Path of the directory that contains ``results_summary.json``.

    Returns
    -------
    object
        The parsed JSON document.

    Raises
    ------
    OSError
        If the file is missing or cannot be read.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    des_fname = os.path.join(dirname, 'results_summary.json')

    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, so non-ASCII content is read the same way on every machine.
    with open(des_fname, 'r', encoding='utf-8') as crj:
        return json.load(crj)

In [4]:
# Get the list of all entries in the results directory. os.listdir returns
# names in arbitrary order, so sort them: if two entries map to the same key
# below, the one that wins is then the same on every run.
all_res_dir = sorted(os.listdir(res_dir))

# Assuming processing results of a single run
# (one model per <case>_<variants count>_<samples count>_<distribution info> combo).
# Each key is the entry name without its first '-'-separated token.
# Entries that hold no results_summary.json (stray files, checkpoint folders,
# unfinished runs) are skipped instead of aborting the whole load.
all_results = {
    '-'.join(des_dirname.split('-')[1:]): load_run_res_json('{}/{}'.format(res_dir, des_dirname))
    for des_dirname in all_res_dir
    if os.path.isfile('{}/{}/results_summary.json'.format(res_dir, des_dirname))
}



In [5]:
# Result keys follow the pattern
#   <case>-vc<variants count>-sc<samples count>-<distribution info>
# e.g. appended-vc300-sc4000-weib_rl5_a10
splt_fnames = [result_key.split('-') for result_key in all_results]

# Distinct values of each key component, sorted ascending.
dist_varaints_counts = sorted({int(parts[1].replace('vc', '')) for parts in splt_fnames})
dist_samples_counts = sorted({int(parts[2].replace('sc', '')) for parts in splt_fnames})
dist_prop_distributions = sorted({parts[3] for parts in splt_fnames})

# Case name -> short symbol shown in plot legends.
cases_map = dict(not_weighted='#1', weighted='#2', appended='#3')

# Plotted result field -> human-readable description used in titles and axis labels.
plotted_values_map = {
    'model_propensity_sum':
        'Sum of all variants` propensities (calculated by model)',
    'propensity_smape':
        'Symmetric Mean Absolute Percentage Error [%] of calculated variants` propensities',
    'total_duration_mins':
        'Time in minutes to obtain prop. model results (from data load to eval.)',
}

# Fields read from every results summary, in plotting order.
plotted_values = list(plotted_values_map)

# Distribution identifier -> human-readable distribution name.
dist_map = dict(
    uni='Uniform',
    norm_m150_sd30='Normal with Mean = 150 and SD = 30',
    weib_rl5_a10='Weibull with a = 10',
)

In [6]:
# Show the distinct distribution identifiers parsed from the result names.
dist_prop_distributions

['norm_m150_sd30', 'uni', 'weib_rl5_a10']

In [7]:


# Collect one row per (case, variants count, samples count, distribution)
# combination for which a results summary was loaded.
res_df_columns = \
    ['case_name', 'case_symbol', 'variants_count', 'samples_count', 'propenisty_dist'] + plotted_values

res_df_src = []

for curr_case, case_symbol in cases_map.items():
    for vc in dist_varaints_counts:
        for sc in dist_samples_counts:
            for curr_dist in dist_prop_distributions:
                curr_res_dict = all_results.get(
                    '{}-vc{}-sc{}-{}'.format(curr_case, vc, sc, curr_dist), None)

                # Only combinations with a (non-empty) summary produce a row.
                if curr_res_dict:
                    metric_values = [curr_res_dict[metric] for metric in plotted_values]
                    res_df_src.append([curr_case, case_symbol, vc, sc, curr_dist] + metric_values)

res_df = pd.DataFrame(res_df_src, columns=res_df_columns)

In [8]:
# Display the assembled results table (one row per case / variants count / samples count / distribution).
res_df

Unnamed: 0,case_name,case_symbol,variants_count,samples_count,propenisty_dist,model_propensity_sum,propensity_smape,total_duration_mins
0,not_weighted,#1,300,100,norm_m150_sd30,1.001097,63.749616,0.02762
1,not_weighted,#1,300,100,uni,1.002376,41.146038,0.024344
2,not_weighted,#1,300,100,weib_rl5_a10,0.999671,91.590978,0.025591
3,not_weighted,#1,300,500,norm_m150_sd30,1.000316,61.446163,0.111
4,not_weighted,#1,300,500,uni,1.000263,16.987143,0.109527
5,not_weighted,#1,300,500,weib_rl5_a10,1.000111,90.041903,0.114834
6,not_weighted,#1,300,1000,norm_m150_sd30,1.000215,60.417502,0.22355
7,not_weighted,#1,300,1000,uni,1.000094,14.247984,0.2355
8,not_weighted,#1,300,1000,weib_rl5_a10,1.000134,90.027416,0.222832
9,not_weighted,#1,300,2000,norm_m150_sd30,1.00005,58.585074,0.505629


In [9]:
# Names of the res_df columns used for filtering and for the x axis.
sc_cn = 'samples_count'
cn_cn = 'case_name'
pd_cn = 'propenisty_dist'  # spelling matches the column name used when res_df is built

plts_save_prfx = '/home/os/Projects/upwork/python-sdk/improveai/experiments/propensity_model/plots'
# Make sure the output folder exists before any figure is written into it.
os.makedirs(plts_save_prfx, exist_ok=True)

# Fixed y-axis range per plotted value; a value missing from this map yields
# None, which is passed to the figure unchanged.
y_ax_limits_map = {
    'model_propensity_sum': [0, 5],
    'propensity_smape': [0, 100],
    'total_duration_mins': [0, 3],
}

samples_axis_title = \
    "Number of 'decisions' (samples) used to train model (number of observations varies between cases)"

# One figure per (plotted value, propensity distribution) pair, with one
# trace per case; every figure is saved as a standalone HTML file.
for pv in plotted_values:

    # The range depends on the plotted value only, so resolve it once per value.
    y_ax_limits = y_ax_limits_map.get(pv)

    for prop_dist in dist_prop_distributions:

        pd_prop_sum_fig = go.Figure(layout_yaxis_range=y_ax_limits)

        for curr_case in cases_map.keys():

            # Rows of the current distribution and case only.
            pd_res_df = \
                res_df[(res_df[pd_cn] == prop_dist) & (res_df[cn_cn] == curr_case)]\
                .reset_index(drop=True)

            pd_prop_sum_fig.add_trace(
                go.Scatter(x=pd_res_df[sc_cn], y=pd_res_df[pv],
                           mode='lines+markers',
                           name='case: {} ({})'.format(cases_map[curr_case], curr_case)))

        pd_prop_sum_fig.layout.update(
            title='{} for {} distribution.'.format(plotted_values_map[pv], dist_map[prop_dist]),
            xaxis=dict(
                title=samples_axis_title,
                showline=True,
                showgrid=True,
                gridwidth=0.3,
                gridcolor='#bdbdbd'
            ),
            yaxis=dict(
                title=plotted_values_map[pv],
                showgrid=True,
                gridwidth=0.3,
                gridcolor='#bdbdbd'
            ),
            font=dict(
                size=10,
                color="black"
            )
        )

        # Save the figure as <plotted value>-<distribution>.html
        fig_name = '{}-{}.html'.format(pv, prop_dist)
        full_save_pth = '{}/{}'.format(plts_save_prfx, fig_name)
        pd_prop_sum_fig.write_html(full_save_pth)

In [10]:
with open('./data/weighted_props_norm_m150_sd30_sc100.json', 'r') as npropj:
    np_str = npropj.read()
    np_dict = json.loads(np_str)['props']
    
np = [v for v in np_dict.values()]
    
with open('./data/weighted_props_weib_rl5_a10_sc100.json', 'r') as wpropj:
    wp_str = wpropj.read()
    wp_dict = json.loads(wp_str)['props']

wp = [v for v in wp_dict.values()]

up = [1 / 300 for _ in range(300)]

In [11]:
props_fig = go.Figure()

all_props = [up, np, wp]
all_props_descs = ['Uniform', 'Normal', 'Weibull']

for ps, pds in zip(all_props, all_props_descs):

    props_fig.add_trace(
        go.Scatter(
            x=list(range(300)), y=ps, mode='lines', name=pds))



props_fig.layout.update(
    title= 'Used propensity distributions'.format(plotted_values_map[pv], dist_map[prop_dist]),
      xaxis_title="Variant 'id'",

    xaxis=dict(
        title="Variant's propensity / probability it will be 'chosen'",
#                 linecolor="black",  # Sets color of X-axis line
        showline=True,
        showgrid=True,
        gridwidth=0.3,
        gridcolor='#bdbdbd'
    ),            
    yaxis=dict(
        title=plotted_values_map[pv],
#                 showline=True,
        showgrid=True,
        gridwidth=0.3,
        gridcolor='#bdbdbd'
    ),
#             legend_title_text="Cases:",
    font=dict(
#                 family="Courier New, monospace",
        size=10,
        color="black"
    )
)    
props_fig.write_html(plts_save_prfx + '/props.html')