In [None]:
import re
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os

import error_log_reader
import function_args
import plot_functions

In [None]:
# (Re)load the autoreload extension so edits to imported modules
# (e.g. the local error_log_reader / function_args / plot_functions) are picked up.
%reload_ext autoreload
# Mode 2: reload all modules before each piece of code is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Root directory that holds the obsidian run outputs. Set the
# OBSIDIAN_OUTPUT_DIR environment variable to run the notebook on another
# machine; the default reproduces the original hard-coded location.
output_root = os.environ.get(
    'OBSIDIAN_OUTPUT_DIR',
    '/Users/davidkohn/dev/obsidian/output'
)

# One directory per run. The resulting strings are identical to the original
# literals: the gascoyne paths keep their trailing separator (the empty last
# component passed to os.path.join) and examples04 has none.
dir_examples04 = os.path.join(output_root, 'examples04')
dir_gascoyne_01_01 = os.path.join(output_root, 'gascoyne_config_01_01', '')
dir_gascoyne_02_01 = os.path.join(output_root, 'gascoyne_config_02_01', '')
dir_gascoyne_02_02 = os.path.join(output_root, 'gascoyne_config_02_02', '')

# Likelihood search
Search the logs for lines that contain 'likelihood'.

## gascoyne shard

In [None]:
# Parse the gascoyne run for likelihood lines. The result is bound to
# `out_likelihoods` because that is the name the histogram cell below iterates
# over; it used to be stored only in `out`, so that cell raised a NameError
# on a fresh kernel.
out_likelihoods = error_log_reader.get_info(
    dir_gascoyne_01_01,
    **function_args.kwargs_likelihood
)
# Backward-compatible alias for the name this cell originally defined.
out = out_likelihoods

## plot likelihoods for each parameter type

In [None]:
# Draw one histogram figure of log likelihoods per search string and save each
# figure as a PNG in the current working directory.
fontsize = 20
xstr = 'Log likelihood'
ystr = 'Frequency'
# NOTE: despite its name this is the output *file name* template; the plot
# title is the search string itself.
title_str = 'frequency_loglikelihood_{}.png'
fig_height = 10
fig_width = 10

# `find_str_list` is not defined anywhere in this notebook, so the loop only
# worked with leftover kernel state. Derive it from the data instead: every key
# found in the per-shard results, in first-seen order.
# NOTE(review): assumes each shard is a dict keyed by search string — confirm
# against error_log_reader.get_info.
find_str_list = list(dict.fromkeys(
    key for shard in out_likelihoods for key in shard
))

for find_str in find_str_list:
    print(find_str)
    fig, ax = plt.subplots(figsize = (fig_width, fig_height))
    # All shards are overlaid on the same axes.
    for shard in out_likelihoods:
        x = shard.get(find_str)
        # Length check instead of truthiness: `if x:` raises for numpy arrays
        # with more than one element.
        if x is not None and len(x) > 0:
            ax.hist(x)
    ax.set_xlabel(xstr, fontsize = fontsize)
    ax.set_ylabel(ystr, fontsize = fontsize)
    ax.set_title(find_str, fontsize = fontsize)
    # Spaces in the search string are replaced so the file name has none.
    fig.savefig(title_str.format(find_str.replace(' ', '-')))
    # Display the figure right after its printed name, then release it so the
    # loop does not accumulate open figures.
    plt.show()
    plt.close(fig)

# Convergence info search
Search the logs for lines that contain convergence information.

In [None]:
# Recast time strings as datetime objects -> not being used at the moment.
# The disabled code is kept as real comments instead of a triple-quoted string:
# the string was still evaluated as an expression, and the '\.' inside a
# non-raw literal is an invalid escape sequence that newer Python versions
# warn about. The pattern is written as a raw string here for that reason.
#
# regex = r'([0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]+)'
# new_time_list = [
#     datetime.datetime.strptime((re.findall(regex, time_line)[0]), '%H:%M:%S.%f').time()
#     for time_line in time_list
# ]

## cooper basin obsidian

In [None]:
# Collect the convergence information of the cooper basin (examples04) run.
# NOTE(review): the gascoyne cell below rebinds `out_convergence`, so on a
# top-to-bottom run this result is replaced before anything reads it.
out_convergence = error_log_reader.get_info(dir_examples04, **function_args.kwargs_convergence)

## gascoyne

In [None]:
# Collect the convergence information of the gascoyne_config_01_01 run; the
# cells that follow read their data from this `out_convergence`.
out_convergence = error_log_reader.get_info(dir_gascoyne_01_01, **function_args.kwargs_convergence)

In [None]:
# Transposed view of the first element of the convergence output; the plots
# below index it one row at a time.
# NOTE(review): presumably one row per parameter/chain and one column per
# recorded iteration — confirm against error_log_reader.get_info.
c = np.transpose(out_convergence[0])
# Number of rows of `c`; used as the bound of the row loops further down.
param_range = len(c)

In [None]:
# Histogram of the values in the last column of `c` (one value per row),
# saved into the gascoyne run directory.
fig_width = 10
fig_height = 10
xstr = 'Rhat bin'
ystr = 'Frequency'
title_str = 'Histogram of rhat for all chains'
save_str = 'rhat_histogram'
fontsize = 20

# Explicit figure/axes pair; the original fetched `ax` but then drew through
# the pyplot state machine and never used it.
fig, ax = plt.subplots(
    figsize = (fig_width, fig_height)
)

y = c[:, -1]
# Drop NaN entries explicitly, the same way the per-iteration plot does,
# instead of relying on how the installed matplotlib version treats NaN input.
y = y[~np.isnan(y)]
ax.hist(y)

ax.set_xlabel(xstr, fontsize = fontsize)
ax.set_ylabel(ystr, fontsize = fontsize)
ax.set_title(title_str, fontsize = fontsize)

# No extension in save_str: savefig falls back to its default output format.
out_path = os.path.join(
    dir_gascoyne_01_01,
    save_str
)
fig.savefig(out_path)
# Close the figure rather than plt.clf(): clearing left an empty figure open,
# which was then rendered as a blank plot under the cell.
plt.close(fig)

In [None]:
# Line plot of every row of `c` against its sample index, saved into the
# gascoyne run directory.
fig_width = 10
fig_height = 10
xstr = 'MCMC iteration'
ystr = 'Rhat'
title_str = 'Rhat over MCMC iterations for all chains'
save_str = 'rhat_iterations'
fontsize = 20
# Number of leading (non-NaN) samples left out of every trace; this used to be
# an unexplained literal inside the loop.
# NOTE(review): presumably a burn-in period — confirm the intended value.
n_skip = 1000

# Explicit figure/axes pair; the original fetched `ax` but never used it.
fig, ax = plt.subplots(
    figsize = (fig_width, fig_height)
)
for param_idx in range(param_range):
    y = c[param_idx, :]
    # NaN entries are removed before indexing, so x counts the remaining samples.
    y = y[~np.isnan(y)]
    # Same points as before: positions n_skip .. len(y)-1 (empty for short rows).
    x = np.arange(n_skip, len(y))
    ax.plot(x, y[n_skip:])

ax.set_xlabel(xstr, fontsize = fontsize)
ax.set_ylabel(ystr, fontsize = fontsize)
ax.set_title(title_str, fontsize = fontsize)

# No extension in save_str: savefig falls back to its default output format.
out_path = os.path.join(
    dir_gascoyne_01_01,
    save_str
)
fig.savefig(out_path)
# Close the figure rather than plt.clf(): clearing left an empty figure open,
# which was then rendered as a blank plot under the cell.
plt.close(fig)

In [None]:
# Delegate to the shared convergence plot helper. `parent_dir` was never defined
# in this notebook (NameError on a fresh kernel), so the figure is saved where
# the other Rhat figures go: the gascoyne run directory that `c` was read from.
# NOTE(review): confirm this is the intended output directory.
plot_functions.make_convergence_plot(
    c,
    save_dir = dir_gascoyne_01_01,
    param_range = param_range
)

# Stats table search
Search the logs for the stats table.
The stats table contains the following fields (numbered from 0):
    0. ChainID
    1. Length - no. samples
    2. MinEngy - lowest energy
    3. CurrEngy - last state
    4. Sigma - proposal width
    5. AcptRt
    6. GlbAcptRt
    7. Beta - inverse temperature of the chain when this state was recorded
        - from obsidian/src/infer/mcmctypes.hpp
    8. SwapRt
    9. GlbSwapRt

In [None]:
# Extract the stats tables logged by the gascoyne_config_01_01 run.
out_stats = error_log_reader.get_info(dir_gascoyne_01_01, **function_args.kwargs_statstable)

In [None]:
# Stack the stats tables along a new third axis, so the array is indexed as
# array_all[table row, table column, table number].
array_all = np.stack(out_stats[0], axis = 2)

# Named indices instead of bare literals. Column 8 is SwapRt according to the
# column list in the section text above.
# NOTE(review): row 3 is presumably the chain with ChainID 3 — confirm.
table_row = 3
stat_col = 8
vals = array_all[table_row, stat_col, :]

# Same line plot as before, now with axis labels and a title.
fig, ax = plt.subplots()
ax.plot(vals)
ax.set_xlabel('Stats table number')
ax.set_ylabel('SwapRt')
ax.set_title('SwapRt of stats table row {}'.format(table_row))
plt.show()

## cooper basin

In [None]:
# recast time strings as datetime objects -> unused for now
# The block below is a bare triple-quoted string, not live code, so the list
# comprehension inside it is never executed. It would call
# error_log_reader.extract_time_from_string on every entry of `time_list`,
# a name that is not defined anywhere in the visible notebook.
"""new_time_list = [
    error_log_reader.extract_time_from_string(time_line)
    for time_line in time_list
]"""

# Average evaluation time search

In [None]:
# Extract the average evaluation times logged by the gascoyne_config_01_01 run.
out_evaltime = error_log_reader.get_info(dir_gascoyne_01_01, **function_args.kwargs_evaluationtime)

In [None]:
# One histogram per key of the evaluation-time results, pooling the values of
# that key over every entry of `out_evaltime`; each figure is written into the
# gascoyne run directory.
save_dir = dir_gascoyne_01_01
fig_width = 10
fig_height = 10
fontsize = 20
xstr = 'Forward model and likelihood average evaluation time in milliseconds'
ystr = 'Frequency'
title_str = 'Histogram of average evaluation time for sensor {}'
save_str = 'sensor-eval-time-{}.png'

# The keys of the first entry decide which histograms are drawn.
for key in out_evaltime[0]:
    # Flatten the per-entry value lists for this key into one list.
    plot_data = [
        value
        for sub_dict in out_evaltime
        for value in sub_dict[key]
    ]
    fig = plt.figure(figsize=(fig_width, fig_height))
    plt.hist(plot_data)
    plt.xlabel(xstr, fontsize = fontsize)
    plt.ylabel(ystr, fontsize = fontsize)
    plt.title(title_str.format(key), fontsize = fontsize)
    # The key is substituted into both the title and the file name.
    out_path = os.path.join(save_dir, save_str.format(key))
    plt.savefig(out_path)