In [None]:
import re
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import numpy as np
import error_log_reader
import os

In [None]:
# Load (or reload) the autoreload extension so edited modules are picked up
# without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Root directory holding the experiment outputs. The default is the original
# author's local checkout; set FORMATION_BOUNDARIES_OUTPUT to point the
# notebook at a different location without editing this cell.
output_root = os.environ.get(
    'FORMATION_BOUNDARIES_OUTPUT',
    '/Users/davidkohn/dev/formation-boundaries/output',
)

# Directory with the examples04 logs.
dir_examples04 = os.path.join(output_root, 'examples04')
# Directory with the gascoyne_config_01 logs; the trailing '' keeps the
# trailing separator that the original hard-coded path carried.
dir_gascoyne01 = os.path.join(output_root, 'gascoyne_config_01', '')

# Likelihood search
Search the error logs for lines that contain 'likelihood'.

## gascoyne shard

In [None]:
# Log file of the gascoyne shard run, located in the gascoyne_config_01 output.
str_log = 'shard.pbs.e2079115'
fpath = os.path.join(dir_gascoyne01, str_log)

# Parser configuration: 'likelihood' is the start signal string and the
# new-line end signal closes a match; the likelihood-specific transforms are
# applied to lines and line lists.
likelihood_parse_options = dict(
    start_signal_func=error_log_reader.start_signal_str,
    end_signal_func=error_log_reader.end_signal_new_line,
    start_signal_str='likelihood',
    line_transform_func=error_log_reader.likelihood_line_transform,
    line_list_transform_func=error_log_reader.likelihood_line_list_transform,
)
likelihood_str_list, time_list = error_log_reader.parse_error_logs(
    fpath, **likelihood_parse_options
)

# Strings handed to parse_likelihoods together with the parsed likelihood lines.
find_str_list = [
    'magnetic likelihood sigma', 'magnetics',
    'gravity likelihood sigma', 'gravity',
]
likelihood_list = error_log_reader.parse_likelihoods(
    find_str_list,
    likelihood_str_list,
)

In [None]:
# Position in likelihood_list to plot. Presumably the entries follow the order
# of find_str_list, where position 3 is 'gravity' — TODO confirm against
# error_log_reader.parse_likelihoods.
likelihood_idx = 3

# Explicit axes so the figure carries labels and a title and can stand alone.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(likelihood_list[likelihood_idx])
hist_ax.set_xlabel('likelihood_list[{}]'.format(likelihood_idx))
hist_ax.set_ylabel('Count')
hist_ax.set_title('Gascoyne shard likelihood histogram')
plt.show()

## gascoyne obsidian

In [None]:
# NOTE(review): the section heading says "gascoyne", but this log is read from
# the examples04 directory — confirm which experiment it belongs to.
str_log = 'obsidian.pbs.e2093733'
fpath = os.path.join(dir_examples04, str_log)

# Parser configuration: 'likelihood' is the start signal string and the
# new-line end signal closes a match; the likelihood-specific transforms are
# applied to lines and line lists.
likelihood_parse_options = dict(
    start_signal_func=error_log_reader.start_signal_str,
    end_signal_func=error_log_reader.end_signal_new_line,
    start_signal_str='likelihood',
    line_transform_func=error_log_reader.likelihood_line_transform,
    line_list_transform_func=error_log_reader.likelihood_line_list_transform,
)
likelihood_str_list, time_list = error_log_reader.parse_error_logs(
    fpath, **likelihood_parse_options
)

## cooper basin shard

In [None]:
# Log file of the small shard run, located in the examples04 output.
str_log = 'shard_small.pbs.e2093734'
fpath = os.path.join(dir_examples04, str_log)

# Parser configuration: 'likelihood' is the start signal string and the
# new-line end signal closes a match; the likelihood-specific transforms are
# applied to lines and line lists.
likelihood_parse_options = dict(
    start_signal_func=error_log_reader.start_signal_str,
    end_signal_func=error_log_reader.end_signal_new_line,
    start_signal_str='likelihood',
    line_transform_func=error_log_reader.likelihood_line_transform,
    line_list_transform_func=error_log_reader.likelihood_line_list_transform,
)
likelihood_str_list, time_list = error_log_reader.parse_error_logs(
    fpath, **likelihood_parse_options
)

# Strings handed to parse_likelihoods together with the parsed likelihood lines.
find_str_list = [
    'magnetic likelihood sigma', 'magnetics',
    'gravity likelihood sigma', 'gravity',
]
likelihood_list = error_log_reader.parse_likelihoods(
    find_str_list,
    likelihood_str_list,
)

# Convergence info search
Search the error logs for lines that contain convergence information.

## cooper basin obsidian

In [None]:
# Obsidian log from the examples04 output.
str_log = 'obsidian.pbs.e2093733'
fpath = os.path.join(dir_examples04, str_log)

# Parser configuration: lines are picked up via the 'mcmc.hpp:252' start signal
# string and closed by the new-line end signal; the convergence-specific
# transforms are applied to lines and line lists.
convergence_parse_options = dict(
    start_signal_func=error_log_reader.start_signal_str,
    end_signal_func=error_log_reader.end_signal_new_line,
    start_signal_str='mcmc.hpp:252',
    line_transform_func=error_log_reader.converged_line_transform,
    line_list_transform_func=error_log_reader.converged_line_list_transform,
)
converged_list, time_list = error_log_reader.parse_error_logs(
    fpath, **convergence_parse_options
)

In [None]:
# Recast time strings as datetime.time objects.
# The pattern is a raw string: in a normal string literal '\.' is an invalid
# escape sequence, which triggers a warning on current Python versions. The
# matched text is unchanged: HH:MM:SS followed by a fractional part.
regex = r'([0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]+)'
# Compile once instead of re-parsing the pattern for every line.
time_pattern = re.compile(regex)

# The first match in each line is parsed; a line without a match raises
# IndexError, exactly as before.
# NOTE(review): '%f' accepts at most six fractional digits — confirm the logs
# never carry more.
new_time_list = [
    datetime.datetime.strptime(time_pattern.findall(time_line)[0], '%H:%M:%S.%f').time()
    for time_line in time_list
]

In [None]:
# Stack the entries of converged_list along a new axis 1, then take row 1 of
# the result and drop its NaN entries.
converged_all = np.stack(converged_list, axis=1)
row_1 = converged_all[1]
x = row_1[np.logical_not(np.isnan(row_1))]

In [None]:
# Plot x over the full iteration range and over three sub-windows.
fig_width = 10
fig_height = 10

# Create the 2x2 grid directly. A separate plt.figure() call beforehand only
# leaves an extra, empty figure behind, so it has been removed.
fig, axes = plt.subplots(
    2, 2,
    figsize=(fig_width, fig_height)
)

# Frameless axes spanning the whole figure; it only carries the shared axis
# labels and the title. Tick switches must be booleans: the string 'off' is
# truthy and would turn the ticks on in current Matplotlib releases.
label_ax = fig.add_subplot(111, frameon=False)
label_ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
label_ax.grid(False)

iterations = range(len(x))

# Top-left: every iteration.
axes[0][0].plot(iterations, x)
# Top-right: iterations 0-99.
idx = slice(0, 100)
axes[0][1].plot(iterations[idx], x[idx])
# Bottom-left: iterations 100-999.
idx = slice(100, 1000)
axes[1][0].plot(iterations[idx], x[idx])
# Bottom-right: iteration 1000 to the end.
idx = slice(1000, len(x))
axes[1][1].plot(iterations[idx], x[idx])

label_ax.set_xlabel('Iteration')
label_ax.set_ylabel('Gelman-Rubin R')
label_ax.set_title('Gelman-Rubin R for different windows of iterations for examples04 experiment')
fig.savefig('gelman_rubin_stats_examples04.pdf')

# Stats table search
Search the error logs for the stats table.
The stats table contains the following fields:
    0. ChainID
    1. Length
    2. 
    3. 
    4. 
    5. 
    6. 
    7. Beta - The inverse temperature of the chain when this state was recorded
        - from obsidian/src/infer/mcmctypes.hpp
    8. 
    9.

## cooper basin

In [None]:
# Obsidian log from the examples04 output.
str_log = 'obsidian.pbs.e2093733'
fpath = os.path.join(dir_examples04, str_log)

# Parser configuration: a table starts at the 'mcmc.hpp:242' start signal
# string and ends where the stats-table end signal fires; the
# stats-table-specific transforms are applied to lines and line lists.
stats_table_parse_options = dict(
    start_signal_func=error_log_reader.start_signal_str,
    end_signal_func=error_log_reader.end_signal_stats_table,
    start_signal_str='mcmc.hpp:242',
    line_transform_func=error_log_reader.stats_table_line_transform,
    line_list_transform_func=error_log_reader.stats_table_line_list_transform,
)
stats_table_list, time_list = error_log_reader.parse_error_logs(
    fpath, **stats_table_parse_options
)

In [None]:
# Convert every entry of time_list with error_log_reader.extract_time_from_string
# (presumably yielding time objects — confirm against the helper).
new_time_list = list(map(error_log_reader.extract_time_from_string, time_list))

In [None]:
# Report how many stats tables were parsed, stack them along a new axis 2,
# and report the shape of the stacked array.
n_stats_tables = len(stats_table_list)
print(n_stats_tables)
array_all = np.stack(stats_table_list, axis=2)
print(array_all.shape)

In [None]:
# Print each column label of the first stats table next to its position.
first_table_columns = stats_table_list[0].columns
for idx, col in enumerate(first_table_columns):
    print(idx, col)

In [None]:
# Values at position [3, 8] of the stacked array, taken along the last axis.
# (Previously explored alternative: array_all[0, 1, :].)
vals = array_all[3, 8]
plt.gca().plot(vals)
plt.show()

In [None]:
# Show one parsed stats table together with the time entry at the same index.
idx = 1
time_line, df = new_time_list[idx], stats_table_list[idx]
print(time_line)
display(df)