# Spectrogram Analysis Jupyter notebook

### It’s publication time.

## Top priority tasks

0) Execute some pure predictions on the cluster (medium dataset).

    SORT OUT EXPONENTIAL VS UNIFORM (only run one, try breaking apart noise from model, independent regularization term).
    
0b) Try to run Cyclic Exponential tests.

1) Edit the article.

2) prepare for meeting with Marios, prepare pure prediction, multiple kinds.

3) Fix "ip: nearest", change to label "interpolation"

## Computational complexity tasks
1) save the repaired dictionaries

2) delete old crappy results (clean the experiment_results directory)

3) Fix tqdm notebook software bug.

4) Delete unnecessary python kernels.

5) Edit notebook, delete cells.

In [1]:
#from IPython.core.interactiveshell import InteractiveShell
#InteractiveShell.ast_node_interactivity = "all"
from platform import python_version
import copy
import glob
import matplotlib
import os
from scipy.io import loadmat
import scipy.stats as stats
import sys
%matplotlib inline
import seaborn as sns
print(python_version())

%run -i 'PyFiles/imports.py'
%run -i 'PyFiles/helpers.py'
%run -i "PyFiles/experiment.py"
%run -i "PyFiles/analysis.py"

# to stop print output:
#def blockPrint():
#    sys.stdout = open(os.devnull, 'w')

3.8.5


# Plotting Helper Functions

In [2]:
def get_target_freq(experiment):
    """
    Return the mean of an experiment's target frequencies, rounded to 1 decimal.

    Arguments:
        experiment: a dict-like experiment record. experiment["f"] is the
            frequency axis and experiment["resp_idx"] holds the positions of
            the target (response) entries within it.
    """
    all_freqs = np.array(experiment["f"])
    target_freqs = all_freqs[experiment["resp_idx"]]
    return np.round(np.mean(target_freqs), 1)

def IdxMatch(experiment, 
             resp_idx_range = range(272, 301), 
             n_obs = 28
             ):
    """
    Tell whether an experiment uses the given target indices and observer count.

    Arguments:
        experiment: a dict-like experiment record with "resp_idx" and "obs_idx".
        resp_idx_range: the expected target indices; compared (as a list)
            against experiment["resp_idx"].
        n_obs: the expected number of observer indices.

    Returns:
        True when both the target indices and the observer count match,
        False otherwise.

    The defaults are the "obs4" configuration:
        resp_idx_range = range(272, 301), n_obs = 28
    """
    same_targets = list(resp_idx_range) == experiment["resp_idx"]
    if not same_targets:
        return False
    # the observer list is only inspected once the targets are known to match
    return bool(n_obs == len(experiment["obs_idx"]))

def freq_plot(analysis_obj, experiment_num = 0, title = "Observer 4 experiment",  save = None,
             custom_colors = True, mean = False):
    """ 
    This function takes an EchoStateAnalysis object and creates a frequency loss plot.

    One line is drawn per model (L2 loss against frequency), plus a dotted
    horizontal line at each model's overall mean L2 loss.
    
    Arguments:
        analysis_obj: the EchoStateAnalysis object. Its rDF dataframe must have
            the columns "experiment #", "model", "freq" and "L2".
        experiment_num: the index number of the EchoStateExperiment.
        title: the title of the experiment.
        save: the path to which to save the image (nothing is saved if falsy).
        custom_colors: if True, use a fixed model -> color mapping; otherwise
            colors are handed out to the models in sorted order.
        mean: if True, plot the per-frequency mean of each model instead of
            the raw rows.
    """
    freq_rDF = analysis_obj.rDF
    # boolean indexing already yields a new frame, so rDF itself is never modified
    freq_loss_df = freq_rDF[freq_rDF["experiment #"] == experiment_num]
    
    plt.figure(figsize = (12,5))
    modelz = np.unique(freq_loss_df.model)
    tab10 = sns.color_palette("tab10")
    if custom_colors:
        known_models = ["cyclic", "uniform", "ip: linear", "exponential", "random_exponential",
                        "random_uniform", "ip: nearest"]
        palette_ = dict(zip(known_models, tab10))
    else:
        palette_ = {}
    palette_["ip: nearest"] = tab10[1]
    # Every model present in the data gets a color. Previously a model missing
    # from the mapping (or a 5th model without custom colors) raised KeyError.
    for i, model in enumerate(modelz):
        palette_.setdefault(model, tab10[i % len(tab10)])

    #model mean lines
    for model in modelz:
        model_rows = freq_loss_df[freq_loss_df.model == model]
        plt.axhline(np.mean(model_rows.L2), color = palette_[model], linestyle = "dotted")

    if mean:
        # numeric_only keeps the string "model" column out of the averaging
        plot_df = freq_loss_df.groupby(["model", "freq"], as_index = False).mean(numeric_only = True)
    else:
        plot_df = freq_loss_df
    sns.lineplot(x = "freq", y = "L2", data = plot_df, alpha = 0.9, hue = "model", palette = palette_)
    
    plt.title(title, fontsize = 16)
    plt.xlabel("Frequency (Hz)", fontsize = 15)
    plt.ylabel("L2 Loss", fontsize = 15)
    
    if save:
        plt.savefig(save)


def prediction_plot(analysis_obj, experiment_num = 0, title = "Observer 4 experiment", 
                    rolling = None, save = None):
    """
    Placeholder for a prediction plot (the plot itself is not implemented yet).

    For now the only effect is to save the current matplotlib figure to `save`
    when a path is given; all other arguments are accepted but unused.
    """
    #TODO: draw the prediction plot
    if not save:
        return
    plt.savefig(save)
        
    
#pickle_A.experiment_lst[0] = pickle_zhizhuo.experiment_lst[0] 
def quick_plot(n, analysis, outfile = None, mean = False, rolling = 150, force_build = True):
    """
    Draw the frequency-loss and time-loss plots for experiment number n.

    Arguments:
        n: index of the experiment inside each analysis object's experiment_lst.
        analysis: an EchoStateAnalysis object, or a list of them.
        outfile: base name for the saved figures; they are written to
            "fig/<outfile>_freq" and "fig/<outfile>_time". Nothing is saved
            when outfile is falsy. It is also used as the title prefix.
        mean: forwarded to freq_plot and time_plot.
        rolling: rolling-window size forwarded to time_plot.
        force_build: rebuild the loss dataframe even if the analysis object
            already has one.

    Returns:
        The rDF loss dataframe of the first analysis object.
    """
    if not isinstance(analysis, list):
        analysis = [analysis]
    
    #only save the plot if requested.
    if outfile:
        save_path = "fig/" + outfile 
        save_path_time = save_path + "_time"
        save_path_freq = save_path + "_freq"
    else:
        save_path_time, save_path_freq  = None, None
        outfile = ""
    
    # NOTE: when several analysis objects are passed together with an outfile,
    # each one writes to the same two paths, so only the last figure survives.
    for analysis_ in analysis:
        models = list(analysis_.experiment_lst[n]["prediction"].keys())
        # build once: either because it was requested or because rDF is missing
        if force_build or not hasattr(analysis_, 'rDF'):
            analysis_.build_loss_df(group_by = "freq", models = models)
        freq_plot(analysis_, n, title = outfile + " Avg. L2 Loss vs Frequency", 
                  save = save_path_freq, mean = mean)
        time_plot(analysis_, experiment_num = n, rolling = rolling, save = save_path_time,
              title = outfile + " Avg. L2 Loss vs Time (rolling average)", mean = mean)
    return analysis[0].rDF

def show_images(analysis_, exper_num = 0, aspect = 10, sigma = 1, method = "heatmap"):
    """
    Show every model prediction of one experiment, plus the ground truth, as images.

    One panel is drawn per entry, stacked vertically, each titled with the
    entry's name and its stored nrmse value (0 for the ground truth).

    Arguments:
        analysis_: the EchoStateAnalysis object.
        exper_num: index of the experiment in analysis_.experiment_lst.
        aspect: aspect ratio passed to imshow (only used when method is not
            "heatmap").
        sigma: currently unused (the smoothed ground-truth panel is disabled).
        method: "heatmap" draws seaborn heatmaps of the training data followed
            by the prediction; any other value draws the prediction alone with
            imshow.
    """
    exper_ = analysis_.experiment_lst[exper_num]
    Train, Test = exper_["xTr"], exper_["xTe"]
    predictions = exper_["prediction"].copy()
    nrmses      = exper_["nrmse"].copy()
    predictions["ground_Truth"]  = Test
    nrmses["ground_Truth"]       = 0
    #predictions["ground_Truth_smooth"]  = gaussian_filter(Test, sigma=sigma)
    #nrmses["ground_Truth_smooth"]       = 0
    
    # One row per panel: the grid used to be fixed at 5 rows, which raised
    # IndexError with more entries and left blank panels with fewer.
    n_panels = len(predictions)
    fig, ax = plt.subplots(n_panels, 1, figsize = (12, 4.8 * n_panels), squeeze = False)
    ax = ax.flatten()
    
    for i, (key, value) in enumerate(predictions.items()):
        arr = np.array(value)
        
        if method != "heatmap":
            # draw on this panel's own axis (plt.subplot(2,3,0) is not a valid index)
            ax[i].imshow(arr.T, aspect = aspect)
        else:
            full_arr = np.concatenate((Train, arr), axis = 0)
            sns.heatmap(full_arr.T, ax = ax[i])
        ax[i].set_title(key +" R: " + str(nrmses[key]))
    plt.tight_layout()

def load_analysis(path, model = "uniform", bp = "experiment_results/publish/split_0.5/", ip_use_obs = True):
    """
    Glob for experiment result files and load them into an EchoStateAnalysis.

    Arguments:
        path: glob pattern relative to bp, e.g. 'block_target*.pickle'.
        model: "uniform" or "cyclic"; anything else fails the assertion.
        bp: base path that is prepended to path.
        ip_use_obs: passed on as ip_use_observers.

    Returns:
        The EchoStateAnalysis built from the matching files (linear interpolation).
    """
    assert model in ("uniform", "cyclic")
    matched_files = glob.glob(bp + path)
    print(matched_files)
    return EchoStateAnalysis(matched_files, model = model,
                             ip_use_observers = ip_use_obs, ip_method = "linear")

In [3]:
def time_plot(analysis_obj, experiment_num = 0, title = "Observer 4 experiment", rolling = None, save = None,
             mean = False, custom_colors = True):
    """
    This function takes an EchoStateAnalysis object and creates a time loss plot.
    
    Arguments:
        analysis_obj: the EchoStateAnalysis object. Its rDF dataframe must have
            the columns "experiment #", "model", "time" and "L2".
        experiment_num: the index number of the EchoStateExperiment.
        title: the title of the experiment.
        rolling: window size of the rolling average applied to L2 for each
            model. If falsy, the raw L2 values are plotted.
        save: the path to which to save the image (nothing is saved if falsy).
        mean: if True (and rolling is set), L2 is first averaged per time step
            for each model and the rolling average is taken over those means.
        custom_colors: if True, use the same fixed model -> color mapping as
            freq_plot; otherwise colors are handed out in sorted model order.
    """
    time_rDF = analysis_obj.rDF
    time_loss_df =  time_rDF[time_rDF["experiment #"] == experiment_num].copy()
    
    plt.figure(figsize = (12,5))
    modelz = np.unique(time_loss_df.model)
    tab10 = sns.color_palette("tab10")
    if custom_colors:
        # same order as in freq_plot, so a model has one color in both figures
        known_models = ["cyclic", "uniform", "ip: linear", "exponential", "random_exponential",
                        "random_uniform", "ip: nearest"]
        palette_ = dict(zip(known_models, tab10))
    else:
        palette_ = {}
    palette_["ip: nearest"] = tab10[1]
    # every model present in the data gets a color, so palette_[model] cannot fail
    for i, model in enumerate(modelz):
        palette_.setdefault(model, tab10[i % len(tab10)])

    if rolling:
        smoothed = []
        for model in modelz:
            model_rows = time_loss_df[time_loss_df["model"] == model]
            #model mean line (over the raw rows, also in mean mode)
            plt.axhline(np.mean(model_rows.L2), linewidth = 1.5,
                        color = palette_[model], linestyle = "dotted")
            if mean:
                # numeric_only keeps the string "model" column out of the averaging
                model_rows = model_rows.groupby("time", as_index = False).mean(numeric_only = True)
                model_rows["model"] = model
            else:
                # explicit copy: the rolling column is added to a frame we own
                model_rows = model_rows.copy()
            model_rows["rolling_L2"] = model_rows.L2.rolling(rolling).mean()
            smoothed.append(model_rows)

        if smoothed:
            plot_df = pd.concat(smoothed, axis = 0)
        else:
            plot_df = time_loss_df.assign(rolling_L2 = np.nan)

        if mean:
            sns.lineplot(x = "time", y = "rolling_L2", data = plot_df, palette = palette_, 
                         alpha = 0.75, linewidth = 2.5, hue = "model") 
        else:
            sns.lineplot(x = "time", y = "rolling_L2", data = plot_df, palette = palette_, 
                         alpha = 0.85,linewidth = 1.5, hue = "model") 
    else:
        print("NAH", rolling)
        sns.lineplot(x = "time", y = "L2", data = time_loss_df,  alpha = 0.9, hue = "model",
                     palette = palette_)

    plt.title(title, fontsize = 25)
    plt.xlabel("Time (seconds)", fontsize = 20)
    # raw string: "\h" is not a valid escape sequence in a normal string literal
    plt.ylabel(r"L2 Loss $(y-\hat{y})^2$", fontsize = 20)
    plt.legend(prop={"size":17})
    sns.despine()
    if save:
        plt.savefig(save)
        



In [4]:
%%time

#rand_path = 'final_results/*random.pickle'

#column_expers = load_analysis(path = "*", bp = "pure_prediction/", 
#                              model = "cyclic", ip_use_obs = False)

rand_path = 'final_results/*random.pickle'
random_path_list = glob.glob(rand_path)

random_path_list = [#'block_N_Targidx_115N_Obsidx_74.pickle', 
                    #'experiment_results/publish/split_0.5/block_N_Targidx_115N_Obsidx_143.pickle',
                    'experiment_results/medium/split_0.5/column_cyclic_unif_W_N_Targidx_1103N_Obsidx_0.pickle']
                   #'experiment_results/medium/split_0.5/column_cyclic_unif_W_N_Targidx_1103N_Obsidx_0.pickle']

from tqdm.autonotebook import tqdm


COMBINED_ANALYSIS_NOT_LOADED = True
if COMBINED_ANALYSIS_NOT_LOADED:
    
    print(random_path_list)
    random_expers = EchoStateAnalysis(random_path_list, model = "uniform",
                                  ip_use_observers = True, ip_method = "nearest",
                                     force_random_expers = True, pure_prediction = True)
    
    #random_expers = EchoStateAnalysis(random_path_list, model = "uniform",
    #                              ip_use_observers = True, ip_method = "linear")
    
    #dl_path_list = glob.glob('final_results/*cyclic.pickle')

    #dl_path_list = ["final_results/500_900Hz_cyclic.pickle",
    #                "final_results/150_500Hz_cyclic.pickle",
    #               "experiment_results/publish/split_0.5/block_cyclictargetHz_ctr:_999targetKhz:_0.1__obskHz:_0.25.pickle",
    #               "experiment_results/publish/split_0.5/block_cyclictargetHz_ctr:_999targetKhz:_0.5__obskHz:_1.0.pickle"]#"experiment_results/publish/split_0.5/block_cyclicN_Targidx_40N_Obsidx_26.pickle"]
    #dl_expers = EchoStateAnalysis(dl_path_list, model = "cyclic",
    #                              ip_use_observers = True, ip_method = "linear")

#end = time()
#end - start

['experiment_results/medium/split_0.5/column_cyclic_unif_W_N_Targidx_1103N_Obsidx_0.pickle']


HBox(children=(HTML(value='experiment list, loading data...'), FloatProgress(value=0.0, max=1.0), HTML(value='…

keys dict_keys(['prediction', 'nrmse', 'best arguments', 'obs_idx', 'resp_idx', 'input_weight_type', 'Train', 'Test', 'xTe', 'xTr', 'experiment_inputs', 'get_observer_inputs'])
xTe shape (100, 1103)



HBox(children=(HTML(value='experiment list, fixing interpolation...'), FloatProgress(value=0.0, max=1.0), HTML…

CYCLIC DISABLED
{'noise': 0.1584675971825879, 'connectivity': 0.0005452050474823753, 'spectral_radius': 0.5017982447147369, 'regularization': 1793.3769622864643, 'leaking_rate': 0.7393609389662743, 'n_nodes': 1000, 'feedback': True}
uniform
COLUMN GOLUMN!
OVERWRITING f
OVERWRITING T
OVERWRITING A

[]
[]
total experiments completed: 1
total experiments half complete: 0
total experiments not yet run: 0
Percentage of tests completed: 100.0%
CPU times: user 27.8 s, sys: 634 ms, total: 28.4 s
Wall time: 16 s


In [5]:
#time_plot(random_expers, experiment_num = 0,mean = True, rolling = 1)
type(np.array([1]))

numpy.ndarray

In [6]:
for i in range(len(random_expers.experiment_lst)):
    quick_plot(i, random_expers, mean = True, rolling = 3)
    #show_images(column_expers)

HBox(children=(HTML(value='processing path list...'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




IndexError: index 512 is out of bounds for axis 0 with size 512

In [None]:
obs_idx = random_expers.experiment_lst[0]["obs_idx"]
resp_idx = random_expers.experiment_lst[0]["resp_idx"]
dual_lambda = False

In [None]:
hi = np.random.uniform(size = (10,100))
hi2 = np.random.uniform(size = (10,100))
hi.shape
ridge_x = hi.T @ hi + 0.01 * np.eye(hi.shape[1])
ridge_y = hi.T @ hi2


In [None]:
%%time 
np.linalg.solve(ridge_x, ridge_y)

In [None]:
def build_unq_dict_lst( lst1, lst2, key1 = "start_indices", key2 = "random_seed"):
        """
        Build one dict per (lst1 item, lst2 item) combination.

        Each dict maps key1 to the lst1 item and key2 to the lst2 item. The
        result is ordered with lst1 as the outer loop and lst2 as the inner loop.
        """
        return [{key1: lst1[a], key2: lst2[b]}
                for a in range(len(lst1))
                for b in range(len(lst2))]

build_unq_dict_lst([1,2,3], [4,5,6])

In [None]:
class bcolors:
    """ANSI escape sequences for styling text printed to a terminal."""
    # prefixes: print one of these before the text
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # suffix: print after the text to switch the styling off again
    ENDC = '\033[0m'
print(bcolors.FAIL + "Warning: No active frommets remain. Continue?" + bcolors.ENDC)

In [None]:
%%time 
scipy.linalg.solve(ridge_x, ridge_y)

In [None]:
%%time
def build_distance_matrix():
    """
    Build the |target index - observer index| distance matrix.

    Reads the notebook-level variables obs_idx, resp_idx and dual_lambda.

    Returns:
        If dual_lambda is falsy: an array with one row per entry of resp_idx
        and one column per entry of obs_idx.
        Otherwise: a list of two such arrays, the first for the observers
        below the smallest target index and the second for the observers
        above the largest target index.
    """
    def calculate_distance_matrix(obs_idx, resp_idx):
        # broadcasting a column of targets against a row of observers gives
        # the whole matrix at once
        obs_row = np.asarray(obs_idx).reshape(1, -1)
        resp_col = np.asarray(resp_idx).reshape(-1, 1)
        return np.abs(resp_col - obs_row)

    def split_lst(lst, scnd_lst):
        # entries of lst strictly below / strictly above the range of scnd_lst
        lst = np.array(lst)
        scnd_arr = np.array(scnd_lst)
        return list(lst[lst < scnd_arr.min()]), list(lst[lst > scnd_arr.max()])

    if not dual_lambda:
        return calculate_distance_matrix(obs_idx, resp_idx)

    obs_lsts = split_lst(obs_idx, resp_idx)
    return [calculate_distance_matrix(obs_lst, resp_idx) for obs_lst in obs_lsts]

def get_exp_weights():
    """
    Draft: build the distance matrix, generate the exponential weights and
    multiply them by random +1/-1 signs.

    #TODO: description
    change the automatic var assignments

    NOTE(review): this reads `self` but takes no `self` parameter, so it looks
    like a method body copied out of a class; as a free function it fails with
    NameError on `self` — confirm where it is meant to live.
    """
    build_distance_matrix()
    # seeded generator; it is passed to exp_w and also draws the signs below
    random_state = np.random.RandomState(self.seed)
    # presumably fills self.exp_weights (it is read right after) — verify
    self.exp_w(random_state)
    # n_temp is not used afterwards
    n_temp = len(self.exp_weights)
    # one random sign (+1 or -1) per weight
    sign = random_state.choice([-1, 1], self.exp_weights.shape)

    # NOTE(review): `exp_weights` is never bound as a local, so this line
    # raises UnboundLocalError; presumably `self.exp_weights` was intended — confirm.
    exp_weights *= sign

In [None]:
%%time
def calculate_distance_matrix(obs_idx):
    """
    Draft: build the matrix of absolute differences between every entry of
    self.resp_idx (one row each) and every entry of obs_idx (one column each).

    NOTE(review): `self` and `verbose` are not defined in this function, so it
    looks like a method body copied out of a class — confirm where it belongs.
    """
    obs_idxx_arr = np.array(obs_idx)
    for i, resp_seq in enumerate(self.resp_idx):
        # distances from this target index to all observers, as a 1-row matrix
        DistsToTarg = abs(resp_seq - obs_idxx_arr).reshape(1, -1)
        if i == 0:
            distance_np_ = DistsToTarg
        else:
            # stack the new row underneath the rows collected so far
            distance_np_ = np.concatenate([distance_np_, DistsToTarg], axis = 0)
    #if verbose == True:
    #    display(pd.DataFrame(distance_np))
    # self-assignment, has no effect
    distance_np_ = distance_np_
    if verbose == True:
        # NOTE(review): this reports self.distance_np_, not the local
        # distance_np_ that was just built — confirm which one is intended.
        print("distance_matrix completed " + str(self.distance_np_.shape))
        display(self.distance_np_)
    return(distance_np_)

print("a")

In [None]:
random_expers.experiment_lst[0]["best arguments"]

In [None]:
rand_path = 'final_results/*random.pickle'
random_path_list = glob.glob(rand_path)


COMBINED_ANALYSIS_NOT_LOADED = True
if COMBINED_ANALYSIS_NOT_LOADED:
    
    print(random_path_list)
    random_expers2 = EchoStateAnalysis(random_path_list, model = "uniform",
                                  ip_use_observers = True, ip_method = "linear",
                                     force_random_expers = True)
    

In [None]:
random_state = np.random.RandomState(123)
random_state.choice([-1,1])

In [None]:
def try_other_exper_W(analysis, w_dest, w_source,  models = ("exponential",), random_seed = 155,
                     models2avg = 100):
    """
    Re-run models on w_dest using the best hyper-parameters stored in w_source,
    averaging the predictions of several differently seeded networks.

    For every model, models2avg networks are trained with the seeds
    random_seed, random_seed + 1, ...; the mean of their predictions is stored
    in w_dest["prediction"][model]. Afterwards the loss dataframe of analysis
    is rebuilt and every experiment is plotted with quick_plot.

    Arguments:
        analysis: the EchoStateAnalysis object that holds w_dest.
        w_dest: the experiment dict whose data is used and whose predictions
            are overwritten.
        w_source: the experiment dict whose "best arguments" are used.
        models: names of the models to run.
        random_seed: seed of the first network; network i uses random_seed + i.
        models2avg: number of networks to average per model.

    Returns:
        A dict mapping each model name to the list of per-network mean squared
        errors against w_dest["xTe"].
    """
    # the data does not depend on the model, so it is prepared once
    obs_idx_temp = w_dest["obs_idx"].copy()
    resp_idx_temp = w_dest["resp_idx"].copy()
    A_temp = w_dest["A"].copy()
    tr_timesteps = np.array(w_dest["xTr"]).shape[0]
    Train = A_temp[:tr_timesteps ,obs_idx_temp]
    Test = A_temp[tr_timesteps :,obs_idx_temp]
    xTr = w_dest["xTr"].copy()
    xTe = w_dest["xTe"].copy()
    print("train shape", Train.shape)

    scores = {}
    for model in models:
        # work on a copy so the stored best arguments do not pick up "random_seed"
        vals = dict(w_source["best arguments"][model])
        scores[model] = []
        pred_sum = None

        for i in range(models2avg):
            vals["random_seed"] = random_seed + i
            my_esn = EchoStateNetwork(**vals, model_type = model, obs_idx = obs_idx_temp, resp_idx = resp_idx_temp)
            my_esn.train(x = Train, y = xTr)
            pred_spec = my_esn.predict(n_steps = Test.shape[0], x = Test)
            scores[model].append(np.mean((pred_spec - xTe)**2))
            pred_sum = pred_spec.copy() if pred_sum is None else pred_sum + pred_spec

        # with models2avg == 0 there is nothing to average; keep the old prediction
        if pred_sum is not None:
            w_dest["prediction"][model] = pred_sum/models2avg

    all_models = list(w_dest["prediction"].keys())
    analysis.build_loss_df(group_by = "freq", models = all_models,  columnwise = False)
    # NOTE(review): quick_plot rebuilds the loss dataframe by default
    # (force_build = True) without columnwise = False — confirm that is intended.
    for i in range(len(analysis.experiment_lst)):
        quick_plot(i, analysis, mean = True, rolling = 150)
    return(scores)

In [None]:
random_expers.experiment_lst[0]["prediction"].keys()

In [None]:
try_other_exper_W(random_expers, random_expers.experiment_lst[0], 
                  random_expers.experiment_lst[0], models = ["random_exponential"], models2avg = 100,
                  random_seed = 11)

In [None]:
#for i in range(len(random_expers.experiment_lst)):
#        quick_plot(i, random_expers, mean = False, rolling = 150)

In [None]:
barplot_dict = try_other_exper_W(random_expers2, random_expers2.experiment_lst[1], 
                  random_expers2.experiment_lst[1], models = ["uniform"], models2avg = 100,
                  random_seed = 11)

In [None]:
for i in range(len(random_expers2.experiment_lst)):
        quick_plot(i, random_expers2, mean = True, rolling = 150)

In [None]:
def get_frequencies(trial = 1):
    """
    get frequency lists

    Arguments:
        trial: number of the predefined configuration (1 to 7).

    Returns:
        (obs_list, resp_list): resp_list is the run of consecutive integers
        from the trial's lower target bound up to (not including) its upper
        bound; obs_list is the band of the trial's observer width directly
        below the target band followed by the band of the same width directly
        above it.

    Raises:
        ValueError: if trial is not one of the predefined configurations.
    """
    # trial -> (lower target bound, upper target bound, observer band width)
    trial_settings = {
        1: (210, 560, int(320 / 2)),
        2: (340, 640, 280),
        3: (340, 350, 20),
        4: (60, 350, 40),
        5: (50, 200, 40),
        6: (130, 530, 130),
        7: (500, 900, 250),
    }
    try:
        lb_targ, ub_targ, obs_hz = trial_settings[trial]
    except KeyError:
        raise ValueError("unknown trial " + repr(trial) + "; expected one of "
                         + str(sorted(trial_settings))) from None
    obs_list =  list( range( lb_targ - obs_hz, lb_targ))
    obs_list += list( range( ub_targ, ub_targ + obs_hz))
    resp_list = list( range( lb_targ, ub_targ))
    return obs_list, resp_list
get_frequencies(3)

In [None]:
obs_idx = random_expers.experiment_lst[1]["resp_idx"]
print("resp_idx",obs_idx)
f = np.round(np.array(random_expers.experiment_lst[1]["f"]), 2)
print("frequencies", f[obs_idx])

#print("frequencies", f[range(143, 257)])


In [None]:
print(random_expers.experiment_lst[1]["xTe"].shape)

In [None]:
quick_plot(1, random_expers, mean = True, outfile = "150_500Hz_mean")
#quick_plot(1, random_expers, mean = False, outfile = "150_500Hz")
#quick_plot(0, random_expers, mean = True, outfile = "500_900Hz_mean")
#quick_plot(3, random_expers, mean = True, outfile = "1000Hz_mean")

In [None]:
for i in range(len(random_expers.experiment_lst)):
    quick_plot(i, random_expers, mean = True, outfile = "500_900Hz_mean")
    show_images(random_expers)

In [None]:
quick_plot(3, random_expers, mean = False, force_build = True ,outfile = "1000Hz")

In [None]:
for i in range(len(random_expers.experiment_lst)):
    
    quick_plot(i, random_expers, mean = True, rolling = 40)

In [None]:
for i in range(len(dl_expers.experiment_lst)):
    quick_plot(i, dl_expers, mean = True)
    #show_images(dl_expers, exper_num = i)

#dl_expers.hyper_parameter_plot()
if COMBINED_ANALYSIS_NOT_LOADED:
    #check out whats what
    n = 1
    hi = [dl_expers, random_expers]
    quick_plot(n, hi, mean = True)


In [None]:
from copy import copy
def recursive_dict_combine(exper1, exper2):
    """
    Merge the dict-valued entries of exper2 into exper1, one level deep.

    For every key of exper1 whose value is a dict, the entry is replaced by
    the union of that dict and exper2's dict under the same key; on a clash
    exper2's value wins. Keys missing from exper2, or whose exper2 value is
    not a dict, are left as they are. Non-dict entries of exper1 are not
    touched.

    Note that exper1 is updated in place and is also the return value.
    """
    for key, value in exper1.items():
        other = exper2.get(key)
        if isinstance(value, dict) and isinstance(other, dict):
            exper1[key] = {**value, **other}
    return exper1
def analysis_combine(analysis1_, analysis2_, n):
    """
    Combine the first two experiments of two analysis objects.

    For i = 0 and 1 (as far as both objects have that many experiments),
    experiment i of analysis2_ is merged with experiment i of analysis1_ via
    recursive_dict_combine; where both define the same entry inside a dict,
    the one from analysis1_ wins.

    Arguments:
        analysis1_: first analysis object; a shallow copy of it is returned.
        analysis2_: second analysis object.
        n: currently unused.

    Returns:
        The shallow copy of analysis1_ holding the combined experiments.

    NOTE(review): the copy is shallow, so for a plain object experiment_lst is
    shared with analysis1_ and the assignment below is visible there too;
    recursive_dict_combine also updates the experiment dicts of analysis2_ in
    place. Confirm whether callers rely on this before changing it.
    """
    analysis1 = copy(analysis1_)
    # use the arguments, not the notebook globals dl_expers / random_expers
    n_combine = min(2, len(analysis1.experiment_lst), len(analysis2_.experiment_lst))
    for i in range(n_combine):
        analysis1.experiment_lst[i] = recursive_dict_combine(analysis2_.experiment_lst[i], 
                                                             analysis1.experiment_lst[i])
    return(analysis1)
#if COMBINED_ANALYSIS_NOT_LOADED:
combined_analysis = analysis_combine(random_expers, dl_expers, 0)


In [None]:
# pickle is used below; import it here so the cell does not depend on another
# cell or script having imported it
import pickle

# list the models that have a prediction in experiment 1, with the type of
# each stored prediction
for key, values in combined_analysis.experiment_lst[1]["prediction"].items():
    print(key)
    print(type(values))

#save the combined analysis:
a = {'combined_analysis': combined_analysis}

with open('final_results/combined_analysis.pickle', 'wb') as handle:
    pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)
# NOTE(review): the file is always written first, so this reload only reads
# back what was just saved — confirm whether save and load were meant to be
# alternatives.
if not COMBINED_ANALYSIS_NOT_LOADED:
    with open('final_results/combined_analysis.pickle', 'rb') as handle:
        combined_analysis = pickle.load(handle)["combined_analysis"]

In [None]:
combined_analysis = random_expers
quick_plot(1, combined_analysis, outfile = "150_500Hz", mean = False)

In [None]:
quick_plot(1, combined_analysis, outfile = "150_500Hz", mean = False)

In [None]:
combined_analysis.hyper_parameter_plot()

In [None]:
!ls experiment_results/medium/split_0.5

In [None]:

#show_images(random_expers, 1)

g_truth = combined_analysis.experiment_lst[1]["xTe"]

In [None]:
%matplotlib widget

xx, yy = combined_analysis.experiment_lst[1]["xTe"].shape
i_coords, j_coords = np.meshgrid(range(xx), range(yy), indexing='ij')
k_coords = (combined_analysis.experiment_lst[1]["prediction"]["uniform"] - g_truth) **2

# Data for a three-dimensional line
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
%matplotlib widget

xx, yy = combined_analysis.experiment_lst[1]["xTe"].shape
i_coords, j_coords = np.meshgrid(range(xx), range(yy), indexing='ij')
k_coords = combined_analysis.experiment_lst[1]["prediction"]["ip: linear"]# - g_truth) **2

# Data for a three-dimensional line
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
%matplotlib widget

xx, yy = combined_analysis.experiment_lst[1]["xTe"].shape
i_coords, j_coords = np.meshgrid(range(xx), range(yy), indexing='ij')
k_coords = combined_analysis.experiment_lst[1]["xTe"]

# Data for a three-dimensional line
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
%matplotlib widget

xx, yy = combined_analysis.experiment_lst[1]["prediction"]["cyclic"].shape
i_coords, j_coords = np.meshgrid(range(xx), range(yy), indexing='ij')
k_coords = combined_analysis.experiment_lst[1]["prediction"]["cyclic"]

# Data for a three-dimensional line
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
%matplotlib widget

experiment_0 = combined_analysis.experiment_lst[0]
cyclic_pred = np.asarray(experiment_0["prediction"]["cyclic"])

# NOTE(review): the original line ended in a dangling "-" (a SyntaxError).
# The ground truth xTe is assumed to be the missing operand, as in the
# squared-residual cell for experiment 1 — confirm.
residual = cyclic_pred - np.asarray(experiment_0["xTe"])

xx, yy = cyclic_pred.shape
i_coords, j_coords = np.meshgrid(range(xx), range(yy), indexing='ij')
# the surface still shows the prediction itself, not the residual
k_coords = cyclic_pred

# Data for a three-dimensional surface
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
%matplotlib widget

# Data for a three-dimensional line
ax = plt.axes(projection='3d')
ax.plot_surface(i_coords, j_coords,  k_coords, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');

In [None]:
import matplotlib.pyplot as plt
# importing Axes3D registers the '3d' projection on older matplotlib versions
from mpl_toolkits.mplot3d import Axes3D

# Axes3D is not called directly with the data; a 3d axis is created on a
# figure and the wireframe is drawn through it. i_coords, j_coords and
# k_coords come from the previous cells.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(i_coords, j_coords, np.asarray(k_coords))

## Pure Prediction 2.0
The key to the pure prediction is to break it down. We are going to take in a parameter k, which is the maximum number of time series to include in a block prediction. Then we will break the target matrix A into component sections.

This will simply require an appropriate series. The next step is to develop an appropriate method for combining the different matrices.

There are two options: overlapping and non-overlapping sections. If the predictions overlap, we take a simple average over the overlapping parts. Otherwise, we simply combine them.

In [None]:
#enablePrint()
pickle_list = glob.glob('experiment_results/publish/*/*.pickle')
pickle_list = pickle_list[1:]
print(pickle_list)

#### check if path list has duplicates.

In [None]:
assert len(pickle_list) == len(list(np.unique(pickle_list)))

In [None]:

pickle_A = EchoStateAnalysis(pickle_list, model = "uniform", ip_use_observers = True, ip_method = "linear")

### The following helper functions help produce the final figures.

In [None]:
pickle_A = dl_expers

In [None]:
target_freqs = []
for i, experiment in enumerate(pickle_A.experiment_lst):
    tf = get_target_freq(experiment)
    target_freqs.append(tf)
    #print(experiment["resp_idx"])
    if IdxMatch(experiment):
        freq_plot(pickle_A, experiment_num = i)
    if IdxMatch(experiment, n_obs = 96):
        freq_plot(pickle_A, experiment_num = i)
target_freqs


#### Add Zhizhuo Data

In [None]:
resp_idx = pickle_A.experiment_lst[1]["resp_idx"]
print(resp_idx)
f = np.array(pickle_A.experiment_lst[1]["f"])
f[resp_idx]

In [None]:
#freq_plot(pickle_A, 2, title = "Rena experiment: Avg. L2 Loss vs Frequency", save = "Rena_freq")
#time_plot(pickle_A, experiment_num = 2, rolling = 150, save = "Rena_time",
#          title = "Rena experiment: Avg. L2 Loss vs Time (rolling average)")
quick_plot(2, pickle_A, mean = True, rolling = 10)

In [None]:
#pickle_A.hyper_parameter_plot()

In [None]:
for i in range(len(pickle_A.experiment_lst)):
    freq_plot(pickle_A, i, mean = True)
    time_plot(pickle_A, experiment_num = i, rolling = 150, mean = True)

In [None]:
pickle_A.make_R_barplots()

pickle_rDF = pickle_A.rDF_time
plt.figure(figsize = (16,8))
pickle_loss_df_50_50_split =  pickle_rDF[pickle_rDF.split == 0.5]
#pickle_loss_df_50_50_split = pickle_loss_df_50_50_split[pickle_loss_df_50_50_split.model != "exponential"]

mean_ =  pickle_loss_df_50_50_split.R.rolling(50).mean()
colors = ["cyan", "red"]

sns.scatterplot(x = "time", y = pickle_loss_df_50_50_split["L2_loss"], data = pickle_loss_df_50_50_split, 
                hue = "model", alpha = 0.02)

sns.lineplot(x = "time", y = mean_, data = pickle_loss_df_50_50_split, 
             hue = "model", alpha = 0.9)
plt.title("block_N_Targidx_40N_Obsidx_26")

pickle_rDF = pickle_A.rDF
plt.figure(figsize = (16,8))
pickle_loss_df_50_50_split =  pickle_rDF[pickle_rDF.split == 0.7]
#pickle_loss_df_50_50_split = pickle_loss_df_50_50_split[pickle_loss_df_50_50_split.model != "exponential"]

mean_ =  pickle_loss_df_50_50_split.R.rolling(50).mean()
colors = ["cyan", "red"]

sns.scatterplot(x = "time", y = pickle_loss_df_50_50_split["R"], data = pickle_loss_df_50_50_split, 
                hue = "model", alpha = 0.02)

sns.lineplot(x = "time", y = mean_, data = pickle_loss_df_50_50_split, 
             hue = "model", alpha = 0.9)
plt.title("block_N_Targidx_40N_Obsidx_26")

pickle_rDF = pickle_A.rDF
plt.figure(figsize = (16,8))
pickle_loss_df_50_50_split =  pickle_rDF[pickle_rDF.split == 0.9]
#pickle_loss_df_50_50_split = pickle_loss_df_50_50_split[pickle_loss_df_50_50_split.model != "exponential"]

mean_ =  pickle_loss_df_50_50_split.R.rolling(50).mean()
colors = ["cyan", "red"]

sns.scatterplot(x = "time", y = pickle_loss_df_50_50_split["R"], data = pickle_loss_df_50_50_split, 
                hue = "model", alpha = 0.02)

sns.lineplot(x = "time", y = mean_, data = pickle_loss_df_50_50_split, 
             hue = "model", alpha = 0.9)
plt.title("block_N_Targidx_40N_Obsidx_26")

In [None]:

def make_pretty_pics(experiment_number = 0, 
                     modelz = ("ip: linear", "uniform", "exponential"),
                     show_images = False, show_residuals = False
                     ):
    """Plot predictions, residuals and an R bar chart for one experiment.

    Reads the experiment dict ``pickle_A.experiment_lst[experiment_number]``
    (notebook global) and uses its ``"xTe"`` entry as ground truth and its
    ``"prediction"`` dict as the per-model predictions.

    Parameters
    ----------
    experiment_number : int
        Index into ``pickle_A.experiment_lst``.
    modelz : sequence of str
        Keys of the experiment's ``"prediction"`` dict to compare. The image
        and residual grids are 2x2, so at most four models can be shown there.
    show_images : bool
        If true, show the ground truth and every model prediction as images.
    show_residuals : bool
        If true, show a heatmap of ``|truth - prediction|`` per model.

    Side effects
    ------------
    Displays the R table and the bar plot, and writes the bar plot to
    ``obs5_R.png`` (the file name is fixed, whatever the experiment number).
    """
    # Experiment 0 is labelled "low frequency" (this is what the original
    # code ended up with after overriding its earlier "obs4" label).
    if experiment_number == 0:
        zhizhuo_label = "low frequency"
    elif experiment_number == 4:
        zhizhuo_label = "obs5"
    else:
        zhizhuo_label = "experiment " + str(experiment_number)

    spec = pickle_A.experiment_lst[experiment_number]
    truth = spec["xTe"]
    predictions = spec["prediction"]

    if show_images:
        plt.imshow(truth, aspect = 0.01)
        plt.title("Ground truth")
        plt.show()
        fig, ax = plt.subplots(2,2, figsize = (12, 6))
        ax = ax.flatten()
        for i, model in enumerate(modelz):
            ax[i].imshow(predictions[model], aspect = 0.01)
            ax[i].set_title(model)
        plt.tight_layout()

    # Per-model score and absolute residual, kept in the order of `modelz`.
    model_scores = []
    residuals = []
    for model in modelz:
        pred_ = predictions[model]
        model_scores.append(nrmse(pred_, truth))
        residuals.append(np.abs(truth - pred_))

    if show_residuals:
        fig, ax = plt.subplots(2,2, figsize = (12, 6))
        ax = ax.flatten()
        for i, model in enumerate(modelz):
            sns.heatmap(residuals[i], ax = ax[i])
            # Each panel is titled with its own model's score (previously the
            # last model's score was printed on every panel).
            ax[i].set_title(model + " |residuals|, R: " + str(round(model_scores[i], 5)))
        plt.tight_layout()
        plt.show()

    palette_ = dict(zip(modelz, sns.color_palette("tab10")[0:4]))

    # One row per model; building the frame directly avoids the NaN padding
    # rows that melting a list of single-key dicts produces.
    nrmse_df = pd.DataFrame({"model": list(modelz), "R": model_scores})
    nrmse_df = nrmse_df.sort_values(by='R', ascending=True)

    display(nrmse_df)
    plt.figure(figsize=(6,5))
    sns.barplot(x = "model", y = "R", data = nrmse_df, palette = palette_)
    plt.title("RMSE for " + zhizhuo_label)
    plt.xticks(rotation=60)
    plt.savefig('obs5_R.png')
    plt.show()
    
    
make_pretty_pics(0, show_residuals = True)

In [None]:
# Scan every stored experiment and remember the response indices of the
# (last) experiment that has more than one response index. `exp_resp_lst`
# is read by later cells.
for i in pickle_A.experiment_lst:
    spec = i["resp_idx"]
    
    if len(spec) > 1:
        exp_resp_lst = (spec)
        # despite its name, exp_f holds the experiment dict's keys, not frequencies
        exp_f = i.keys()
        print(exp_f)

In [None]:
from os.path import dirname, join as pjoin
import scipy.io as sio
# NOTE(review): machine-specific absolute path; this cell only runs where that file exists.
new_f = sio.loadmat("/Users/hayden/Desktop/f_new.mat")
new_f = new_f["f"]

# First element of each row of new_f selected by the response indices found above.
# presumably new_f is an (N, 1) column of frequencies — TODO confirm
freq_imp = [list(new_f[idx])[0] for idx in exp_resp_lst]

In [None]:
new_f[exp_resp_lst[-1] +13]

In [None]:
# Tag both result tables with an experiment id so they can be sliced per
# experiment further down.
R_results_df = pickle_A.R_results_df
display(R_results_df)
R_results_df_rel = pickle_A.R_results_df_rel
# NOTE: this adds the column to the frame held by pickle_A (same object).
# The hard-coded 9 * 3 assumes 9 experiments x 3 models, in model-major order.
R_results_df["experiment"] = list(range(9)) * 3
# .copy() makes the filtered frame independent, so the column assignment below
# is not a write to a slice of pickle_A's frame (avoids SettingWithCopyWarning).
R_results_df_rel = R_results_df_rel[R_results_df_rel["model"] != "interpolation"].copy()
R_results_df_rel["experiment"] = list(range(9)) * 3
display(R_results_df_rel)

In [None]:
sns.barplot(x = "model", y = "R", data = R_results_df_rel)

In [None]:
# 9 x 2 grid: one row per experiment, left = R per model, right = relative R per model.
fig, ax = plt.subplots(9, 2, figsize = (14, 25))
ax = ax.flatten()

for i in range(9):
    
    # rows of both tables that belong to experiment i
    R_df_spec = R_results_df[R_results_df.experiment == i]
    R_df_rel_spec = R_results_df_rel[R_results_df_rel.experiment == i]
    
    #sns.barplot(x = "model", y = "R", data = R_df_spec)#, ax=ax[0])
    R_df_spec =R_df_spec.drop(columns = "experiment")
    R_df_rel_spec =R_df_rel_spec.drop(columns = "experiment")
    
    #sns.violinplot(x = "model", y = "R", data = self.R_results_df_rel, ax=ax[1])
    # flattened axes: row i occupies positions 2*i (left) and 2*i+1 (right)
    sns.barplot(x = "model", y = "R", data = R_df_spec, ci = None, ax=ax[2*i])
    
    sns.barplot(x = "model", y = "R", data = R_df_rel_spec, ci = None, ax=ax[2*i+1])
plt.show()

# Scatter of R over time for the 0.9 split, with the rolling(50) mean of the R column on top.
pickle_rDF = pickle_A.rDF
plt.figure(figsize = (16,8))
# NOTE: the variable name says 50_50 but the filter selects split == 0.9.
pickle_loss_df_50_50_split =  pickle_rDF[pickle_rDF.split == 0.9]
#pickle_loss_df_50_50_split = pickle_loss_df_50_50_split[pickle_loss_df_50_50_split.model != "exponential"]

mean_ =  pickle_loss_df_50_50_split.R.rolling(50).mean()
# `colors` is not used by the plotting calls below
colors = ["cyan", "red"]

sns.scatterplot(x = "time", y = pickle_loss_df_50_50_split["R"], data = pickle_loss_df_50_50_split, 
                hue = "model", alpha = 0.02)

sns.lineplot(x = "time", y = mean_, data = pickle_loss_df_50_50_split, 
             hue = "model", alpha = 0.9)
plt.title("block_N_Targidx_40N_Obsidx_26")

# Examining new pickle results

In [None]:
".pickle" in 'experiment_results/publish/split_0.5/block_N_Targidx_1N_Obsidx_4.pickle'

In [None]:
#pickle_A.experiment_lst

## Import packages, glob the path lists from experiment results

Why not try something asymmetric? Asymmetric exponential weights? Otherwise we will totally collapse.

In [None]:
import tqdm
from tqdm.notebook import trange, tqdm
import glob

%run -i 'PyFiles/imports.py'
%run -i 'PyFiles/helpers.py'
%run -i "PyFiles/experiment.py"
%run -i "PyFiles/analysis.py"
medium_path_list = glob.glob('experiment_results/medium/*/*.txt')
publish_path_list = glob.glob('experiment_results/publish/*/*.txt')
publish_path_list

## September 18th Task List:


0) Continue to clean up analysis notebook
1) Work to finish grading
2) code asymmetric exponential weights
3) Work more on biological kaggle problem
4) Work on the paper (Rena parts)

## Monday Tasks
1) fix figure (Cycles 4 and 5) <br> 
2) check out the new biological kaggle problem (Cycle 3) <br> 
3) Select and extract (adjusted) indexes for block tests for Zhizhuo (Cycle 2) <br>
4) Do the parts of the paper which Rena requested

In [None]:

def get_f(path = '/Users/hayden/Downloads/f_3000.mat'):
    """Print entry 272 of the flattened "f" array stored in a .mat file.

    path: location of the .mat file; it must contain a variable named "f".
    Returns nothing; the value is only printed.
    """
    flat_f = loadmat(path)["f"].reshape(-1,)
    print(list(flat_f)[272])
#"/Users/hayden/Desktop/DL_LAB/Reservoir/MARIOS/spectrogram_data/publish/f_new.mat")
get_f()


### Experiment session: relative R

In [None]:


def rolling_rel_plot(n, rolling = 100, difference = False):
    """Plot uniform-RC vs linear-interpolation R for experiment `n`.

    Reads ``publish_sightIp.rDF`` (notebook global), keeps the rows whose
    ``"experiment #"`` equals `n`, and divides both the "uniform" and the
    "ip: linear" R values by the mean interpolation R so that the two models
    are on a common scale.

    Parameters
    ----------
    n : int
        Value of the ``"experiment #"`` column to select.
    rolling : int
        Window length of the rolling mean that is drawn.
    difference : bool
        If true, plot ``uniform - interpolation`` (scatter plus rolling mean)
        and return a summary tuple; otherwise draw the two rolling means.

    Returns
    -------
    ``(mean difference < 0, number of points)`` when `difference` is true,
    otherwise ``None``. A negative mean difference means the uniform RC had
    the lower relative R.
    """
    results = publish_sightIp.rDF

    sub_results = results[results["experiment #"] == n]
    sub_hi_unif = sub_results.R[sub_results.model == "uniform"].values
    sub_hi_ip = sub_results.R[sub_results.model == "ip: linear"].values

    # Normalise both models by the mean interpolation R so that experiments
    # with different absolute error levels can be compared.
    denominator = np.sum(sub_hi_ip) / len(sub_hi_ip)

    sub_hi_unif = pd.Series(sub_hi_unif / denominator)
    sub_hi_ip   = pd.Series(sub_hi_ip / denominator)
    diff = sub_hi_unif - sub_hi_ip

    rolling_ip = sub_hi_ip.rolling(rolling).mean()
    rolling_unif = sub_hi_unif.rolling(rolling).mean()
    diff_roll = diff.rolling(rolling).mean()
    xx = range(len(rolling_ip))

    if difference:
        rc_better = np.mean(diff) < 0
        color_ = "green" if rc_better else "red"
        sns.scatterplot(x = xx, y = diff,  color = color_, alpha = 0.01)
        sns.lineplot(x = xx, y = diff_roll,  color = color_, alpha = 0.3)
        plt.ylim(-5,5)
        # diff = uniform - interpolation, so the RC wins when diff is negative
        # (the old title stated the opposite sign).
        plt.title("Difference: rel unif - interpolation: < 0 -> rc doing better")
        return(rc_better, len(diff))
    else:
        sns.lineplot(x = xx, y = rolling_ip,  color = "red", alpha = 0.3) #label = "interpolation",
        sns.lineplot(x = xx, y = rolling_unif,  color = "blue", alpha = 0.3) #label = "Uniform Random RC",
    



In [None]:
# Overlay the relative-difference plot of every experiment and count how often
# the uniform RC beat interpolation, separately for the two test-set sizes.
plt.figure(figsize = (16, 5))
better_90 = []
better_50 = []
for i in range(29):
    try:
        bet = rolling_rel_plot(i, rolling = 150, difference = True)
    except Exception as err:
        # Keep going, but say which experiment failed and why instead of
        # swallowing every exception silently.
        print(i, err)
        continue
    # More than 400 test points is treated as a 50/50 split, fewer as 90/10.
    if bet[1] > 400:
        better_50.append(bet[0])
    else:
        better_90.append(bet[0])
def quality(better):
    """Return, as a string, the fraction of True entries in `better`."""
    return(str(np.sum(better)/len(better)))
print(quality(better_50))
print(quality(better_90))

### Fix the NaN interpolation results.

In [None]:
publish_sightIp.kde_plots()
# bare expression: not displayed, because it is not the last statement of the cell
publish_sightIp.experiment_lst[1].keys()

# `hi` is read as a notebook global by later cells (they access hi.f, hi.T, hi.xTe and hi.A)
hi = publish_sightIp.get_experiment(publish_sightIp.experiment_lst[1])


In [None]:
def get_kl_div(exper_number, col_wise = False, plot = True, col_wise_method = "freq"):
    """Compare each model's prediction with the ground truth via a KL-style divergence.

    Densities are Gaussian-KDE estimates evaluated on a fixed grid of 200
    points on [-4, 3]. The grid values are used as they are (they are not
    re-normalised to sum to one), so the result is a comparison score rather
    than a calibrated KL divergence.
    Reference: https://towardsdatascience.com/kl-divergence-python-example-b87069e4b810

    Parameters
    ----------
    exper_number : int
        Index into ``publish_sightIp.experiment_lst`` (notebook global).
    col_wise : bool
        False: one divergence per model over the whole test array.
        True: one divergence per column/row, plotted as lines.
    plot : bool
        If true, show a histogram plus the KDE curve for every density estimated.
    col_wise_method : str
        ``"freq"`` slices ``[:, i]``; any other value slices ``[i, :]``.
        With ``"time"`` a rolling median (window 5) is applied before plotting.

    Returns
    -------
    dict mapping model name to divergence when ``col_wise`` is false;
    ``None`` (plot only) when ``col_wise`` is true.
    """
    experiment = publish_sightIp.experiment_lst[exper_number]
    xTe = np.array(experiment["xTe"])
    pred_test = experiment["prediction"]

    def get_empirical_pdf_data(obj, plot = plot):
        """Evaluate the Gaussian KDE of the flattened `obj` on the fixed grid."""
        obj = np.asarray(obj).flatten()
        x = np.linspace(-4, 3, 200)
        nparam_density = stats.gaussian_kde(obj)(x)
        if plot:
            # Histogram of the data the density was fitted to. `density=True`
            # replaces the removed `normed=True`; the old code also always
            # drew the "uniform" prediction here instead of `obj`.
            plt.hist(obj, density=True)
            plt.plot(x, nparam_density, 'k--', label='non-parametric density')
            plt.legend()
            plt.show()
        return(nparam_density)

    def kl_divergence(p, q):
        """Sum of p * log(p / q) over the grid, skipping points where p == 0."""
        return np.sum(np.where(p != 0, p * np.log(p / q), 0))

    if not col_wise:
        ground_truth_epdf = get_empirical_pdf_data(obj = xTe)
        kl_divergence_dict = {}
        for key in pred_test.keys():
            epdf_spec = get_empirical_pdf_data(np.array(pred_test[key]))
            kl_divergence_dict[key] = kl_divergence(epdf_spec, ground_truth_epdf)
        return(kl_divergence_dict)

    by_frequency = col_wise_method == "freq"
    which_axis = 1 if by_frequency else 0
    n_slices = xTe.shape[which_axis]

    def take_slice(arr, i):
        """Column i for the "freq" method, row i otherwise."""
        return arr[:, i] if by_frequency else arr[i, :]

    # The ground-truth densities do not depend on the model: estimate them once.
    truth_epdfs = [get_empirical_pdf_data(obj = take_slice(xTe, i)) for i in range(n_slices)]

    kl_divergence_dict = {}
    for key in pred_test.keys():
        pred_arr = np.array(pred_test[key])
        kl_divergence_dict[key] = [
            kl_divergence(get_empirical_pdf_data(take_slice(pred_arr, i)), truth_epdfs[i])
            for i in range(n_slices)
        ]

    kl_divergence_df = pd.DataFrame(kl_divergence_dict)

    # Smooth the per-time-step curves with a rolling median.
    if col_wise_method == "time":
        print("rolling")
        for col in list(kl_divergence_df.columns):
            print(col)
            print(kl_divergence_df[col])
            kl_divergence_df[col] = kl_divergence_df[col].rolling(5).median()

    len_df = len(kl_divergence_df)
    kl_divergence_df = kl_divergence_df.melt()
    kl_divergence_df.columns = ["model", "kl divergence"]
    # melt stacks the model columns one after another, so the slice index
    # simply repeats once per model (the column is called "freq_idx" for both methods).
    kl_divergence_df["freq_idx"] = list(range(len_df))*len(kl_divergence_df.model.unique())
    sns.lineplot(x = "freq_idx", y = "kl divergence", data = kl_divergence_df, hue = "model")
    plt.show()
    
   

In [None]:
# Per-column KL line plots for the first two experiments; in this mode
# get_kl_div only plots, so kl_spec ends up as None.
for i in range(2):
    kl_spec = get_kl_div(i, col_wise = True, plot = False)

### Broad kl-divergence

In [None]:
# Whole-array KL divergence (one dict of model -> value) for the first two experiments.
kl_div_lst= []
for i in trange(2):
    kl_spec = get_kl_div(i, plot = False)
    print(kl_spec)
    kl_div_lst.append(kl_spec)


In [None]:
# Long-format table (model, kl divergence) of the per-experiment results, shown as a swarm plot.
kl_div_pd = pd.DataFrame(kl_div_lst)
kl_div_pd = kl_div_pd#.drop(columns = ["best interpolation", "zhizhuo"])
kl_div_pd = kl_div_pd.melt()
kl_div_pd.columns = ["model", "kl divergence"]
sns.swarmplot(x = "model", y = "kl divergence", data = kl_div_pd)

print(len(hi.f))


# Rebuild a time axis between min(hi.T) and max(hi.T) with step 1/2751.5 and export it.
# The assert checks that the new axis has as many entries as hi.f.
new_T = np.arange(min(hi.T),max(hi.T), step = 1/2751.5)
assert len(new_T) == len(hi.f)
T_dict = {"T": new_T}
# NOTE(review): within this notebook `savemat` is only imported in a later cell
# (`from scipy.io import savemat`); confirm that PyFiles/imports.py provides it
# before running this cell first.
savemat("new_T.mat", T_dict)

In [None]:
sns.color_palette("tab10")

In [None]:
o = sns.color_palette().as_hex()
list(np.array(o)[[2,3,1,0]])

In [None]:
# Save experiment 0 on its own as a pickle, wrapped in a dict under the key "experiment".
obs4 = publish_sightIp.experiment_lst[0]
obs4_dictt = {"experiment": obs4}

with open('obs4.pickle', 'wb') as handle:
    pickle.dump(obs4_dictt, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
make_pretty_pics(4)

# Export RC prediction, linear interpolation and ground truth of experiments
# 0 and 4 to MATLAB files.
arr1 = np.array(publish_sightIp.experiment_lst[0]["prediction"]["uniform"])
arr1.shape
arr2 = np.array(publish_sightIp.experiment_lst[4]["prediction"]["uniform"])

rc1dict = {"rc_pred" : arr1,
           "interpolation" : publish_sightIp.experiment_lst[0]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[0]["xTe"]) }
# All three entries must come from the same experiment: the interpolation and
# ground truth used to be taken from experiment 1 while rc_pred came from 4.
rc2dict = {"rc_pred" : arr2,
           "interpolation" : publish_sightIp.experiment_lst[4]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[4]["xTe"]) }
savemat("rc1.mat", rc1dict)
savemat("rc2.mat", rc2dict)

In [None]:
def get_zhizhuo_freqs(nn = 1):
    """Print a summary of the first `nn` experiments that use a 0.5 split.

    For every such experiment in ``publish_sightIp.experiment_lst`` this prints
    the experiment index, the nrmse values rounded to 3 decimals, the first and
    last missing (response) frequency, the 1-based (MATLAB) response index
    range, the number of missing lines, and the number of observers.

    The stored ``"nrmse"`` dict is left untouched: rounding is done on a copy,
    so calling this function no longer degrades the saved results.

    Frequencies and the "T's" line are read from the notebook global ``hi``.
    NOTE(review): ``hi`` is one specific experiment object, so the "T's" line
    is the same for every `n` — confirm that this is intended.
    """
    for n in range(nn):
        experiment = publish_sightIp.experiment_lst[n]
        if experiment["get_observer_inputs"]["split"] != 0.5:
            continue

        rounded_nrmse = {key: np.round(value, 3) for key, value in experiment["nrmse"].items()}
        print(n)
        print(rounded_nrmse)

        resp_idx_spec = experiment["resp_idx"]
        missing_frequencies = [np.round(hi.f[idx],1) for idx in resp_idx_spec]
        print(missing_frequencies[0])
        print(missing_frequencies[-1])

        # MATLAB indices are 1-based
        matlab_resp_idxs = [idx + 1 for idx in resp_idx_spec]

        print("response indices: " + str(matlab_resp_idxs[0]) + " " + str(matlab_resp_idxs[-1]))
        print("number of f missing lines: " + str(len(matlab_resp_idxs)))

        print("T's: (" + str(hi.xTe.shape[0]) + ", " + str(hi.A.shape[0]) + ")")

        matlab_obs_idxs = [idx + 1 for idx in experiment["obs_idx"]]
        print("total observers: " + str(len(matlab_obs_idxs)))

In [None]:
#get_zhizhuo_freqs()
get_zhizhuo_freqs(24)

In [None]:
# Collect RC prediction, linear interpolation and ground truth for experiments 0 and 13.
# This cell only builds the dicts; nothing is written to disk here.
arr1 = np.array(publish_sightIp.experiment_lst[0]["prediction"]["uniform"])
arr1.shape
arr2 = np.array(publish_sightIp.experiment_lst[13]["prediction"]["uniform"])
arr2.shape
# bare expressions above and below are not displayed (not the cell's last statement)
publish_sightIp.experiment_lst[13]["nrmse"]

rc1dict = {"rc_pred" : arr1,
           "interpolation" : publish_sightIp.experiment_lst[0]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[0]["xTe"]) }
rc2dict = {"rc_pred" : arr2,
           "interpolation" : publish_sightIp.experiment_lst[13]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[13]["xTe"]) }

In [None]:
from scipy.io import savemat
# Same dicts as the previous cell (experiments 0 and 13), this time exported to .mat files.
arr1 = np.array(publish_sightIp.experiment_lst[0]["prediction"]["uniform"])
arr1.shape
arr2 = np.array(publish_sightIp.experiment_lst[13]["prediction"]["uniform"])
arr2.shape
publish_sightIp.experiment_lst[13]["nrmse"]

rc1dict = {"rc_pred" : arr1,
           "interpolation" : publish_sightIp.experiment_lst[0]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[0]["xTe"]) }
rc2dict = {"rc_pred" : arr2,
           "interpolation" : publish_sightIp.experiment_lst[13]["prediction"]["ip: linear"],
           "ground_truth" : np.array(publish_sightIp.experiment_lst[13]["xTe"]) }
# written to the current working directory
savemat("rc1.mat", rc1dict)
savemat("rc2.mat", rc2dict)

In [None]:
plt.imshow(np.array(publish_sightIp.experiment_lst[0]["xTe"]), aspect = .1)

In [None]:
publish_sightIp.hyper_parameter_plot()

In [None]:
#publish_sightIp.loss_plot(split = 0.5, rolling = 40)

# Rolling(50) mean of the R column over time for the 50/50 split frame.
# NOTE(review): `blindIP_loss_df_50_50_split` is not defined in any visible cell
# (only the 90/10 frame is built, in the next cell); this raises NameError unless
# it was created elsewhere — confirm.
sns.lineplot(x = "time", y = blindIP_loss_df_50_50_split.R.rolling(50).mean(),
             data = blindIP_loss_df_50_50_split, hue = "model", alpha = 0.9)

In [None]:
# R over time for the rows with split == 0.9: faint scatter of the raw values
# plus the rolling(50) mean of the R column.
blindIP_loss_df = publish_sightIp.rDF
plt.figure(figsize = (16,8))
blindIP_loss_df_90_10_split = publish_sightIp.rDF[publish_sightIp.rDF.split == 0.9]
#blindIP_loss_df_50_50_split[blindIP_loss_df_50_50_split.model != "exponential"] = blindIP_loss_df_50_50_split
sns.scatterplot(x = "time", y = "R", data = blindIP_loss_df_90_10_split, hue = "model", alpha = 0.02)

sns.lineplot(x = "time", y = blindIP_loss_df_90_10_split.R.rolling(50).mean(),
             data = blindIP_loss_df_90_10_split, hue = "model", alpha = 0.9)


### Zhizhuo results

In [None]:
%run -i 'PyFiles/imports.py'
%run -i 'PyFiles/helpers.py'
%run -i "PyFiles/experiment.py"
%run -i "PyFiles/analysis.py"
files2import = glob.glob('/Users/hayden/Desktop/zhizhuo_block_results/*.mat')
files2import

In [None]:
# Load every globbed .mat file, in the order glob returned them.
data_lst = []
for i in files2import:
    data_lst.append(loadmat(i))

In [None]:
# NOTE(review): this unpacking relies on the order of `files2import`, which comes
# from glob.glob and is not guaranteed to be sorted — verify that each name really
# receives the matching file.
y_real51, y_hat101, y_real101, y_hat51 = data_lst
# Take the 4th key of each loaded dict as the payload.
# presumably the first three keys are loadmat's metadata entries — TODO confirm
truth_51 = y_real51[list(y_real51.keys())[3]]
pred_51  = y_hat51[list(y_hat51.keys())[3]]
print(nrmse(pred_51, truth_51))
plt.imshow(truth_51, aspect = 4)
plt.show()
plt.imshow(pred_51, aspect = 4)
plt.show() 

In [None]:
# Target block: 1740..2240 in steps of 10 (51 values).
targ_freqs_51 = list(range(1740, 2240 + 10, 10)) 
# 26 observer values directly below and 26 directly above the target block, same step.
obs_freqs51a = list(range(1740  - (26*10), 1740, 10))
obs_freqs51b = list(range(2240 + 10, 2240 + (26*10) + 10, 10))
# Sanity checks: the observer bands touch the target block with no gap and no overlap.
assert targ_freqs_51[-1] + 10 == obs_freqs51b[0]
assert targ_freqs_51[0] - 10 == obs_freqs51a[-1]


In [None]:

# Same comparison as for the 51 block: the 4th key of each loaded dict is used as the payload.
truth_101 = y_real101[list(y_real101.keys())[3]]
pred_101  = y_hat101[list(y_hat101.keys())[3]]
print(nrmse(pred_101, truth_101))
plt.imshow(truth_101, aspect = 4)
plt.show()
plt.imshow(pred_101, aspect = 4)
plt.show() 

In [None]:


# Build an experiment on the "medium" dataset with the observer / target
# frequency lists defined in the earlier cell.
zhizhuo51 = EchoStateExperiment("medium", obs_freqs = obs_freqs51a + obs_freqs51b, target_freqs = targ_freqs_51 )
zhizhuo51.f[zhizhuo51.resp_idx[0]]
#print(zhizhuo51.A.shape)
#for i in zhizhuo51.A.shape[1]
# The loop below is disabled by storing it in a string.
# NOTE(review): this rebinds the global `hi` to a str; cells that read hi.f /
# hi.xTe / hi.A (e.g. get_zhizhuo_freqs) will fail if run after this one.
hi = """
for i in range(0,1000):
    his = truth_51.T[0:]
    mine = zhizhuo51.A[:, i ] #511:
    assert his.shape == mine.shape, str(his.shape) + str(mine.shape)
    if np.array_equal(his,mine):
        print(i)
"""

def retrieve_zhizhuo_series(series_idx):
    """Find which series of ``zhizhuo51.A`` best matches a row of ``truth_51`` and plot both.

    Row `series_idx` of ``truth_51`` is compared, by nrmse, with
    ``zhizhuo51.A.T[i, 511:]`` for every ``i`` in ``range(1000)``; the index
    with the smallest nrmse is plotted against the reference row, and the plot
    title shows that index together with ``zhizhuo51.f`` at that index.

    Both ``zhizhuo51`` and ``truth_51`` are notebook globals. The offset 511
    and the search range 1000 are hard-coded.
    """
    his = truth_51[series_idx,:]
    # Transpose once instead of on every iteration.
    series_by_row = zhizhuo51.A.T
    nrmse_series = pd.Series([nrmse(his, series_by_row[i, 511:]) for i in range(0, 1000)])
    candidate_idx = nrmse_series.idxmin()
    candidate = series_by_row[candidate_idx , 511: ]
    plt.plot(candidate, "--", linewidth = 5, alpha = 0.6, label = "actual data") 
    plt.plot(his, ":r", label = "zhizhuo testset", linewidth = 3, alpha = 0.5)
    titl = str(candidate_idx) + " idx: " +  str(zhizhuo51.f[candidate_idx]) + " Hz"
    plt.title(titl)
    plt.legend()
    print(truth_51.shape)
retrieve_zhizhuo_series(0)
plt.show()
retrieve_zhizhuo_series(-1)
plt.show()
plt.plot(zhizhuo51.A[511:, 249])

#plt.plot(zhizhuo51.A[174, :])
#zhizhuo51.get_observers(split = 0.4995, method = "exact", plot_split = True)
#zhizhuo51.runInterpolation()

In [None]:
# The local test set (transposed) must have the same shape as the external
# ground truth before the two are compared visually.
assert zhizhuo51.xTe.T.shape == truth_51.shape, str(zhizhuo51.xTe.T.shape) + " != " + str(truth_51.shape)
sns.heatmap(truth_51)
plt.show()
sns.heatmap(zhizhuo51.xTe.T)
plt.show() 
plt.imshow(pred_51, aspect = 4)
plt.show() 

# NOTE(review): reads zhizhuo51.ip_res, but the call that would run the interpolation
# is commented out in the previous cell — confirm ip_res is populated elsewhere.
plt.imshow(zhizhuo51.ip_res["prediction"].T, aspect = 4)
plt.show() 

In [None]:
plt.imshow(zhizhuo51.xTe)

### End Zhizhuo block results

In [None]:
#TODO: Step 2: store hyper-parameter-results: Let's get some nice hyper-parameter plots.
#TODO: Step 1: check if observers are correct:
#TODO: fix


def check_shape_obs(file = "default"):
    """Load a JSON results file and return its parsed content.

    file: path of the JSON file. With the sentinel value "default" the path
          is obtained from get_new_filename(exp = exp, current = True), which
          relies on the global `exp` being defined.
    """
    json_path = file if file != "default" else get_new_filename(exp = exp, current = True)
    with open(json_path) as handle:
        return json.load(handle)





#experiment.save_json(exp = False)
#fp = bp + 'targetKhz:_0.01__obskHz:_0.01.txt'
#fp = bp + 'targetKhz:_0.02__obskHz:_0.01.txt'
def topline(spec_path, 
            base_path = "/Users/hayden/Desktop/experiment_results/2k/medium/",
            #base_path = #"./experiment_results/..."
            verbose = False,
            print_filestructure = False):
    """Load one experiment result file and optionally print an overview of it.

    Parameters
    ----------
    spec_path : str
        File name, appended to `base_path` by plain string concatenation.
    base_path : str
        Directory prefix (must end with a path separator). Always printed.
    verbose : bool
        If true, print the experiment inputs, observer inputs, best arguments
        and nrmse entries of the loaded dict.
    print_filestructure : bool
        If true, print the top-level keys and, for dict values, their sub-keys.

    Returns
    -------
    The object returned by ``load_data(file = base_path + spec_path)``.
    """
    print(base_path)
    fp = base_path + spec_path

    hi = load_data(file = fp)
    if print_filestructure:
        for key, value in hi.items():
            print(key + "/")
            if isinstance(value, dict):
                for sub_key in value:
                    print("    " + sub_key)

    if verbose:
        print("DATA STRUCTURE: (it's a dict)")
        # "\n" (newline) was previously written as the literal text "/n"
        print("\n inputs:")
        print(hi["experiment_inputs"])
        print(hi["get_observer_inputs"])

        print("\n key saved values:")
        print(hi["best arguments"])
        print(hi["nrmse"])
    return(hi)



# New experiments

In [None]:
def ifdel(dictt, key):
    """Delete `key` from `dictt` if it is present and return the same dictionary.

    The dictionary is modified in place; a missing key is not an error.
    """
    # pop with a default removes the key when present and is a no-op otherwise,
    # without a bare `except` that would also hide unrelated errors.
    dictt.pop(key, None)
    return(dictt)
ifdel({"a":1, "b" : 2}, "a")

In [None]:
assert(check_for_duplicates(complete_experiment_path_lst) != True), "duplicates found"
%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"
bp_ = ""# "./experiment_results/"



# fix nrmse calculation AND interpolation
# For every stored experiment: recompute each model's nrmse against the test
# set of the rebuilt experiment object, then replace the entry with the result
# of fix_interpolation(..., method = "linear"). The index loop is needed
# because the list entry is reassigned.
for i in trange(len(experiment_lst), desc='experiment list, fixing interpolation...'): 
    exper_ = experiment_lst[i]
    exper_obj = get_experiment(exper_)
    
    # train_set is not used below
    train_set, test_set = exper_obj.xTr, exper_obj.xTe
    models_spec = list(exper_["prediction"].keys())
    
    for model_ in models_spec:
        pred_ = exper_["prediction"][model_]
        corrected_nrmse = nrmse(pred_, test_set)
        # overwrites the stored score in place
        exper_["nrmse"][model_] = corrected_nrmse
        
    experiment_lst[i] = fix_interpolation(exper_, method = "linear") 
    



In [None]:
def optimize_combination(ip, exp, Test, n = 10, optimize = True):
    """Blend two predictions linearly and score the blend against `Test`.

    The hybrid prediction is ``(1 - a) * ip + a * exp``.

    Parameters
    ----------
    ip, exp : array-like supporting scalar multiplication and addition
        The two predictions to blend (interpolation and exponential model).
    Test : array-like
        Ground truth passed to ``nrmse``.
    n : int
        Number of grid steps. The weights tried are ``a = 0, 1/n, ..., 1``,
        i.e. ``n + 1`` candidates including both pure predictions (the weight
        ``a = 1`` used to be left out).
    optimize : bool
        True: grid-search `a` and return the best blend.
        False: use the fixed 50/50 blend.

    Returns
    -------
    (hybrid_prediction, hybrid_nrmse)

    Raises
    ------
    ValueError
        If `optimize` is true and ``n < 1``.
    """
    if not optimize:
        hybrid_pred  = (0.5 * ip) + (0.5 * exp)
        hybrid_nrmse = nrmse(hybrid_pred , Test)
        return(hybrid_pred, hybrid_nrmse)

    if n < 1:
        raise ValueError("n must be at least 1")

    # Candidate weights on `exp`, both end points included.
    vals = [i / n for i in range(n + 1)]
    predictions = [((1 - a) * ip + a * exp) for a in vals]
    nrmses = [nrmse(hybrid_pred, Test) for hybrid_pred in predictions]

    idx = np.argmin(nrmses)
    print(nrmses)
    print("A!! " + str(vals[idx]))
    return(predictions[idx], nrmses[idx])
    



### add hybrids
for i in range(len(experiment_lst)):
    predictions_= experiment_lst[i]["prediction"]
    Train, Test = recover_test_set(experiment_lst[i])
    # Fixed 50/50 blend of interpolation and exponential predictions. This call
    # had been commented out, which left hybrid_pred_ / hybrid_R undefined (or
    # stale from an earlier run) when they were stored below.
    hybrid_pred_, hybrid_R = optimize_combination(np.array(predictions_["interpolation"]),
                                                  np.array(predictions_["exponential"]),
                                                  Test, optimize = False)
    experiment_lst[i]["nrmse"]["hybrid"]      = hybrid_R
    experiment_lst[i]["prediction"]["hybrid"] = hybrid_pred_
    

In [None]:

def get_df():
    """Build long-format nrmse tables from the global ``experiment_lst``.

    For every experiment the ``"nrmse"`` dict becomes one row (one column per
    model). A second table holds the same values divided by the experiment's
    baseline nrmse ("interpolation", or "uniform" when IGNORE_IP is set).
    Both tables are melted to one row per (model, experiment) and joined with
    the experiment index, observer hz, target hz and target frequency.

    Returns
    -------
    (df, df_rel) : tuple of pandas.DataFrame
        Absolute and relative tables with the columns
        ``experiment, model, nrmse, obs hz, target hz, target freq``.
        (The function used to build both tables and then discard them.)

    Notes
    -----
    The per-experiment metadata is repeated a fixed number of times (4 models,
    or 2 when IGNORE_IP is set) to line up with the melted rows; if an
    experiment list carries a different number of models the join leaves NaNs.
    """
    IGNORE_IP = False
    # number of model columns each experiment contributes after melting
    n_models = 2 if IGNORE_IP else 4
    baseline_model = "uniform" if IGNORE_IP else "interpolation"

    def quick_dirty_convert(lst):
        """Repeat `lst` once per model and return it as a one-column DataFrame."""
        return(pd.DataFrame(np.array(lst * n_models).reshape(-1,1)))

    idx_lst = quick_dirty_convert(list(range(len(experiment_lst))))

    obs_hz_lst, targ_hz_lst, targ_freq_lst = [], [], []
    nrmse_rows, nrmse_rel_rows = [], []

    for experiment in experiment_lst:
        inputs = experiment["experiment_inputs"]
        targ_hz = inputs["target_hz"]
        obs_hz  = inputs["obs_hz"]

        # Values below 1 are rescaled by 1e6 so that all experiments share one unit.
        # presumably these were stored in MHz — TODO confirm
        if targ_hz < 1:
            targ_hz *= 1000*1000
            obs_hz  *= 1000*1000
        obs_hz_lst.append(obs_hz)
        targ_hz_lst.append(targ_hz)
        targ_freq_lst.append(inputs['target_frequency'])

        df_spec = pd.DataFrame(experiment["nrmse"], index = [0])
        nrmse_rows.append(df_spec)
        nrmse_rel_rows.append(df_spec / experiment["nrmse"][baseline_model])

    df = pd.concat(nrmse_rows)
    df_rel = pd.concat(nrmse_rel_rows)

    obs_hz_lst, targ_hz_lst = quick_dirty_convert(obs_hz_lst), quick_dirty_convert(targ_hz_lst)
    targ_freq_lst = quick_dirty_convert(targ_freq_lst)

    if IGNORE_IP:
        df_rel = df_rel.drop(columns = ["interpolation"])
        df  = df.drop(columns = ["interpolation"])

    # melt stacks the model columns one after another, which matches the
    # model-major repetition produced by quick_dirty_convert.
    df, df_rel = pd.melt(df), pd.melt(df_rel)
    df  = pd.concat( [idx_lst, df,  obs_hz_lst, targ_hz_lst, targ_freq_lst] ,axis = 1)
    df_rel = pd.concat( [idx_lst, df_rel,  obs_hz_lst, targ_hz_lst, targ_freq_lst], axis = 1)

    col_names = ["experiment", "model", "nrmse", "obs hz", "target hz", "target freq" ]
    df.columns, df_rel.columns    = col_names, col_names
    return df, df_rel


In [None]:
df = df.dropna()
# Start from the "uniform" rows; .copy() makes df_diff an independent frame so
# the assignments below do not write to a slice of df (SettingWithCopyWarning).
df_diff = df[df["model"] == "uniform"].copy()
df_diff["model"] = "diff"
# (exponential - uniform) nrmse, scaled by 100; negative means exponential did better.
nrmse_ = (df[df["model"] == "exponential"]["nrmse"].values - df_diff["nrmse"].values) * 100
df_diff["nrmse"] = nrmse_
def plot_loss_reduction():
    """Print and plot how exponential weights change the nrmse relative to uniform weights.

    Uses the global ``df``. The plotted quantity is
    ``(nrmse_exponential - nrmse_uniform) * 100`` per experiment, so negative
    values mean the exponential model had the lower nrmse. Prints the share of
    experiments with a negative value and the rounded mean, then draws a KDE of
    the values with vertical lines at zero and at the mean.
    """
    # Independent copy: the column assignments below must not write to a slice of df.
    df_diff = df[df["model"] == "uniform"].copy()
    df_diff["model"] = "diff"

    nrmse_ = (df[df["model"] == "exponential"]["nrmse"].values - df_diff["nrmse"].values) * 100
    df_diff["nrmse"] = nrmse_
    pct = round(np.mean(nrmse_ < 0) * 100,2)
    print("odds of loss reduction with exponential weights vs uniform weights: " + str(pct) + "%")
    print("mean % loss change: " + str(round(np.mean(nrmse_))) + "%")

    plt.subplots(1,1,figsize = (10, 6))
    plt.xlabel("%change in loss")
    plt.ylabel("density")
    sns.kdeplot(df_diff["nrmse"], shade = True)
    plt.axvline(x=0, color = "black", label = "zero")
    plt.axvline(x=np.mean(df_diff["nrmse"]), color = "red", label = "mean loss reduction")

    plt.legend()
    plt.show()
plot_loss_reduction()

In [None]:
def plot_loss_reduction2d(xx = "target hz"):
    """2-d KDE plus scatter of the nrmse difference against column `xx`.

    Uses the global ``df_diff``; its "nrmse" column is plotted on the y axis.

    xx: name of the ``df_diff`` column to put on the x axis
        (e.g. "target hz", "obs hz", "target freq").
    """
    fig, ax = plt.subplots(1, 1, figsize = (6,6))
    sns.kdeplot(df_diff[xx], df_diff["nrmse"],
                     cmap="Blues", shade=True, shade_lowest=False, ax = ax)#, alpha = 0.5)
    sns.scatterplot(x = xx, y = "nrmse", data = df_diff, ax = ax,  linewidth=0, color = "black", alpha = 0.4)
    # The title names the column that is actually plotted (it used to say
    # "target hz" for every choice of xx).
    plt.title("2d kde plot: nrmse vs " + xx)
    # NOTE(review): the reference line sits at y = 0.5, not at zero — confirm intent.
    plt.axhline(y=0.5, color='black', linestyle='-')
    ax.set_ylabel("pct loss exp vs unif RC")
plot_loss_reduction2d()
plot_loss_reduction2d(xx = "obs hz")
plot_loss_reduction2d(xx = "target freq")

In [None]:
def plot_nrmse_kde_2d(xx = "target hz", 
                      log = True, 
                      alph = 1, 
                      black_pnts = True, 
                      models = {"interpolation" : "Greens", "exponential" : "Reds", "uniform" : "Blues"},
                      enforce_bounds = False,
                      target_freq = None):
    """Overlay, per model, a 2-d KDE, a scatter and a line of nrmse against column `xx`.

    Uses the global ``df``.

    Parameters
    ----------
    xx : str
        Column of ``df`` for the x axis.
    log : bool
        Plot log10(nrmse) instead of nrmse.
    alph : float
        Alpha of the KDE layer.
    black_pnts : bool
        Draw scatter points in black instead of the model colour.
    models : dict
        Maps model name to a matplotlib colormap name. "Blues", "Reds" and
        "Greens" get the matching line colour; any other colormap falls back
        to gray lines instead of failing. The dict is only read, never modified.
    enforce_bounds : bool
        Clamp the y axis to [0, 1].
    target_freq : optional
        If given, only rows whose "target freq" equals this value are used.
    """
    if target_freq is not None:
        df_spec = df[df["target freq"] == target_freq]
    else:
        df_spec = df.copy()

    # line / point colour that goes with each supported colormap
    cmap_to_color = {"Blues": "blue", "Reds": "red", "Greens": "green"}

    def plot_(model_, colorr, alph = alph,  black_pnts =  black_pnts):
        """Draw all layers for one model using colormap `colorr`."""
        color_ = cmap_to_color.get(colorr, "gray")

        df_ = df_spec[df_spec.model == model_]

        hi = df_["nrmse"]
        # reference level nrmse = 1 (0 on the log scale)
        cap = 1
        if log:
            hi = np.log10(hi)
            cap = np.log10(cap)

        sns.kdeplot(df_[xx], hi, cmap= colorr, 
                    shade=True, shade_lowest=False, ax = ax, label = model_, alpha = alph)

        col_scatter = "black" if black_pnts else color_

        sns.scatterplot(x = xx, y = hi, data = df_,  linewidth=0, 
                        color = col_scatter, alpha = 0.4, ax = ax)

        plt.title("2d kde plot: nrmse vs " + xx)

        # NOTE(review): the label says "mean", but the line is drawn at the
        # fixed reference level `cap`, not at the model's mean — confirm intent.
        plt.axhline(y=cap, color=color_, linestyle='-', label = "mean " + str(model_), alpha = 0.5)
        sns.lineplot(y = hi, x = xx, data = df_ , color = color_)
        if enforce_bounds:
            ax.set_ylim(0,1)
        if log:
            ax.set_ylabel("log( NRMSE) ")
        else: 
            ax.set_ylabel("NRMSE")

    fig, ax = plt.subplots(1, 1, figsize = (12,6))
    for model, cmap_name in models.items():
        print(model)
        plot_(model, cmap_name, alph = alph)
    #plot_("interpolation", "Blues")
    #plot_("exponential", "Reds", alph = alph)
    
def kde_plots( target_freq = None, 
               log = False, 
               model = "uniform", 
               models = {"interpolation" : "Greens", "exponential" : "Reds", "uniform" : "Blues"},
               enforce_bounds = True,
               split = None):
    """
    Draw the two standard NRMSE 2-d KDE figures by calling ``plot_nrmse_kde_2d`` twice:
    once with its default x variable and once with ``xx = "obs hz"``.

    Parameters
    ----------
    target_freq : forwarded to ``plot_nrmse_kde_2d``; ``None`` means no filtering there.
    log : forwarded to ``plot_nrmse_kde_2d``. Previously accepted but overridden
        with ``False`` in both calls.
    model : unused; kept so existing callers do not break.
    models : mapping of model name -> seaborn colormap name, forwarded as-is.
        The default dict is only read, never mutated.
    enforce_bounds : forwarded to ``plot_nrmse_kde_2d``. Previously overridden
        with ``True``, which is also the default here.
    split : unused; kept so existing callers do not break.

    Notes
    -----
    ``plot_nrmse_kde_2d`` clamps the y axis to (0, 1) when ``enforce_bounds`` is
    true, so with ``log = True`` you will normally want ``enforce_bounds = False``.

    An earlier version looped over the models building jittered coordinates and
    an nrmse dict that nothing consumed; that dead code has been removed.
    """
    shared_arguments = {"black_pnts" : False,
                        "alph" : 0.3,
                        "target_freq" : target_freq,
                        "log" : log,
                        "models" : models,
                        "enforce_bounds" : enforce_bounds}

    # Figure 1: NRMSE against plot_nrmse_kde_2d's default x variable.
    plot_nrmse_kde_2d(**shared_arguments)

    # Figure 2: NRMSE against the observer bandwidth column.
    plot_nrmse_kde_2d(xx = "obs hz", **shared_arguments)
    
               
    
    


# Draw the KDE figures for the interpolation and "hybrid" models only.
# NOTE(review): assumes df contains rows with model == "hybrid" -- confirm.
kde_plots(models = {"interpolation" : "Greens", "hybrid" : "Reds"})#, "uniform" : "Blues"},)


In [None]:
# Same figures for all three models. Note the colour assignment differs from
# kde_plots' default mapping: exponential is "Blues" and uniform is "Reds" here.
kde_plots(models = {"interpolation" : "Greens", "exponential" : "Blues", "uniform" : "Reds"})

In [None]:
from scipy.ndimage import gaussian_filter
 
def optimize_combination(ip, exp, Test, n = 10, optimize = True):
    """
    Blend the interpolation and exponential predictions into one hybrid prediction.

    The hybrid is ``(1 - a) * ip + a * exp``.

    Parameters
    ----------
    ip, exp : predictions of the two models; they must support scalar
        multiplication and addition with each other (e.g. numpy arrays).
    Test : ground truth handed to ``nrmse`` as its second argument.
    n : number of grid steps for the weight ``a``. The grid is
        ``0, 1/n, ..., 1`` (n + 1 candidates). The end point ``a = 1`` (pure
        ``exp``) used to be missing from the search and is now included.
    optimize : when falsy, skip the search and return the fixed 50/50 blend.

    Returns
    -------
    (prediction, nrmse) of the best candidate, or of the 50/50 blend.

    Raises
    ------
    ValueError if ``optimize`` is set and ``n < 1``.
    """
    if not optimize:
        hybrid_pred  = (0.5 * ip) + (0.5 * exp)
        hybrid_nrmse = nrmse(hybrid_pred , Test)
        return(hybrid_pred, hybrid_nrmse)

    if n < 1:
        raise ValueError("n must be a positive integer, got " + str(n))

    # Candidate weights on the exponential model, both end points included.
    vals = [i / n for i in range(n + 1)]
    predictions = [((1 - a) * ip + a * exp) for a in vals]
    nrmses = [nrmse(hybrid_pred, Test) for hybrid_pred in predictions]

    idx = np.argmin(nrmses)
    print(nrmses)
    print("A!! " + str(vals[idx]))
    return(predictions[idx], nrmses[idx])
    



In [None]:
# Call show_images (defined outside this section) on the first stored experiment.
show_images(experiment_lst[0])

In [None]:



# Figure 1: violin (left) and box (right) plots of NRMSE per model, from df.
fig, ax = plt.subplots(1, 2, figsize = (14,4))
sns.violinplot( y = "nrmse" , x = "model", data = df, ax = ax[0])
sns.boxplot( y = "nrmse" , x = "model", data = df, ax = ax[1])
ax[0].set_title("General NRMSE vs MODEL across different RC's")
ax[1].set_title("General NRMSE vs MODEL across different RC's")
ax[0].set_ylabel("NRMSE"); ax[1].set_ylabel("NRMSE")



# Figure 2: the same pair of plots for df_rel.
# NOTE(review): presumably df_rel holds NRMSE relative to the interpolation model -- confirm where it is built.
fig, ax = plt.subplots(1, 2, figsize = (14,4))
sns.violinplot( y = "nrmse" , x = "model", data = df_rel, ax = ax[0])
sns.boxplot( y = "nrmse" , x = "model", data = df_rel, ax = ax[1])
ax[0].set_title("Relative NRMSE vs Interpolation model across different RC's")
# NOTE(review): the right-hand title below is spelled "Relavite"; "Relative" is presumably intended.
ax[1].set_title("Relavite NRMSE vs Interpolation model across different RC's")
ax[0].set_ylabel("Relative NRMSE"); ax[1].set_ylabel("Relative NRMSE")

# Figure 3: single violin plot of df_diff.
fig, ax = plt.subplots(1, 1, figsize = (7,4))
sns.violinplot( y = "nrmse" , x = "model", data = df_diff, ax = ax)
#sns.boxplot( y = "nrmse" , x = "model", data = df_rel, ax = ax[1])
ax.set_title("Relative NRMSE: ([exp nrmse] -  [unif nrmse])/[unif_nrmse] * 100")
# The y label is set twice with the same text; the second call is redundant.
ax.set_ylabel("Relative NRMSE"); ax.set_ylabel("Relative NRMSE")


In [None]:
# Categorical scatter of NRMSE per model, coloured by "target hz", for df and then df_rel.
sns.catplot(y = "nrmse" , x = "model", hue ="target hz", data = df)
plt.title("General NRMSE vs MODEL across different RC's")
sns.catplot(y = "nrmse" , x = "model", hue ="target hz", data = df_rel)
# NOTE(review): title is spelled "Relavite"; "Relative" is presumably intended.
plt.title("Relavite NRMSE vs Interpolation model across different RC's")

In [None]:
# Figures 1 and 2 are labelled log(NRMSE) and draw a reference line at
# log10(1) = 0, so they must plot log10 of the error. Plotting the raw values
# (as before) left the second figure, clipped to (-1.5, 0), essentially empty.
hi = np.log10(df["nrmse"])
cap = np.log(1.0)/np.log(10)  # log10(1): the NRMSE = 1 reference level

# Figure 1: log10(NRMSE) vs target hz, full y range.
fig, ax = plt.subplots(1,1, figsize = (12,6))
sns.scatterplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5, legend = False)
sns.lineplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5)
# The label used to reference `model_`, which is not defined at cell level.
plt.axhline(y=cap, color="black", linestyle='-', label = "NRMSE = 1", alpha = 0.5)
plt.ylabel("log( NRMSE)")

# Figure 2: same plot, zoomed to the region below the NRMSE = 1 line.
fig, ax = plt.subplots(1,1, figsize = (12,6))
sns.scatterplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5, legend = False)
sns.lineplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5)
plt.axhline(y=cap, color="black", linestyle='-', label = "NRMSE = 1", alpha = 0.5)
plt.ylabel("log( NRMSE)")
plt.ylim((-1.5,cap))



# Figure 3: raw NRMSE vs target hz.
hi = df["nrmse"]
fig, ax = plt.subplots(1,1, figsize = (12,6))
sns.scatterplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5, legend = False)
sns.lineplot( y = hi, x = "target hz", data = df, hue = "model", alpha = 0.5)
plt.ylabel("NRMSE")

In [None]:
# Natural-log NRMSE against "obs hz", one colour per model (scatter plus line).
hi = np.log(df["nrmse"])
fig, ax = plt.subplots(1,1, figsize = (12,6))
sns.scatterplot( y = hi, x = "obs hz", data = df, hue = "model", alpha = 0.5)
sns.lineplot( y = hi, x = "obs hz", data = df, hue = "model", alpha = 0.5)
ax.set_ylabel("Log ( NRMSE )")

In [None]:
# Categorical scatter of NRMSE per model, coloured by "target freq".
sns.catplot(y = "nrmse" , x = "model", hue ="target freq", data = df)
plt.title("General NRMSE vs MODEL across different RC's")
# Clip the first figure's y axis to (0, 1.5).
plt.ylim((0,1.5))
sns.catplot(y = "nrmse" , x = "model", hue ="target freq", data = df_rel)
# NOTE(review): title is spelled "Relavite"; "Relative" is presumably intended.
plt.title("Relavite NRMSE vs Interpolation model across different RC's")

In [None]:
# Categorical scatter of NRMSE per model, coloured by "obs hz", for df and then df_rel.
sns.catplot(y = "nrmse" , x = "model", hue ="obs hz", data = df)
plt.title("General NRMSE vs MODEL across different RC's")
sns.catplot(y = "nrmse" , x = "model", hue ="obs hz", data = df_rel)
# NOTE(review): title is spelled "Relavite"; "Relative" is presumably intended.
plt.title("Relavite NRMSE vs Interpolation model across different RC's")

# Old experiments

In [None]:
# Print a short summary (settings, target / observer indices and the matching
# frequencies) for every stored experiment.
for i, experiment in enumerate(experiment_lst):
    split = experiment['get_observer_inputs']["split"]
    targ_hz = experiment['experiment_inputs']["target_hz"]
    obs_hz = experiment['experiment_inputs']["obs_hz"]
    targ_idx_LB = experiment["resp_idx"][0]
    targ_idx_UB = experiment["resp_idx"][-1]

    # Frequency axis is taken from experiment_8_obj; frequencies are reported as max(f) - f[idx].
    f = np.array(experiment_8_obj.f)
    obs_idx = [int(j) for j in experiment["obs_idx"]]
    obs_freq = [max(f) - f[j] for j in obs_idx]

    print(f"\nexperiment: {i!s}, target hz: {targ_hz!s}, obs hz: {obs_hz!s}, split: {split!s}")
    print(f"target idx: [{targ_idx_LB!s}, {targ_idx_UB!s}]")
    print(f"target freq: [{max(f) - f[targ_idx_LB]!s}, {max(f) - f[targ_idx_UB]!s}]")
    print(f"obs idx: {obs_idx!s}")
    print(f"obs freq: {obs_freq!s}")

    # Row-count difference between the full matrix A and the stored interpolation prediction.
    n_rows_total = experiment_8_obj.A.shape[0]
    print(n_rows_total - np.array(experiment["prediction"]["interpolation"]).shape[0])
    print(experiment_8_obj.A.shape[0])

In [None]:
def show_exp_weights(json_obj, llambda = None):
    """
    Rebuild the exponential-model EchoStateNetwork of a stored experiment and
    call its ``get_exp_weights``.

    Parameters
    ----------
    json_obj : experiment dict; must provide "best arguments" -> "exponential",
        "obs_idx" and "resp_idx".
    llambda : optional override assigned to ``esn_.llambda`` before
        ``get_exp_weights`` is called. ``None`` keeps the stored value.
    """
    print(json_obj.keys())
    esn_ = EchoStateNetwork(**json_obj["best arguments"]["exponential"], plot = True)
    esn_.obs_idx  = json_obj["obs_idx"]
    esn_.resp_idx = json_obj["resp_idx"]
    # Identity test: `!= None` would compare element-wise if an array were passed.
    if llambda is not None:
        esn_.llambda = llambda
    esn_.get_exp_weights()


# Show the exponential weights of every stored experiment.
# After the loop the notebook-global `i` holds the LAST experiment dict.
for i in experiment_lst:
    show_exp_weights(i)  
#show_exp_weights(experiment_2)  



In [None]:
# Re-plot the weights with llambda overridden to 1e-3.
# NOTE(review): relies on the global `i` left behind by an earlier loop -- confirm it is the intended experiment.
show_exp_weights(i, llambda = 10**-3) 

In [None]:
# Scratch arithmetic: the first expression's value is discarded; the second evaluates log10(1e-4).
10**-2 
np.log(10**-4)/np.log(10)


In [None]:
# Categorical scatter of NRMSE per model, coloured by "target hz", for df and then df_rel.
sns.catplot(y = "nrmse" , x = "model", hue ="target hz", data = df)
plt.title("General NRMSE vs MODEL across different RC's")
sns.catplot(y = "nrmse" , x = "model", hue ="target hz", data = df_rel)
# NOTE(review): title is spelled "Relavite"; "Relative" is presumably intended.
plt.title("Relavite NRMSE vs Interpolation model across different RC's")

In [None]:
# Categorical scatter of NRMSE per model, coloured by "obs hz", for df and then df_rel.
sns.catplot(y = "nrmse" , x = "model", hue ="obs hz", data = df)
plt.title("General NRMSE vs MODEL across different RC's")
sns.catplot(y = "nrmse" , x = "model", hue ="obs hz", data = df_rel)
# NOTE(review): title is spelled "Relavite"; "Relative" is presumably intended.
plt.title("Relavite NRMSE vs Interpolation model across different RC's")

In [None]:


def merge_unif_exp(fp_unif, fp_exp):
    """
    Combine a uniform-model result file with an exponential-model result file.

    Both files are read with ``load_data``. Their "interpolation" predictions
    must be equal (asserted). The result starts as a shallow copy of the
    uniform data; for "prediction", "nrmse" and "best arguments" the
    "exponential" entry of the other file is merged in with ``Merge``.
    The merged "best arguments" are printed and the joint dict is returned.
    """
    exponential_run = load_data(fp_exp)
    uniform_run = load_data(fp_unif)

    same_baseline = (exponential_run["prediction"]["interpolation"]
                     == uniform_run["prediction"]["interpolation"])
    assert same_baseline, "something is wrong!"

    joint_dat = uniform_run.copy()
    for section in ("prediction", "nrmse", "best arguments"):
        exponential_entry = {"exponential" : exponential_run[section]["exponential"]}
        joint_dat[section] = Merge(joint_dat[section], exponential_entry)

    print(joint_dat["best arguments"])
    return joint_dat
#0.5_1.0


In [None]:
# Show df, then draw side-by-side 2-d KDEs of NRMSE vs "target hz" from df_rel:
# uniform model on the left (Reds), exponential model on the right (Blues).
display(df)
uniform_ = df_rel[df_rel.model == "uniform"]
exp_ = df_rel[df_rel.model == "exponential"]

fig, ax = plt.subplots(1,2, figsize = (12,6))
sns.kdeplot(uniform_["target hz"], uniform_["nrmse"],
                 cmap="Reds", shade=True, shade_lowest=False, ax = ax[0])#, alpha = 0.5)
sns.kdeplot(exp_["target hz"], exp_["nrmse"],
                 cmap="Blues", shade=True, shade_lowest=False, ax = ax[1])#, alpha = 0.5)
# Same y range on both panels so they are directly comparable.
ax[0].set_ylim(0,0.3)
ax[1].set_ylim(0,0.3)



In [None]:
#SMOOTH
#experiment_5 = load_data('/Users/hayden/Desktop/DL_LAB/Reservoir/MARIOS/experiment_results/2k/medium/split_0.5/targetKhz:_0.02__obskHz:_0.01.txt')
#                         #bp = '/Users/hayden/Desktop/')
#experiment_5_obj = get_experiment(experiment_5)
%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"

# Re-score every stored prediction of experiment_5 against the recovered test set.
# A deep copy is required: a shallow .copy() shares the nested "nrmse" dict, so
# the assignments below would also overwrite the values inside experiment_5.
experiment_ = copy.deepcopy(experiment_5)
for key, prediction in experiment_["prediction"].items():
    prediction = np.array(prediction)
    Train, Test = recover_test_set(experiment_)
    
    experiment_["nrmse"][key] = nrmse(prediction, Test)
get_experiment(experiment_)

In [None]:
#SMOOTH
# Load the stored experiment, re-score every prediction against the recovered
# test set, and build the experiment object from the re-scored copy.
experiment_5 = load_data('/Users/hayden/Desktop/DL_LAB/Reservoir/MARIOS/experiment_results/2k/medium/split_0.5/targetKhz:_0.02__obskHz:_0.01.txt')
                         #bp = '/Users/hayden/Desktop/')
# A deep copy is required: a shallow .copy() shares the nested "nrmse" dict, so
# the assignments below would also overwrite the values inside experiment_5.
experiment_ = copy.deepcopy(experiment_5)
for key, prediction in experiment_["prediction"].items():
    prediction = np.array(prediction)
    Train, Test = recover_test_set(experiment_)
    
    experiment_["nrmse"][key] = nrmse(prediction, Test)
experiment_5_obj = get_experiment(experiment_)

# Librosa

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import IPython.display as ipd
import os
import pickle
import IPython.display as ipd
def Audio(transform):
    """Return an ``ipd.Audio`` player built from ``transform["signal"]`` at rate ``transform["sr"]``."""
    return ipd.Audio(data = transform["signal"], rate = transform["sr"])

%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"


# Absolute path of the "high" recording; passed as `pth` to get_transform in later cells.
pth_high = "/Users/hayden/Desktop/18th_century_high.m4a"

#pth = "/Users/hayden/Desktop/19th_century_high.wav"#"/Users/hayden/Desktop/get_free.wav"

def partition_song(array, x_start, x_len,  y_start, y_stop):
    """
    Cut a rectangular window out of a 2-d array and return it as a copy.

    Parameters
    ----------
    array : 2-d array, indexed [row, column].
    x_start : first column of the window.
    x_len : number of columns in the window. This argument used to be ignored
        in favour of a hard-coded width of 3000.
    y_start, y_stop : row range of the window (stop exclusive).

    The shapes of the input and of the cut-out are printed. Slicing past the end
    of the array simply yields a smaller window.
    """
    print(array.shape)
    x_stop = x_start + x_len
    partitioned_array = array[y_start:y_stop, x_start:x_stop].copy()
    print(partitioned_array.shape)
    return(partitioned_array)

def get_transform(trans_type,
                  hop_length_mult = 1, pth = "/Users/hayden/Desktop/18th_century_dense.m4a",
                  n_bins_mult = 2, bins_per_octave =12, sr = None, y_axis = "hz", filter_scale = 1,
                  norm = 1, method = "db", label = None, partition = False, save_path = None
                  ):
    """
    Load an audio file, compute a librosa time-frequency transform, plot it,
    optionally pickle the result, and return an audio player for the signal.

    Parameters
    ----------
    trans_type : "stft", "cqt", "hybrid_cqt", "pseudo_cqt" or "vqt". Only "stft"
        and "cqt" have a frequency-axis helper wired up; the others raise
        NotImplementedError after the spectrogram has been drawn.
    pth : path of the audio file handed to ``librosa.load``.
    sr : sampling rate handed to ``librosa.load``; ``None`` keeps the file's
        native rate.
    y_axis : y-axis type for ``librosa.display.specshow``; forced to
        "cqt_note" when ``trans_type == "cqt"``.
    method : "pow" plots log10 of the power, anything else plots dB.
    label : required, used as the plot title.
    partition : if truthy, the first column of the window cut out of the matrix
        before plotting (rows 0-800; the width argument passed is 3000).
    save_path : if given, the transform dict is pickled with ``save_pickle``
        under ``save_path + "_" + trans_type``.
    hop_length_mult, n_bins_mult, bins_per_octave, filter_scale, norm :
        currently unused; kept so existing calls keep working.

    Returns
    -------
    The object returned by ``Audio`` for the loaded signal.
    """
    fmin   = librosa.note_to_hz('C0')
    if trans_type == 'cqt':
        y_axis = "cqt_note"

    assert label, "enter a label so that you can save a file"
    assert trans_type in ["stft", "cqt", "hybrid_cqt", "pseudo_cqt", "vqt"]

    x, sr = librosa.load(pth, sr=sr)

    # Transform function and its keyword arguments, per transform type.
    trans_dict = { "stft" : librosa.stft,
                   "cqt" : librosa.cqt,
                   "hybrid_cqt": librosa.hybrid_cqt,
                   "pseudo_cqt": librosa.pseudo_cqt,
                   "vqt" : librosa.vqt
    }
    default_args = {
         "stft" : {"n_fft" : 512},
         "cqt" : {"fmin" : fmin,
                  'pad_mode': 'wrap',
                  "sr" : sr},
         "hybrid_cqt": {},
         "pseudo_cqt": {},
         "vqt" : {}
    }
    X = trans_dict[trans_type](x, **default_args[trans_type])

    Xdb = librosa.amplitude_to_db(abs(X))
    Xpow = np.log10( librosa.db_to_power(Xdb))
    matrix2plot = Xpow if method == "pow" else Xdb

    if partition:
        # Cut a window for plotting only; the full Xdb / Xpow are what gets stored.
        matrix2plot = partition_song(matrix2plot, partition, 3000, 0, 800)

    fig, ax = plt.subplots(1,1, figsize=(7, 5)) 
    im1 = librosa.display.specshow(matrix2plot, x_axis='time', y_axis=y_axis, ax = ax)
    plt.colorbar(im1, format='%+2.0f dB')

    # Frequency-axis helper and its arguments, per supported transform type.
    freq_spec = {"stft": (librosa.fft_frequencies, {"sr" : sr, "n_fft" : 512}),
                 "cqt" : (librosa.cqt_frequencies, {"fmin" : fmin, "n_bins" : Xpow.shape[0]})}
    if trans_type not in freq_spec:
        # Previously this surfaced as an opaque KeyError.
        raise NotImplementedError("no frequency axis is defined for transform type '"
                                  + str(trans_type) + "'")
    freq_fun, freq_kwargs = freq_spec[trans_type]
    freqs = freq_fun(**freq_kwargs)

    transform = {"signal": x,
                 "sr" : sr,
                 "transform": {
                               "Xdb"  : Xdb,
                               "Xpow" : Xpow,
                               "f"  :  freqs
                               }
                }

    # Guard against a frequency vector that does not match the transform's rows.
    err_msg = str(len(freqs.tolist())) + "   " + str(Xpow.shape[0])
    assert len(freqs) == Xpow.shape[0], err_msg

    if save_path:
        save_path = save_path + "_" + trans_type
        print("saving at: " + str(save_path))
        save_pickle(save_path, transform)
        # Only print the reload hint when something was saved. The hint used to
        # be built unconditionally, raising TypeError for save_path = None, and
        # named a function that does not exist ("pickle_load").
        print_msg =  "\x1b[31m\""+ 'load_pickle(' + save_path + ')' + "\"\x1b[0m"
        print(" to load this transform type " + print_msg)

    audio = Audio(transform)
    plt.title(label)
    return(audio)


def save_pickle(path, transform):
    """
    Pickle ``transform`` to ``./pickle_files/spectrogram_files/<path>.pickle``.

    The target directory (including any sub-directory contained in ``path``) is
    created when missing, so saving no longer fails on a fresh checkout.
    """
    save_path = "./pickle_files/spectrogram_files/" + path +".pickle"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'wb') as handle:
        pickle.dump(transform, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_pickle(path):
    """
    Return the object pickled at ``./pickle_files/spectrogram_files/<path>.pickle``.

    ``path`` is the bare name: the directory prefix and the ".pickle" suffix
    are added here. Only load files you created yourself; unpickling untrusted
    data can execute arbitrary code.
    """
    pickle_file = "./pickle_files/spectrogram_files/" + path + ".pickle"
    with open(pickle_file, 'rb') as stream:
        return pickle.load(stream)
#self.librosa_outfile = librosa_outfile
#self.spectogram_path = spectogram_path

#'./pickle_files/cqt_low_pitch.pickle'

In [None]:
# STFT of the male-voice recording; the transform is pickled under "19th_century_male_stft"
# (get_transform appends "_" + trans_type to save_path).
get_transform("stft", label = "19th century male voice",
              pth = "/Users/hayden/Desktop/DL_lab/wav_files/computer_male.mp3",
              save_path = "19th_century_male")

In [None]:
# STFT of the female-voice recording, not saved. get_transform has no `save`
# parameter (passing save = False raised TypeError); leaving save_path at its
# default of None is how saving is skipped.
get_transform("stft", label = "19th century female voice", 
              pth = "/Users/hayden/Desktop/DL_lab/wav_files/computer_female.mp3")

In [None]:
# Constant-Q transform of the male-voice recording; nothing is saved (no save_path).
get_transform("cqt", label = "19th century male cqt transform", 
              pth = "/Users/hayden/Desktop/DL_lab/wav_files/computer_male.mp3")


In [None]:
# Constant-Q transform of the female-voice recording, not saved. get_transform
# has no `save` parameter (passing save = False raised TypeError).
get_transform("cqt", label = "18th_cqt_f", pth = "/Users/hayden/Desktop/computer_female.mp3")

In [None]:
# Same constant-Q transform, reading the recording from the DL_lab folder instead.
get_transform("cqt", label = "18th_cqt_f", pth = "/Users/hayden/Desktop/DL_lab/computer_female.mp3")

In [None]:
# Constant-Q transform of the recording at pth_high; nothing is saved (no save_path).
get_transform("cqt", label = "18th_cqt_high",  pth = pth_high)

In [None]:
# Constant-Q transform of "get_free.wav"; partition = 1000 makes get_transform
# plot only a window of the matrix starting at column 1000.
pth_free = "/Users/hayden/Desktop/get_free.wav"
get_transform("cqt", label = "CQT: Get Free",  pth = pth_free, partition = 1000)

In [None]:
# Only (re)defines the path; the STFT call below is disabled.
pth_free = "/Users/hayden/Desktop/get_free.wav"
#get_transform("stft", label = "FT: Get Free",  pth = pth_free)

In [None]:
#cqt = get_transform("cqt", n_bins_mult = 150, y_axis = "hz", norm = 1, method = "pow" ) #filter_scale = 0.2
NBINS = 100

#plt.subplot(1,2,1)

                              #hop_length_mult = 1)
#plt.title("dense")
#save_pickle("18th_cqt_low", cqt_low_pitch)

#plt.subplot(1,2,2)
# NOTE(review): get_transform returns the Audio player, not the transform dict,
# so cqt_high_pitch is an audio object. n_bins_mult does not appear to influence
# the transform computed inside get_transform -- confirm before relying on NBINS.
cqt_high_pitch  = get_transform("cqt", 
                                pth = pth_high,  
                                n_bins_mult = NBINS,
                                label = "18th_cqt_high")

In [None]:
# At the moment log(power) and default db setting appear the same. consider custom functions.


# STFT power spectrograms of the default ("dense") and the high recording.
# NOTE(review): neither get_transform call passes `label`, which get_transform
# asserts on -- these calls look like they fail as written; confirm.
fourier = get_transform("stft", n_bins_mult = 150, method = "pow") # y_axis = "log",)
# NOTE(review): `fourier_db_low` is not defined in any visible cell; presumably
# a leftover from an earlier version of the notebook.
save_pickle("fourier_power_low", fourier_db_low)

pth_high = "/Users/hayden/Desktop/18th_century_high.m4a"
plt.title("dense")
fourier = get_transform("stft", n_bins_mult = 150, # y_axis = "log",
                       pth = pth_high, method = "pow")
plt.title("high")

In [None]:
%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"

# Build the experiment object for the first stored experiment (plot_split and
# compare_ switched off) and print the shape of its matrix A.
exper_ = get_experiment(experiment_lst[0], plot_split = False, compare_ = False)
print(exper_.A.shape)

In [None]:

# Show the transposed experiment matrix with a CQT-note y axis and a time x axis.
# NOTE(review): assumes exper_.A is (time, frequency), hence the transpose -- confirm.
librosa.display.specshow(exper_.A.T, y_axis='cqt_note', x_axis='time')

# convert dB to Pascals: (Pa)

https://www.translatorscafe.com/unit-converter/en-US/sound-pressure-level/2-9/pascal-sound%20pressure%20level%20in%20decibels/#:~:text=Sound%20pressure%20level%20Lp,20%20%CE%BCPa%20or%200.00002%20Pa)

"Sound pressure level (SPL) is a logarithmic (decibel) measure of the sound pressure relative to the reference value of 20 μPa threshold of hearing. The threshold of hearing is the quietest sound that most young healthy people can hear. Sound pressure level Lp is measured in decibels (dB) and is calculated as follows:"

$L_p = 20 \log_{10}(p/p_0)$

Thus:
$(p/p_0) = 10^{L_p/20}$

usually $p_0 = 20 \mu $ Pa or 0.00002 Pa

"The sound pressure level is an absolute value because it is referenced to another absolute value — the threshold of hearing. Therefore, the sound pressure in linear values like pascals can be converted into the sound pressure level in decibels and vice versa if the reference sound pressure is known."

So $p = 0.00002 \cdot 10^{L_p/20}$ Pa.

# convert dB to Intensity of sound: 

https://www.omnicalculator.com/physics/db#sound-intensity-level-sil

Sound intensity is defined as the sound wave power per unit area. It is a special quantity that allows us to measure the energy of sound (or, to be more precise, the energy per second per one squared meter).

$SIL = 10 \log_{10}\left(\frac{I}{I_{ref}}\right)$

where:
SIL is the sound intensity level in dB;
I is the sound intensity in watts per squared meter;
$I_{ref}$ is the reference value of sound intensity. Typically, it is assumed to be equal to 1×10⁻¹² W/m².

Solving for $I$ with $I_{ref} = 10^{-12}$ W/m²:

$$I = I_{ref} \cdot 10^{\frac{SIL}{10}} = 10^{\frac{SIL}{10}-12}$$

# The threshold of hearing:
The threshold of hearing is generally reported as the RMS sound pressure of 20 micropascals, i.e. 0 dB SPL, corresponding to a sound intensity of 0.98 pW/m² at 1 atmosphere and 25 °C.[3] It is approximately the quietest sound a young human with undamaged hearing can detect at 1,000 Hz.[4] The threshold of hearing is frequency-dependent, and it has been shown that the ear's sensitivity is best at frequencies between 2 kHz and 5 kHz,[5] where the threshold reaches as low as −9 dB SPL.

In [None]:
# Distribution of all values in the first experiment's matrix A.
hi = get_experiment(experiment_lst[0])
dat = hi.A
sns.distplot(dat)

In [None]:
def log_normal_parameters(array):
    """
    Estimate the mean and standard deviation of data assumed to be log-normal.

    With mu and sigma the mean and (population) standard deviation of
    log(array), the estimates are

        mean = exp(mu + sigma**2 / 2)
        var  = mean**2 * (exp(sigma**2) - 1)

    Returns a dict with keys "mean" and "sd". Entries of ``array`` must be
    strictly positive for the logarithm to be finite.
    """
    logs = np.log(array)
    mu_hat = np.mean(logs)
    var_hat = np.std(logs)**2

    mean_hat = np.exp(mu_hat + 0.5*var_hat)
    variance_hat = mean_hat**2 * (np.exp(var_hat) - 1)

    return {"mean" : mean_hat, "sd" : np.sqrt(variance_hat)}

def dB2Pa(db_np, normalize = False, drop_silent = False, relative = True):
    """
    Convert decibel values (numpy array) to sound pressure: p = p0 * 10**(dB / 20),
    with p0 = 0.00002.

    Optional steps, applied in this order:

    normalize   : subtract the log-normal mean estimate and divide by the
                  log-normal sd estimate (see ``log_normal_parameters``); the
                  estimates are printed.
    relative    : divide by the hearing threshold (20e-6), giving pressure as a
                  multiple of that threshold.
    drop_silent : entries whose unscaled pressure was below the hearing
                  threshold are rounded to 6 decimals, then every entry below
                  0.000002 is raised to 0.000002 so that no zeros remain.

    Returns the resulting array.
    """
    reference_pressure = 0.00002
    pressure = 10**(db_np/20)* reference_pressure

    hearing_threshold = 20 * 10 **(-6)
    # Mask is taken on the raw pressures, before any normalising / rescaling.
    below_threshold = pressure < hearing_threshold

    if normalize:
        params = log_normal_parameters(pressure)
        print(params)
        pressure = ((pressure - params["mean"])/params["sd"])

    if relative:
        pressure = pressure/ hearing_threshold

    if not drop_silent:
        return(pressure)

    pressure[below_threshold] = np.round(pressure[below_threshold], 6)
    # Floor that keeps the output strictly positive.
    floor_value = 0.000002
    pressure[pressure < floor_value] = floor_value
    return(pressure)


def dB2Intensity(db_np):
    """
    Convert sound intensity levels in decibels to sound intensity, for numpy arrays:

        I = 10 ** (dB / 10 - 12)

    i.e. the inverse of SIL = 10 * log10(I / I_ref) with I_ref = 1e-12.
    """
    return(np.power(10, db_np/10 -12))



In [None]:

# Convert `dat` to normalised pressure values and plot their distribution.
Xpow =  dB2Pa(dat, normalize =  True)

sns.distplot(Xpow)#Log- Normal!


In [None]:
#Xdb_normal = (Xdb - np.mean(Xdb))/np.std(Xdb)
Xpow =  dB2Pa(Xdb, normalize = True, drop_silent = False)
sns.distplot(np.log10(Xpow)) 
min_sound_human_hearing = 20 * 10**(-6)
plt.xlabel("log(Pascals)")
plt.title("ke plot of Pascal values")
plt.ylabel("")
plt.axvline(x=np.log10(min_sound_human_hearing) , color = "red")

20 micro Pascals
The softest sound a normal human ear can detect has a pressure variation of 20 micropascals, abbreviated as µPa, which is 20 × 10⁻⁶ Pa ("20 millionths of a pascal") and is called the Threshold of Hearing.

In [None]:

def linear_log_comparison(dataset, 
                          method = "propto-dB", 
                          propto = True, 
                          normalized = True,
                          log = False,
                          drop_silent = True):
    """
    Plot a spectrogram matrix twice side by side (linear and log frequency
    axis), mark the experiment regions, and return the transformed data.

    Parameters
    ----------
    dataset : spectrogram matrix in decibels.
    method : "propto-dB" keeps decibels (z-scored when `normalized`),
        "propto-SIL" converts with dB2Intensity (log10 of it when `log`),
        "propto-Pa" converts with dB2Pa(normalize = normalized).
    propto : only affects the colorbar format string in the "propto-dB" case.
    normalized : see `method`.
    log : only used in the "propto-SIL" case.
    drop_silent : currently not used by this function (dB2Pa is always called
        with drop_silent = False).

    Returns
    -------
    The transformed `dataset`.
    """

    # we are only proportional because we have normalized the data.
    assert method in ["propto-dB", "propto-SIL", "propto-Pa"], "choose decibels or sound energy"
    
    if method == "propto-SIL":
        plot_value_label = "SIL "
        dataset = dB2Intensity(dataset)
        if log:
            dataset = np.log(dataset)/np.log(10)
        # `ylab` is assigned in every branch but never read afterwards.
        ylab = 'SIL (Sound Intensity Level)'  #sound wave power per unit area')
            #dataset = (dataset - np.mean(dataset))/np.std(dataset) <-- gets you  what you started with
    elif method == "propto-Pa":
        
        plot_value_label = " (Pa)" #sound pressure
        ylab = "Pascal"
        
        dataset = dB2Pa(dataset, normalize = normalized, drop_silent = False)
        #dataset = np.abs(dataset)
        #dataset = np.log(dataset) #/np.std(dataset)
        #dataset = (dataset - np.mean(dataset))/ np.std(dataset)
        
    else:
        plot_value_label = " (dB)" # decibels
        ylab = "Hz"
        #lower_db_limit = -20
        
        if normalized:
            # z-score the decibel values
            dataset = (dataset - np.mean(dataset))/np.std(dataset)
            #lower_db_limit = (lower_db_limit - np.mean(dataset))/np.std(dataset) 
        #if drop_silent:
        #    dataset[dataset < lower_db_limit] = lower_db_limit
            
    
    plot_title = " spectrogram of '18th century'" + plot_value_label
    
    plt.figure(figsize = (12,8))
    
    # Linear spectrogram plot (left panel)
    ax1 = plt.subplot(1,2,1)
    librosa.display.specshow(dataset, y_axis='linear', x_axis='time')
    
    #display(quadmesh_.get_axes()) # get the first line, there might be more

    #print(ax1.get_axes())#.get_xdata())
    #print(plt.get_xdata())
    plt.title('Linear ' + plot_title)
    add_experiment_regions(ax1)
    plt.ylabel("Hz")
    
    # legend
    # NOTE(review): the Hz ranges in these labels are hard-coded and are not
    # derived from the bounds used in add_experiment_regions -- confirm they match.
    legend_elements = [Patch(facecolor='pink', edgecolor='red',     label='3500 to 4500 Hz'),
                       Patch(facecolor='lightblue', edgecolor='blue',   label='1500 to 2500 Hz'),
                       Patch(facecolor='palegreen', edgecolor='green', label='250 to 12250 Hz')]
    plt.legend(handles=legend_elements, loc='upper left')

    # Log spectrogram plot (right panel)
    ax2 = plt.subplot(1,2,2)
    librosa.display.specshow(dataset, y_axis='log', x_axis='time')
    plt.title('Log ' + plot_title)
    add_experiment_regions(ax2)
    plt.ylabel("Hz")
    
    
    
    # Colorbar tick format depends on the unit being shown.
    if method == "propto-dB":
        propto_str = '%+2.0f' + str(r'$\propto$') if propto else '%+2.0f'
        colorbar_label = propto_str +" dB" #+
    elif method == "propto-SIL":
        colorbar_label = "%+2.0f e-12 SIL"  # + propto_str + 
    elif method == "propto-Pa":
        colorbar_label = "%.1e Pa"  # propto_str +

    plt.colorbar(format= colorbar_label)
    
    plt.tight_layout()
    # NOTE(review): no new figure is created before this call, so the histogram
    # is drawn into whatever axes are current at this point -- confirm this is intended.
    sns.distplot(dataset)
    return(dataset)

def add_experiment_regions(ax, plot = True, trial = 2):
    """
    Mark the target band and the two neighbouring observer bands of an
    experiment on a spectrogram axis, or return those bands as frequency lists.

    Parameters
    ----------
    ax : matplotlib axis to draw on (only used when ``plot`` is true).
    plot : if true, shade the bands on ``ax`` (target in green, observers in
        blue) and return None. If false, draw nothing and return
        ``{"target": [...], "obs": [...]}`` with frequencies in steps of 10.
    trial : which preset to use. 1 -> target 210-560 with 160-wide observer
        bands; 2 (default, the value that used to be hard-coded) -> target
        340-640 with 280-wide observer bands.

    Raises
    ------
    ValueError for an unknown ``trial``.
    """
    # (lower target bound, upper target bound, observer band width); integers so range() works.
    trial_bounds = {1: (210, 560, 160),
                    2: (340, 640, 280)}
    if trial not in trial_bounds:
        raise ValueError("unknown trial " + repr(trial) + "; expected one of "
                         + str(sorted(trial_bounds)))
    lb_targ, ub_targ, obs_hz = trial_bounds[trial]

    if not plot:
        obs_list = list(range(lb_targ-obs_hz, lb_targ, 10))
        obs_list += list(range(ub_targ, ub_targ + obs_hz, 10))
        resp_list = list(range(lb_targ, ub_targ, 10))
        obs_resp = {"target": resp_list, "obs": obs_list}
        return obs_resp

    def fill_region(lb, ub, color_):
        # Outline the band, then shade it across x in [0, 27).
        ax.axhline(y=lb, color = color_, linestyle='-')
        ax.axhline(y=ub, color = color_, linestyle='-')
        x  = np.arange(0.0, 27, 0.1)
        y1 = lb + 0 * x
        y2 = ub + 0 * x
        ax.fill_between(x, y2, y1, alpha = 0.2, color = color_)

    fill_region(lb_targ-obs_hz, lb_targ, "b")       # lower observer band
    fill_region(lb_targ, ub_targ, "g")              # target band
    fill_region(ub_targ, ub_targ + obs_hz, "b")     # upper observer band

### Decibel spectrogram (normalized)

In [None]:
# Build Pa and dB versions of the first experiment's matrix and pickle them as "custom".
exper_ = experiment_lst[0]
exper_obj = get_experiment(exper_)
# Transposed before plotting. NOTE(review): assumes A is (time, frequency) -- confirm.
dat = exper_obj.A.T
#hi = (hi - np.mean(hi))/np.std(hi)
# Both calls pass normalized = False, so the returned data is not normalised.
dataset_Pow = linear_log_comparison(dat, 
                      propto = False, 
                      normalized = False,
                      drop_silent = False,
                      method = "propto-Pa") 

dataset_db = linear_log_comparison(dat, 
                                    propto = False, 
                                    normalized = False,
                                    drop_silent = True,
                                    method = "propto-dB") 

# Same nesting as the dict built in get_transform, but without "signal" / "sr".
custom_transform = {"transform": {
                        "Xdb"  : dataset_db,
                        "Xpow" : dataset_Pow,
                        "f"  :   exper_obj.f}
                   }
save_pickle("custom",custom_transform)
#load_pickle("custom")
#load_pickle("custom")

# USE THIS NORMALIZED Pa DATASET!!!

In [None]:
# NOTE(review): `dataset` is not assigned at cell level in the visible notebook
# (it is a local name inside linear_log_comparison) -- confirm which array is meant.
sns.distplot(dataset)

# Decibel spectrogram, unnormalized

In [None]:
# Decibel spectrograms without normalisation.
# NOTE(review): linear_log_comparison does array arithmetic on its first
# argument; confirm `exper_` is a spectrogram matrix here and not the experiment dict / object.
linear_log_comparison(exper_, 
                      propto = False, 
                      normalized = False,
                      drop_silent = False) 

# Pascal spectrogram, normalized

In [None]:
# Pressure (Pa) spectrograms, normalised inside dB2Pa.
# NOTE(review): confirm `exper_` is a spectrogram matrix at this point.
linear_log_comparison(exper_, method = "propto-Pa", normalized = True)

# Pascal spectrogram, unnormalized

In [None]:
# Pressure (Pa) spectrograms without normalisation.
# NOTE(review): confirm `exper_` is a spectrogram matrix at this point.
linear_log_comparison(exper_, method = "propto-Pa", normalized = False)

# Let's look at the average sound pressure per log frequency

In [None]:
# Pressure relative to the hearing threshold (dB2Pa defaults: relative = True, no normalisation).
dB2Pa(exper_.A_unnormalized)

In [None]:
# The data argument of linear_log_comparison is its first parameter, `dataset`;
# there is no `inputt` keyword (using it raised TypeError).
linear_log_comparison(Xdb, method = "propto-Pa", drop_silent = False)

In [None]:
# Raw image view of the experiment matrix.
plt.imshow(exper_.A)

In [None]:
def create_log_spectogram(dataset = exper_.A, f_arr = exper_.f):
    """
    Draw ``dataset`` as a heatmap whose frequency axis is log2(frequency).

    Parameters
    ----------
    dataset : 2-d array indexed [time step, frequency]. The default is bound to
        ``exper_.A`` when this cell is executed.
    f_arr : frequencies matching the columns of ``dataset``. The default is
        bound to ``exper_.f`` when this cell is executed.

    Time stamps are read from the global ``exper_.T`` (``T[time_step][0]``).
    The first frequency column is dropped so that a frequency of 0 never
    reaches the logarithm. Side effects: an ``imshow`` of the raw data, a
    distribution plot, and the final heatmap.
    """
    T = exper_.T
    plt.imshow(dataset)

    # Drop the first frequency to avoid log(0) = -inf.
    f = np.array(f_arr)[1:]
    dataset = dataset[:,1:]

    f = np.log(f)/np.log(2) # humans experience sound logarithmically

    sns.distplot(dataset)
    plt.show()
    n_timesteps, _ = dataset.shape

    # Long-format records: one row per (time step, frequency) cell.
    dictt_lst = []
    for time_step in range(n_timesteps):
        this_timestep = T[time_step][0]
        for freq_idx, frequency_spec in enumerate(f):
            dictt_lst.append({"frequency" : frequency_spec,
                              "time"      : this_timestep,
                              "amplitude" : dataset[time_step, freq_idx]})

    log_frequency_df = pd.DataFrame(dictt_lst)
    # Keyword arguments: newer pandas releases no longer accept them positionally.
    log_frequency_df = log_frequency_df.pivot(index = "frequency",
                                              columns = "time",
                                              values = "amplitude")

    sns.heatmap(log_frequency_df)
    
# Run with the defaults, i.e. the exper_ data that was bound when the function was defined.
create_log_spectogram()

In [None]:
# Seaborn's "flights" example dataset, loaded to look at its first rows.
flights = sns.load_dataset("flights")
flights.head()

In [None]:
# Spectrogram of Xdb with a logarithmic frequency axis.
# NOTE(review): `Xdb` is not defined at cell level in the visible notebook -- confirm its origin.
librosa.display.specshow(Xdb, y_axis='log', x_axis='time')
plt.title('log Power spectrogram')
plt.tight_layout()

In [None]:
# Raw image view of Xdb, stretched with aspect = 10.
plt.imshow(Xdb, aspect = 10)#.shape

In [None]:
# Test bounds: the frequencies at index 1 and index 15 of f_.
bounds_ = f_[1], f_[15] #_ denotes temporary variable, for testing or within a function.

lb_, ub_ = bounds_

def retrieve_freqs_btwn(bounds, f_):
    """
    Return the indices of the frequencies lying strictly between two bounds.

    bounds: pair (lower, upper); both ends are exclusive.
    f_:     sequence of frequencies.

    Returns a plain list of integer positions into `f_`.
    """
    f = np.array(f_)
    lb, ub = bounds
    # Echo the bounds actually used (the arguments, not notebook globals).
    print(bounds)
    in_range = (f > lb) & (f < ub)

    return np.where(in_range)[0].tolist()
    
retrieve_freqs_btwn(bounds_, f_)

In [None]:
"""
def log_amplitude(exper_):
    A = np.array(exper_.A)
    print(np.min(A))
    print(np.max(A))
    orig_shape = A.shape
    
    signs = A.copy().reshape(-1,) < 0
    signs = signs * 2 - 1
    print(np.unique(signs))
    signs = signs.reshape(orig_shape)
    #plt.imshow(signs)
    A_new = np.log(np.abs(A)) * signs
    
    #A_new = (A_new - np.mean(A_new))/ np.std(A_new)
    #print(np.min(A_new))
    #print(np.max(A_new))
    #plt.imshow(A_new)
    return(A_new)
"""

In [None]:
hi = load_pickle('./pickle_files/results/18th_cqt_high/db/untouched/split_0.5/tf_250__obsHz_0.1__targHz_0.02.pickle')

In [None]:
hi = load_pickle("custom")

sns.heatmap(hi["Xpow"])

In [None]:
hi = load_pickle("18th_cqt_high")
print(hi["transform"]["Xdb"])
sns.heatmap(hi["transform"]["Xdb"])


In [None]:
%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"
def get_frequencies(trial = 1):
    """
    Build the observer and response frequency lists for a preset trial.

    trial: preset number (1, 2 or 3). Each preset fixes the target band
           [lb_targ, ub_targ) and the width `obs_hz` of the observer band
           placed on each side of it.

    Returns (obs_list, resp_list), both lists of ints in steps of 10:
    obs_list covers [lb_targ - obs_hz, lb_targ) followed by
    [ub_targ, ub_targ + obs_hz); resp_list covers [lb_targ, ub_targ).

    Raises ValueError for an unknown trial number.
    """
    presets = {
        1: (210, 560, int(320 / 2)),
        2: (340, 640, 280),
        3: (340, 350, 20),
    }
    try:
        lb_targ, ub_targ, obs_hz = presets[trial]
    except (KeyError, TypeError):
        raise ValueError("unknown trial " + repr(trial)
                         + "; expected one of " + str(sorted(presets))) from None

    obs_list = list(range(lb_targ - obs_hz, lb_targ, 10))
    obs_list += list(range(ub_targ, ub_targ + obs_hz, 10))
    resp_list = list(range(lb_targ, ub_targ, 10))
    return obs_list, resp_list

# Build an EchoStateExperiment on the "custom" spectrogram using the
# observer/response frequency lists of preset trial 1.
obs_freqs, resp_freqs = get_frequencies(1)
librosa_args = { "spectrogram_path": "custom",#"cqt_high_pitch",
                         "librosa": True}
#inputs = {'obs_freq_lst' :, "targ_freq_lst": , "split": 0.5}
                       
# Frequency lists are kept separate and merged into the base settings below.
additional_Echo_inputs = {
            "obs_freq_lst":  obs_freqs,
            "targ_freq_lst" : resp_freqs
            }
Echo_inputs = {
        "size" : "medium",
        "verbose" : False,
        "prediction_type" : "block"}
Echo_inputs = Merge(Echo_inputs, additional_Echo_inputs)
experiment = EchoStateExperiment( **Echo_inputs, **librosa_args)
experiment.get_observers(method = "exact", split = 0.5, aspect = 0.9, plot_split = False)
# Last expression of the cell: the notebook displays the observer indices.
experiment.obs_idx

In [None]:
!tree pickle_files

In [None]:
# Load the saved spectrogram and standardize its dB transform (zero mean, unit std).
dat = load_pickle("./pickle_files/spectrogram_files/18th_cqt_high.pickle")
g_truth = dat["transform"]["Xdb"]
g_truth = (g_truth - np.mean(g_truth))/np.std(g_truth)
# Row 35 from column 513 onward.
# NOTE(review): presumably one frequency row over the test-time window — confirm.
line = g_truth[35][513:]
fig, ax = plt.subplots(1,1,figsize = (10,4))
# NOTE(review): `bye` and `ip` are not defined in this cell; they must already
# exist in the notebook session (labels suggest uniform and interpolation predictions).
sns.lineplot(x = range(len(bye)), y = bye, label = "unif")
sns.lineplot(x = range(len(ip)), y = ip, label = "ip")
sns.lineplot(x = range(len(line)), y =line, label = "gtruth")


In [None]:
A_shape_0 = 1000


def get_obs_eq(k):
    """
    Print observer indices spaced `k` apart, starting at a random offset.

    The offset is drawn from [0, A_shape_0 // k) and the indices stop
    below A_shape_0. Nothing is returned.
    """
    n_offsets = A_shape_0 // k
    offset = np.random.randint(n_offsets)
    # Number of k-spaced positions from the offset up to A_shape_0.
    n_observers = len(range(offset, A_shape_0, k))
    observers = [offset + k * step for step in range(n_observers)]
    print(observers)


get_obs_eq(25)

# For Zhizhuo

In [None]:
#plt.imshow(test1.ip_res)
#https://stackoverflow.com/questions/35215161/most-efficient-way-to-map-function-over-numpy-array
%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"

def pure_prediction_ip_generator(missing_data, end_idx):
    """
    Run a "column" prediction experiment with "griddata-nearest"
    interpolation, save its interpolation results to a .mat file, and
    return the experiment object.

    missing_data: number of trailing time indices (just before `end_idx`)
                  held out as the test set. Note that 0 makes the
                  `[-missing_data:]` slice select every index.
    end_idx:      exclusive upper bound of the time indices used.

    Side effects: prints the index ranges, draws two images with
    matplotlib, and writes
    "zhizhuo/testindex_<first test idx>_<last test idx>.mat".
    NOTE(review): nothing here creates the "zhizhuo/" directory;
    presumably it must already exist — confirm.

    Returns the EchoStateExperiment after get_observers has been called.
    """
    # Test set: the last `missing_data` indices below end_idx.
    test_idx = list(range(end_idx))[-missing_data:] #553, 712, 942
    print(test_idx)
    # Training set: every index before the test window.
    train_range_input = end_idx - missing_data
    train_idx = list(range(train_range_input))
    print(train_range_input)

    experiment_inputs1 =  {'size': 'medium', 
                           'target_frequency': None, 
                           'verbose': False, 
                           'prediction_type': 'column', 
                           "interpolation_method" : "griddata-nearest",
                           'train_time_idx': train_idx,
                           'test_time_idx' : test_idx}  # e.g. train 0..249, test 250..259

    test1 = EchoStateExperiment(**experiment_inputs1)
    obs_inputs1 =  {'split': 0.5, 'aspect': 0.9, 'plot_split': False, 'method': 'exact'}
    test1.get_observers(**obs_inputs1)


    import math
    def f(x):
        """
        Return True when x is NaN, e.g. f(float('nan')) is True.
        """
        return math.isnan(x)
    def array_map(x, f):
        # Apply f element-wise to array x and return the results in x's
        # original shape; also prints the shape and the flat result.
        x_shape= x.shape
        print("x_shape" + str(x.shape))
        x  = x.flatten().tolist()
        hi = np.array(list(map(f,x)))
        print(hi)

        return np.array(hi).reshape(x_shape)

    test1_ip_pred = test1.ip_res["prediction"]

    # Visualize where the interpolation prediction is NaN.
    plt.imshow(array_map(test1_ip_pred, f))
    # Bare expression: has no effect inside a function (notebook leftover).
    test1_ip_pred

    # Payload written to the .mat file.
    my_dict = {
        "interpolation_prediction": test1.ip_res["prediction"],
        "ground_truth_test"  : test1.xTe,
        "ground_truth_train" : test1.xTr,
        "interpolation_MSE"  : test1.ip_res["nrmse"]
    }

    from scipy.io import savemat

    save_path = "zhizhuo/testindex_" + str(test_idx[0]) + "_" + str(test_idx[-1]) +".mat"

    print(save_path)
    savemat(save_path, my_dict) #"zhizhuo/testindex_514_523.mat"
    plt.imshow(test1.ip_res["prediction"], aspect = 0.1)
    return(test1)
    


In [None]:
# Four test windows of 40 held-out time steps, each ending at a different index.
test_lst = [{"missing_data" : 40, "end_idx" : 289},
            {"missing_data" : 40, "end_idx" : 553},
            {"missing_data" : 40, "end_idx" : 712},
            {"missing_data" : 40, "end_idx" : 942}]
# Run every configuration and keep the returned experiment objects in order.
test_results = []
for prediction in test_lst:
    pred_ = pure_prediction_ip_generator(**prediction)
    test_results.append(pred_)

In [None]:
# Collect the interpolation prediction and the test-set ground truth of each run.
test_rez = [ test_result.ip_res["prediction"] for test_result in test_results]
test_ground = [ test_result.xTe for test_result in test_results]
# `count` is the 1-based run number used in the plot titles.
count = 1
for i, rez in enumerate(test_rez):
    # Prediction first, then the matching ground truth, each in its own figure.
    plt.imshow(rez, aspect = 10)
    plt.title("prediction" + str(count))
    
    plt.show()
    plt.imshow(test_ground[i], aspect = 10)
    plt.title("truth" + str(count))
    plt.show()
    count+=1

In [None]:
X = load_pickle('19th_century_male_stft')
plt.imshow(X['transform']['Xdb'].T)

In [None]:
get_transform("stft", label = "19th century male voice", pth = "/Users/hayden/Desktop/computer_male.mp3", save_path = "19th_century_male")

# when it comes time to run a lot of tests

In [None]:

%run -i '../MARIOS/PyFiles/imports.py'
%run -i '../MARIOS/PyFiles/helpers.py'
%run -i "../MARIOS/PyFiles/experiment.py"
def quick_write_path(freq, split, targHz, obsHz, size = "/medium"):
    """
    Build the relative result-file path for one experiment configuration.

    freq:   target frequency in Hz; rendered in kHz with a "k" suffix
            (2000 -> "2k", 4000 -> "4k", 2500 -> "2.5k").
    split:  train/test split, inserted verbatim after "/split_".
    targHz: target bandwidth in Hz, written in kHz.
    obsHz:  observer bandwidth in Hz, written in kHz.
    size:   path segment for the dataset size, including its leading "/".

    Returns a single-element list holding the path, so callers can
    extend a list with `+=`.
    """
    # Derive the label from the value instead of special-casing 2000 and
    # 4000, so other frequencies no longer leave the label undefined.
    freqStr = f"{freq / 1000:g}k"
    splitStr = "/split_" + str(split)
    targHz, obsHz = str(targHz/1000) , str(obsHz/1000)
    HzStr = "/targetKhz:_" + targHz + "__obskHz:_" +  obsHz
    newPath = freqStr + size + splitStr + HzStr +".txt"
    return([newPath])

def quick_write_dict(freq, split, targHz, obsHz):
    """
    Wrap one experiment configuration in a single-element list of dicts,
    so callers can extend a list with `+=`.
    """
    settings = dict(target_freq = freq,
                    split = split,
                    obs_hz = obsHz,
                    target_hz = targHz)
    return [settings]


# Sweep every combination of target frequency, split, target bandwidth and
# observer bandwidth, collecting the output path and settings dict of each.
path_lst, dict_lst = [], []
for targ_freq in (2000, 4000):
    for split in (0.5, 0.9):
        for targ in range(500, 2001, 250):
            for obs in range(500, 2001, 250):
                path_lst.extend(quick_write_path(targ_freq, split, targ, obs))
                dict_lst.extend(quick_write_dict(targ_freq, split, targ, obs))


In [None]:
# Checking out low frequency results
exper_lst = []
bp_ = "/Users/hayden/Desktop/DL_LAB/Reservoir/MARIOS/pickle_files/results/custom/power/untouched/"

new_exper_path_lsts = [
    "split_0.5/tf_485.0__obsNIdx_56__targNIdx_30.pickle",
    "split_0.9/tf_380.0__obsNIdx_32__targNIdx_35.pickle"
    #tf_485.0__obsNIdx_56__targNIdx_30.pickle
]


for i in new_exper_path_lsts:
    exper_ = load_p_result(i, bp = bp_)
    exper_lst += [exper_]
    
xpow = load_pickle("custom")["transform"]["Xpow"]

this_experiment = exper_lst[0]
resp_idx_ = this_experiment["resp_idx"]
print(resp_idx_)
resp_ = xpow[resp_idx_]
sns.heatmap(resp_)
plt.show()
sns.heatmap(resp_[:,512:])
plt.show()
sns.heatmap(np.array(exper_lst[0]["prediction"]["exponential"]).T)
plt.show()
sns.heatmap(np.array(exper_lst[0]["prediction"]["interpolation"]).T)