# <b>Data Diagnostics I </b> *✲ﾟ*｡✧٩(･ิᴗ･ิ๑)۶*✲ﾟ*｡✧

In this notebook we will explore derivatives and see how our data changes. This helps smooth out data and can be the basis for future dynamical systems analysis.

## Time Derivatives - Total Variation Regularization
Since we are interested in the shape of our data and want to eliminate noise as much as possible, we will take the time derivative of our data. 
To this end we will use an iterative total variation regularization method to compute the first-order derivative of our data. Finite difference methods estimate derivatives by looking at the changes in the values over small intervals dt. This time step size dt is the reciprocal of the sampling frequency, which is 3 in our case (so dt = 1/3).
We will apply this on each dataset individually.

In [None]:
# Fastest setting: the trailing arguments 1 and 0.01 are presumably the number
# of iterations and gamma, as the "It1"/"Gam" naming suggests — TODO confirm
# against hf.compute_derivatives.
resampled_derivatives = hf.compute_derivatives(quartiled_data, length_dict,1,0.01)
# Persist the result; note that "Gam01" in the file name stands for gamma = 0.01.
resampled_derivatives.to_pickle("resampled_derivatives_It1_Gam01.pkl")

In [None]:
# Same gamma (0.01) as the fastest run, but with 2 and 5 iterations.
resampled_derivatives_It2= hf.compute_derivatives(quartiled_data, length_dict, 2, 0.01)
resampled_derivatives_It5 = hf.compute_derivatives(quartiled_data, length_dict, 5, 0.01)

In [None]:
# Fixed at 2 iterations, varying gamma: 0.001 and 0.1.
# The "Gam10" suffix denotes gamma = 0.1 here, not 10.
resampled_derivatives_Gam001 = hf.compute_derivatives(quartiled_data, length_dict, 2, 0.001)
resampled_derivatives_Gam10 = hf.compute_derivatives(quartiled_data, length_dict, 2, 0.1)

In [None]:
# Copy the "state" column of truncated_dataframe onto each parameter-sweep
# result so the frames carry the labels alongside the derivatives.
for sweep_result in (
    resampled_derivatives_Gam001,
    resampled_derivatives_Gam10,
    resampled_derivatives_It2,
    resampled_derivatives_It5,
):
    sweep_result["state"] = truncated_dataframe["state"]

In [None]:
%%capture
%matplotlib widget
# Output folders, one per (iterations, gamma) combination of the sweep.
saving_path2 = "..\\plots\\23Jan\\totalvariation_plots\\Iteration2Gamma0.01\\"
saving_path5 = "..\\plots\\23Jan\\totalvariation_plots\\Iteration5Gamma0.01\\"
saving_path001 = "..\\plots\\23Jan\\totalvariation_plots\\Iteration2Gamma0.001\\"
saving_path10 = "..\\plots\\23Jan\\totalvariation_plots\\Iteration2Gamma0.1\\"

# Pair every result frame with its destination folder and plot them in turn.
# The same frame is deliberately passed for both data arguments of the helper.
plot_jobs = (
    (resampled_derivatives_It2, saving_path2),
    (resampled_derivatives_It5, saving_path5),
    (resampled_derivatives_Gam001, saving_path001),
    (resampled_derivatives_Gam10, saving_path10),
)
for derivatives_frame, target_folder in plot_jobs:
    hf.plot_from_stacked_imputed(length_dict, derivatives_frame, derivatives_frame, target_folder)

In [None]:
# Re-render only the 2-iteration / gamma 0.01 plots; this repeats the first
# call of the batch plotting cell with the same output folder.
saving_path2="..\\plots\\23Jan\\totalvariation_plots\\Iteration2Gamma0.01\\"
hf.plot_from_stacked_imputed(length_dict, resampled_derivatives_It2, resampled_derivatives_It2, saving_path2)

In [None]:
# Best-looking settings so far: 5 iterations with gamma 0.01 and gamma 0.001.
# The derivatives are computed on every column except "state"; that column is
# copied back from quartiled_data afterwards and each result is pickled.
is_not_state = quartiled_data.columns != "state"

# --- It 5, Gamma 0.01 ---
resampled_derivatives_595_It5_Gam01 = hf.compute_derivatives(
    quartiled_data.loc[:, is_not_state], length_dict, 5, 0.01
)
resampled_derivatives_595_It5_Gam01["state"] = quartiled_data["state"]
resampled_derivatives_595_It5_Gam01.to_pickle("resampled_derivatives_595_It5_Gam01.pkl")

# --- It 5, Gamma 0.001 ---
resampled_derivatives_595_It5_Gam001 = hf.compute_derivatives(
    quartiled_data.loc[:, is_not_state], length_dict, 5, 0.001
)
resampled_derivatives_595_It5_Gam001["state"] = quartiled_data["state"]
resampled_derivatives_595_It5_Gam001.to_pickle("resampled_derivatives_595_It5_Gam001.pkl")

# Reload the frame that was just written, then plot it per dataset.
resampled_derivatives_595_It5_Gam001 = pd.read_pickle("resampled_derivatives_595_It5_Gam001.pkl")
saving_path5_1 = "..\\plots\\23Jan\\totalvariation_plots\\Iteration5Gamma0.001\\"

hf.plot_from_stacked_imputed(
    length_dict,
    resampled_derivatives_595_It5_Gam001,
    resampled_derivatives_595_It5_Gam001,
    saving_path5_1,
)

In [None]:
# Work on a copy so resampled_derivatives itself is left untouched.
resampled_derivatives_cumsum = resampled_derivatives.copy()

dt = 1/3 # time step: 1/(frame rate); assigned here but not used by the loop below
start_index = 0
# Walk over the stacked frame one dataset at a time (frames_num rows each).
# Each chunk is replaced by its cumulative sum, shifted by |min| + 0.01.
for dataset_idx in tqdm(range(len(dataframes)), desc="Computing derivatives"):
    end_index = start_index + frames_num
    segment = slice(start_index, end_index)
    integrated = np.cumsum(resampled_derivatives_cumsum[segment])
    resampled_derivatives_cumsum[segment] = integrated + abs(integrated.min()) + 0.01
    start_index = end_index


# Plot the trace of one neuron ('AVAR') across all stacked datasets and save it.
# NOTE(review): the file name says "It5", but resampled_derivatives_cumsum is a
# copy of resampled_derivatives — confirm that frame holds the 5-iteration result.
fig, ax = plt.subplots(figsize=(40, 10))
avar_trace = resampled_derivatives_cumsum['AVAR'].T
ax.plot(avar_trace, color="tab:blue")
ax.set(ylabel="AVAR", xlabel="time", title="AVAR across all datasets")
fig.savefig("resampled_AVAR_alldatasets_It5.png")

# Reduce the integrated traces to three principal components.
pca = hf.PCA(n_components=3)
projected = pca.fit_transform(resampled_derivatives_cumsum)
temporal_PCs_totalvariation = pd.DataFrame(projected)
%matplotlib widget
window_size = 10

# Apply a 10-sample sliding average to each of the three principal components
# for smoother visualizations (mode='same' keeps the original length).
box_kernel = np.ones(window_size) / window_size
for pc_column in (0, 1, 2):
    temporal_PCs_totalvariation[pc_column] = np.convolve(
        temporal_PCs_totalvariation[pc_column], box_kernel, mode='same'
    )

# Attach the labels from turn_vec and export an HTML plot plus an animated GIF.
temporal_PCs_totalvariation['state'] = turn_vec.values
hf.plot_PCs(
    temporal_PCs_totalvariation,
    temporal_PCs_totalvariation['state'],
    'PCA_derivatives_totalvariation.html',
)
hf.plot_PC_gif(
    temporal_PCs_totalvariation,
    temporal_PCs_totalvariation['state'],
    'PCA_totalvariation.gif',
)
# Same PCA pipeline for the 2-iteration result.
# NOTE(review): resampled_derivatives_It2_cumsum is not created in the visible
# part of this notebook — confirm it is defined before this cell runs.
pca = hf.PCA(n_components=3)
projected_it2 = pca.fit_transform(resampled_derivatives_It2_cumsum)
temporal_PCs_totalvariation = pd.DataFrame(projected_it2)

# Apply a 10-sample sliding average to each principal component for smoother
# visualizations (window_size is set in an earlier step).
box_kernel = np.ones(window_size) / window_size
for pc_column in (0, 1, 2):
    temporal_PCs_totalvariation[pc_column] = np.convolve(
        temporal_PCs_totalvariation[pc_column], box_kernel, mode='same'
    )

# Attach the labels from turn_vec and export an HTML plot plus an animated GIF.
temporal_PCs_totalvariation['state'] = turn_vec.values
hf.plot_PCs(
    temporal_PCs_totalvariation,
    temporal_PCs_totalvariation['state'],
    'PCA_derivatives_totalvariation_It2.html',
)
hf.plot_PC_gif(
    temporal_PCs_totalvariation,
    temporal_PCs_totalvariation['state'],
    'PCA_totalvariation_It2.gif',
)

### Butterworth Smoothing

In [None]:
# Butterworth-based derivative estimate, computed per dataset and per column.
# The result frame starts as a copy of quartiled_data and is overwritten
# slice by slice with the estimated derivatives.
butterworth_derivatives = quartiled_data.copy()
dt = 1/3 # time step: 1/(frame rate)
start_index = 0
# Only the number of datasets matters here: the loop variable is unused and
# every dataset is taken to span frames_num consecutive rows.
for dataset in dataframes.values():
    end_index = start_index + frames_num
    # Columns are addressed by position, so this assumes resampled_derivatives
    # and butterworth_derivatives share the same column layout — TODO confirm.
    for col_index in range(len(butterworth_derivatives.columns)):
        # x_hat: estimated (smoothed) x, dxdt_hat: estimated dx/dt; only dxdt_hat is kept.
        # [3, 0.09] are the butterdiff parameters — presumably [filter order,
        # cutoff frequency] as in pynumdiff; TODO confirm against the installed version.
        # NOTE(review): the input is resampled_derivatives (already the output of
        # hf.compute_derivatives), not quartiled_data — confirm that taking the
        # derivative of that frame is intended.
        x_hat, dxdt_hat = pdiff.smooth_finite_difference.butterdiff(resampled_derivatives.iloc[start_index:end_index, col_index], dt, [3, 0.09], options={'iterate': False})
        butterworth_derivatives.iloc[start_index:end_index, col_index] = dxdt_hat
    # Disabled: inserting a NaN row between datasets as a visual separator.
    #if end_index != len(resampled_derivatives):
    #    resampled_derivatives.iloc[end_index, :] = np.nan #so that we have a separation between datasets
    start_index = end_index
%%capture
%matplotlib widget
saving_path="C:\\Users\\LAK\\Documents\\butterworth_plots\\"

# Un-stack the Butterworth derivatives and plot the traces of each dataset.
# length_dict maps every dataset name to its number of observations (rows);
# iterating over its items yields names and lengths together, so no manual
# counter or repeated list(keys()) lookup is needed.
start_index = 0
for dataset_name, obs_count in length_dict.items():
    # rows [start_index, end_index) belong to the current dataset
    end_index = start_index + obs_count
    res_data_df = butterworth_derivatives.iloc[start_index:end_index]

    # the same frame is passed for both inputs of the grid-plot helper
    fig = hf.plot_traces.make_grid_plot_from_two_dataframes(
        res_data_df, res_data_df)

    # save all plots in one folder, one PNG per dataset
    pathname = saving_path + dataset_name + ".png"
    fig.savefig(pathname)
    plt.close(fig)  # close the figure once saved so figures do not pile up
    start_index = end_index
# Start from a copy of resampled_derivatives; each dataset's rows are then
# overwritten with the shifted cumulative sum of the Butterworth derivatives.
resampled_derivatives_butter_cumsum = resampled_derivatives.copy()

dt = 1/3 # time step: 1/(frame rate); assigned here but not used by the loop below
start_index = 0
# NOTE(review): the per-dataset length here is `pts`, whereas the other
# per-dataset loops in this notebook use `frames_num` — confirm they agree.
for dataset_idx in tqdm(range(len(dataframes)), desc="Computing derivatives"):
    end_index = start_index + pts
    segment = slice(start_index, end_index)
    integrated_bt = np.cumsum(butterworth_derivatives[segment])
    resampled_derivatives_butter_cumsum[segment] = integrated_bt + abs(integrated_bt.min()) + 0.01
    start_index = end_index
# Three-component PCA of the integrated Butterworth derivatives.
pca = hf.PCA(n_components=3)
projected_bt = pca.fit_transform(resampled_derivatives_butter_cumsum)
pca_butterworth = pd.DataFrame(projected_bt)

# `avg` is another name for the same frame, not a copy. Restricting it and
# turn_vec to rows 68595:72344 was tried at some point and is disabled.
avg = pca_butterworth
avg["state"] = turn_vec.values

# Sliding average over window_size samples for each principal component
# (window_size is set in an earlier step).
moving_average = np.ones(window_size) / window_size
for pc_column in range(3):
    avg[pc_column] = np.convolve(avg[pc_column], moving_average, mode='same')

hf.plot_PCs(avg, avg["state"], 'PCA_butterworth.html')
hf.plot_PC_gif(avg, avg["state"], 'PCA_butterworth.gif')