In [None]:
import trackpy as tp
import os
import pandas as pd
import numpy as np
import numba
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines
import pickle
import cv2
import math
from datetime import date
import datetime
import time
from IPython.display import display
import ipywidgets as widgets
import re
import scipy.optimize as sco
import seaborn as sns
import av
from tqdm.notebook import tqdm_notebook
import winsound
import tkinter as tk
from tkinter import filedialog
import warnings
from scipy.signal import savgol_filter
from IPython.display import display, HTML

warnings.filterwarnings(action='once')
display(HTML("<style>.container { width: 100% !important; }</style>")) # Set the custom CSS to make the notebook width 100% of the window size


def find_analysis_folders(root_folder):
    """
    Recursively collect every directory named "analysis" (case-insensitive).

    @param root_folder - directory at which the os.walk traversal starts

    @return list of full paths, in the order os.walk encounters them
    """
    return [
        os.path.join(parent, name)
        for parent, child_dirs, _ in os.walk(root_folder)
        for name in child_dirs
        if name.lower() == "analysis"
    ]


def get_subfolders(analysis_folders):
    """
    List the immediate sub-directories of each given folder.

    Not recursive: only goes one level below each entry of analysis_folders.
    Plain files are ignored.

    @param analysis_folders - iterable of directory paths

    @return list of full paths to the sub-directories found
    """
    subfolders = []
    for parent in analysis_folders:
        candidates = (os.path.join(parent, entry) for entry in os.listdir(parent))
        subfolders.extend(path for path in candidates if os.path.isdir(path))
    return subfolders



In [None]:
create_folder_list = False


# avi_folder = r'C:\Users\vhorowit\Desktop\BessLawrence\Data'
avi_folder = r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Data'
#blocking = 'Bess'
blocking = 'Rebecca and Alex'
AlexRebecca = True

info_csv = r'G:\Shared drives\Horowitz Lab Notes\Horowitz, Viva - notes and files\subdiffusion 2023\2023 Subdiffusion project data - Sheet1.csv'


#r'G:\Shared drives\Horowitz Lab Notes\Bess Lawrence Data'

if create_folder_list:
    analysis_folders = find_analysis_folders(r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis')
    folderlist = get_subfolders(analysis_folders)
else:
    folderlist = [r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis\2023-05-28',
                 r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis\2023-05-27']

In [None]:
info_df = pd.read_csv(info_csv)

In [None]:
# Keep only the spreadsheet rows for the selected `blocking` group.
# .copy() makes this an independent frame, so the column assignment below
# does not write into a slice of info_df (avoids SettingWithCopyWarning).
this_info_df = info_df[info_df['Blocking'] == blocking].copy()
# Upper-case the colour strings (e.g. 'c0' -> 'C0').
this_info_df['color'] = this_info_df['color'].str.upper()

display(this_info_df)

In [None]:
#folderlist = [r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis\2023-05-27',
#             r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis\2023-05-28']

savefolder = r'G:\Shared drives\Horowitz Lab Notes\Rebecca Dalphin and Alex Axton\Analysis\viva-analysis'
#savefolder = r'C:\Users\vhorowit\Desktop\BessLawrence\Viva-Analysis'
saving = False

subtract_drift_slow = False
calculate_van_hove_slow = False

# scaling, measured in microns per pixel
#scaling = 330 / 1247.96 # 20x1.0, measured 2021-06-17
#scaling = 220 / 1250.04 # 20x1.5, measured 2021-06-17
scaling = 150 / 1127.54 # 40x1.0, measured 2021-06-16
#scaling = 100 / 1130.61 # 40x1.5, measured 2021-06-16
#scaling = 80 / 914.92 # 60x1.0, measured 2021-05-28
#scaling = 60 / 1031.07 # 60x1.5, measured 2021-05-28

In [None]:
def datestring():
    """
    Return the current local date and time as 'YYYY-MM-DD HH;MM;SS'.

    Semicolons separate the time fields instead of colons.
    """
    timestamp_format = '%Y-%m-%d %H;%M;%S'
    return datetime.datetime.now().strftime(timestamp_format)

datestr = datestring()

def remove_number_parentheses(filename):
    """
    Remove parenthesised copy numbers such as "(1)" or "(12)" from a file name.

    Only parentheses that contain nothing but digits are removed; any
    whitespace around them is left untouched.

    @param filename - the file name to clean

    @return the file name without "(<digits>)" groups
    """
    # \d+ (rather than a single \d) so that multi-digit markers like "(10)" are stripped too.
    return re.sub(r'\(\d+\)', '', filename)

def savefigure(savename):
    """
    Save the current matplotlib figure as <savename>.svg, .pdf and .png.

    The svg and pdf exports are best-effort: a failure is reported (with the
    underlying error) and the function carries on. The png export is not
    guarded, so an error there propagates to the caller.

    @param savename - output path without extension
    """
    # transparent true source: https://jonathansoma.com/lede/data-studio/matplotlib/exporting-from-matplotlib-to-open-in-adobe-illustrator/
    export_options = dict(dpi=600, bbox_inches='tight', transparent=True)

    for extension in ('svg', 'pdf'):
        try:
            plt.savefig(savename + '.' + extension, **export_options)
        except Exception as error:
            # `except Exception` (not a bare except) so Ctrl-C still interrupts a slow save.
            print(f'Could not save {extension}: {error!r}')

    plt.savefig(savename + '.png', **export_options)
    print("Saved:\n", savename + '.png')

def beep():
    """
    Play a notification sound, trying three winsound calls in turn.

    The first call that does not raise ends the function. If all of them
    fail the function returns silently, so a missing sound never stops the
    analysis.
    """
    attempts = (
        lambda: winsound.PlaySound(r'C:\Windows\Media\Speech Disambiguation.wav', flags = winsound.SND_ASYNC),
        lambda: winsound.PlaySound("SystemHand", winsound.SND_ALIAS),
        lambda: winsound.Beep(450,150),
    )
    for play in attempts:
        try:
            play()
            return
        except Exception:
            # Best-effort: fall through to the next option. Catching Exception
            # (not a bare except) keeps KeyboardInterrupt/SystemExit working.
            continue

In [None]:
# Note about file names
#   unfiltered.pkl = every particle trajectory, immediately after it was linked.
#   .pkl           = filtered to remove stubs
#   nodrift.pkl    = drift has been subtracted
df_list, filename_list = [], []

for folder in folderlist:
    # The isfile/read_pickle calls below use bare file names, so they rely on this cwd.
    os.chdir(folder)
    for filename in os.listdir(folder):
        if not filename.endswith('control_tracer.pkl'):
            continue
        if not os.path.isfile(filename):
            print('is not a file. **************')
            continue
        # NOTE: unpickling can execute arbitrary code; only load files you trust.
        df_list.append(pd.read_pickle(filename))
        filename_list.append(os.path.join(folder, filename))
            
#filename_list

In [None]:
def extract_short_file(filepath):
    """
    Extract the short sample label from a '...control_tracer.pkl' path.

    Preferred form: '...Data Taken YYYY-MM-DD, <label>control_tracer.pkl',
    from which <label> is returned. Otherwise the label is whatever follows
    the last comma of the base name (or the whole base name when there is no
    comma), with 'control_tracer.pkl' removed.

    @param filepath - path or file name of an analysis pickle

    @return the label with leading/trailing whitespace stripped
    """
    # The dot is escaped so that only a literal '.pkl' matches.
    match = re.search(r'Data Taken (\d{4}-\d{2}-\d{2}), (.*)control_tracer\.pkl', filepath)
    if match:
        return match.group(2).strip()

    base_name = os.path.basename(filepath)  # Get the base name from the path
    # rpartition returns the whole string as the last element when no comma is
    # present, so a comma-free name no longer loses its first character.
    _, _, after_comma = base_name.rpartition(',')
    return after_comma.replace("control_tracer.pkl", "").strip()

shortfiles = [extract_short_file(item) for item in filename_list]


analysis_files_df = pd.DataFrame({'Analysis file': filename_list, 'control_tracer.pkl': shortfiles })

In [None]:
def get_fps_from_avi(avi_file): # avi_file is a file path
    """
    Read the frame rate of a video file.

    OpenCV is tried first. If it raises or reports 0 fps, PyAV's
    average_rate of the first video stream is used instead.

    @param avi_file - path to the video file; must exist

    @return frames per second as a float
    """
    assert os.path.exists(avi_file), f"Video file not found: {avi_file}"
    try:
        video = cv2.VideoCapture(avi_file)
        try:
            fps = video.get(cv2.CAP_PROP_FPS)
        finally:
            video.release()  # release the capture even if reading the property fails
        if fps == 0:
            raise Exception(f"fps must not be zero: {avi_file}")
    except Exception:
        # `except Exception` rather than a bare except, so Ctrl-C is not
        # mistaken for an OpenCV failure during a long directory walk.
        print('Using backup method')
        container = av.open(avi_file)
        try:
            fps = container.streams.video[0].average_rate
        finally:
            container.close()
    return float(fps)

# Walk avi_folder recursively and record the path and frame rate of every .avi movie.
data_files = []
fps_list = []
for root, dirs, files in os.walk(avi_folder):
    for filename in files:
        full_path = os.path.join(root, filename)
        if filename.endswith('.avi'):
            fps = get_fps_from_avi(full_path)
            data_files.append(full_path)
            fps_list.append(fps)

datafiles_df = pd.DataFrame({"Data file":data_files, "fps": fps_list })

# Bare file name, used as the key for merging with the info spreadsheet.
# os.path.basename handles the platform's separator instead of assuming '\\'.
datafiles_df['data_file name'] = datafiles_df['Data file'].apply(os.path.basename)

"""with pd.option_context('display.max_rows', None, 
                       'display.max_columns', None, 
                       'display.max_colwidth', None, 
                       'display.expand_frame_repr', False):
    display(datafiles_df)""";

    


In [None]:
# Attach the spreadsheet rows (this_info_df) to the movies found on disk, keyed on
# the bare movie file name. No `how=` is given, so pandas' default join type applies.
combined_df = datafiles_df.merge(this_info_df, on='data_file name')
# Attach the analysis pickle paths. The key 'control_tracer.pkl' is not created in
# datafiles_df, so it has to be a column supplied by this_info_df.
# NOTE(review): later cells zip combined_df rows with df_list_nodrift by position,
# so the row order produced here matters — confirm it matches filename_list.
combined_df = analysis_files_df.merge(combined_df, on='control_tracer.pkl')

combined_df

In [None]:
list(combined_df['data_file name'])

In [None]:
list(combined_df['Data file'])

In [None]:
(combined_df[['data_file name', 'control_tracer.pkl', 'fps']]).transpose()

In [None]:
list(combined_df['Analysis file'])

In [None]:
len(datafiles_df)

In [None]:
len(combined_df)

In [None]:
len(this_info_df)

In [None]:
len(analysis_files_df)

In [None]:
analysis_files_df

In [None]:
this_info_df.columns

In [None]:
subtract_drift_slow = False

In [None]:
# Compute drift-subtracted trajectories (slow) or load them from the cache pickle.
# All relative file names below are resolved inside savefolder.
os.chdir(savefolder)

start = time.time() # about 3 minutes

if subtract_drift_slow:
    # Project-local module; only needed on the slow path.
    from rotational_drift_subtraction import drift_subtract

    warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
    # Some day this will come back to bite me. Today is not that day.


    # One drift-subtracted DataFrame per entry of df_list, in the same order.
    df_list_nodrift = []
    for df in tqdm_notebook(df_list):
        _, df_nodrift = drift_subtract(tracer = df, show_plots=False ) # here, nodrift means the drift has been subtracted.
        df_list_nodrift.append(df_nodrift)


    # Re-enable the warning
    warnings.filterwarnings("once", category=PendingDeprecationWarning)

    # Cache the results together with the file names they belong to.
    data_dict = {'filename_list': filename_list, 'df_list_nodrift':df_list_nodrift}
    with open('drift_subtracted_trajectories_dictionary.pkl', 'wb') as file:
        pickle.dump(data_dict, file)
else:
    # NOTE: pickle.load can execute arbitrary code; only open cache files you created.
    with open('drift_subtracted_trajectories_dictionary.pkl', 'rb') as file:
        data_dict = pickle.load(file)
    print('Opened file:', str(os.getcwd()), '\n', 'drift_subtracted_trajectories_dictionary.pkl')
    # Extract elements from the dictionary
    # NOTE(review): this replaces the filename_list built from the current folder
    # listing with the cached one; analysis_files_df was built from the former,
    # so the two can disagree if the folders changed since the cache was written.
    filename_list = data_dict['filename_list']
    df_list_nodrift = data_dict['df_list_nodrift']


end = time.time()
print((end-start)/60, 'minutes elapsed')


In [None]:
index = 0 
display(filename_list[index])
df_list_nodrift[index]

In [None]:
df_series_nodrift = pd.DataFrame(data_dict)['df_list_nodrift']

assert len(df_series_nodrift) == len(df_list_nodrift)

In [None]:
# Optional reductions of the trajectory list; both switches are off by default.
debug = False

skipfirst = False

if skipfirst:
    ## Skip the first frame of each movie: it's the most likely to have dropped frames.
    # Rebuilt from df_series_nodrift (the unfiltered copy); keeps rows with frame > 1 only.
    df_list_nodrift = [df_series_nodrift[video][df_series_nodrift[video].frame > 1] for video in range(len(df_series_nodrift))]


# Debug mode keeps a shallow copy of the full list and works on five movies only.
# NOTE(review): combined_df and filename_list are not sliced here, so later
# length assertions against combined_df will fail while debug is True.
if debug and len(filename_list) == len(df_list_nodrift):
    df_list_nodrift_orig = df_list_nodrift.copy()
    df_list_nodrift = df_list_nodrift[10:15] # Just work with a few of the movies.

In [None]:
def manualnorm(histdata, bin_width=None):
    """
    Normalizes histogram data so that it integrates to one,
    i.e. sum(result) * bin width == 1.

    @param histdata - the starting histogram
    @param bin_width - width of one histogram bin. When omitted, the
                       notebook-level variable `binwidth` is used, which must
                       already be defined (the original behaviour).

    @return the normalized histogram
    """
    if bin_width is None:
        # Fall back to the notebook-level global so existing one-argument calls still work.
        bin_width = binwidth
    return (1/(histdata.sum()*bin_width))*histdata
    
def gaussian(x,a,center,sigma):
    """
    Evaluates a Gaussian curve f at x.

    The bracketed numbers are the positions of the parameters after x,
    matching height_index, center_index and sigma_index.

    @param x - the independent variable
    @param a [0] - Gaussian amplitude
    @param center [1] - Gaussian center
    @param sigma [2] - Gaussian standard deviation (width)

    @return f(x)
    """
    squared_offset = (x - center) ** 2
    two_variance = 2 * (sigma ** 2)  # sigma is standard deviation.
    return a * np.exp(-squared_offset / two_variance)

height_index = 0
center_index = 1
sigma_index = 2 # after x, the parameters are 0: a, 1: center, 2: sigma

In [None]:
len(shortfiles)

In [None]:
len(df_list_nodrift)

In [None]:
shortfiles

In [None]:
#shortfiles == combined_df['control_tracer.pkl']  # all true

# Mean square displacement

In [None]:
df_series_nodrift # this one doesn't have a skip-first.

In [None]:
os.getcwd()

In [None]:
## Set values
emsd_max_lagtime = 100
saving = True

calc_fresh_emsd_slow = False    # True: 7 minutes. False: 3 seconds.

start = time.time() 
fig, ax = plt.subplots(1, 1)

# Create empty lists to store handles and labels
handles = []
labels = []
label_colors = {}

if calc_fresh_emsd_slow:
    emsd_list = []

fit_A_values = []  # List to store this_fit.A[0] values
fit_n_values = []  # List to store this_fit.n[0] values
## Iterate through files and calculate emsd

# tp.imsd(data, mpp=scaling, fps=fps, max_lagtime=1000)
# tp.emsd(data, mpp=scaling, fps=fps, max_lagtime = emsd_max_lagtime).replace(0, np.nan).dropna()

## I'm getting this warning for trackpy but that is not my problem.
# Warning message:
# Using the level keyword in DataFrame and Series aggregations is deprecated and will be removed in a future version. 
# Use groupby instead. df.median(level=1) should use df.groupby(level=1).median().
# https://github.com/soft-matter/trackpy/issues/738
assert len(df_series_nodrift) == len(combined_df)
assert len(df_list_nodrift) == len(combined_df)

if calc_fresh_emsd_slow: # Calculate eMSD
    # Silence FutureWarning while trackpy runs (see the note on the trackpy issue above).
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Trajectories and combined_df rows are paired by position; each movie uses its own fps.
    for drift_subtracted_traj_df, (_, file_info) in zip(df_list_nodrift, combined_df.iterrows()):
        # Zeros are turned into NaN and dropped (they cannot be shown on the log-log plot below).
        this_emsd = tp.emsd(
            drift_subtracted_traj_df, 
            mpp=scaling, 
            fps=file_info.fps, 
            max_lagtime = emsd_max_lagtime).replace(0, np.nan).dropna()
        emsd_list.append(this_emsd)

    warnings.filterwarnings("once", category=FutureWarning)

    #Save emsd_list 
    # The file name records whether the skipfirst filter was active.
    if skipfirst:
        string = ',frame_1_removed'
    else:
        string = ',frame_1_included'
    datestr = datestring()
    emsd_file = datestr + 'emsd_file' + string + '.pkl' 

    # Written relative to the current working directory.
    with open(emsd_file, 'wb') as file:
        pickle.dump(emsd_list, file)
    print('Saved:', emsd_file)            

else: # Load eMSD file
    # Hard-coded cache name from an earlier run; update it after recalculating.
    # NOTE: pickle.load can execute arbitrary code; only open cache files you created.
    file_to_open = '2023-08-10 20;57;34emsd_file,frame_1_included.pkl'
    with open(file_to_open, 'rb') as file:
        emsd_list = pickle.load(file)
    print('Loaded:',file_to_open )
    
# Plot and curve-fit eMSD
# emsd_list, df_list_nodrift and the rows of combined_df are paired by position.
for this_emsd,drift_subtracted_traj_df, (_, file_info) in zip(emsd_list, df_list_nodrift, combined_df.iterrows()):

    this_emsd.plot(loglog=True, figsize = [3,3], style = '.', color = file_info.color,  grid=False, alpha = .3, ax = ax )
    this_fit = tp.utils.fit_powerlaw(this_emsd, plot = False)
    ## Just plot top and bottom point, since it's a straight line
    # Use this movie's own frame rate. The notebook-level `fps` is only the value
    # left over from the last AVI inspected and is wrong for every other movie.
    frame_interval = 1 / file_info.fps
    this_fit_x = [frame_interval, frame_interval * len(this_emsd)]
    this_fit_y = [this_fit.A[0] * np.power(this_fit_x[0], this_fit.n[0]),
                         this_fit.A[0] * np.power(this_fit_x[1], this_fit.n[0])]
    ax.plot(this_fit_x, this_fit_y, color= file_info.color, alpha = .3)

    # Store this_fit.A[0] and this_fit.n[0] values
    fit_A_values.append(this_fit.A[0])
    fit_n_values.append(this_fit.n[0])

    # Append the handle and label to the lists
    # NOTE(review): get_legend_handles_labels returns every labelled artist drawn so
    # far, so `handles` grows faster than `labels`; the legend colours are reset from
    # label_colors afterwards, which hides the mismatch.
    line_handle, _ = ax.get_legend_handles_labels()
    handles.extend(line_handle)
    labels.append(file_info.Description)

    # Map label to color
    if file_info.Description not in label_colors:
        label_colors[file_info.Description] = file_info.color
    

plt.xlabel('Time (s)')
# Raw string: '\m' is not a valid escape sequence in a normal string literal and
# triggers a warning on recent Python versions. The rendered label is unchanged.
plt.ylabel(r'eMSD ($\mu$m$^2$)')
plt.axis('equal')

# Add fit_A_values and fit_n_values as columns to combined_df
combined_df['coeff A [um^2/s]'] = fit_A_values
combined_df['alpha'] = fit_n_values

# Consolidate the figure legend to just the unique labels
new_handles = []
new_labels = []
handle_mapping = {}
for handle, label in zip(handles, labels):
    if label not in handle_mapping:
        handle_mapping[label] = handle
        new_handles.append(handle)
        new_labels.append(label)


# Set the modified handles and labels in the legend
legend = ax.legend(handles=new_handles, labels=new_labels, bbox_to_anchor=(1, 1), loc='upper left')

# Set the colors of legend lines based on label_colors dictionary
for line, label in zip(legend.get_lines(), new_labels):
    line.set_color(label_colors[label])

# Set the alpha value of legend lines to 1.0 (no transparency)
for line in legend.get_lines():
    line.set_alpha(1.0)


if saving:
    datestr = datestring()
    savefigure(datestr + '_eMSD')
    
end = time.time()
print('Elapsed: ', (end-start)/60, 'minutes')
beep()

In [None]:
"""this_emsd = tp.emsd(
        drift_subtracted_traj_df, 
        mpp=scaling, 
        fps=file_info.fps, 
        max_lagtime = emsd_max_lagtime).replace(0, np.nan).dropna()""";

In [None]:
#this_emsd.plot(loglog=True, figsize = [3,3], style = '.', color = file_info.color,  grid=False, alpha = 1,  )


In [None]:
this_emsd

In [None]:
#this_fit = tp.utils.fit_powerlaw(this_emsd, plot = True)

In [None]:
#this_fit

In [None]:
file_info['Analysis file']

In [None]:
combined_df

In [None]:
combined_df.columns

In [None]:
fig0, ax0 = plt.subplots(1, 1)


xlabel = 'avg PEG molec mass (g/mol)'
ylabel = 'alpha'
plt.plot(combined_df[xlabel], combined_df[ylabel], '.' )
plt.xlabel(xlabel)
plt.ylabel(ylabel);

In [None]:
# Load a previously saved summary table and line it up with combined_df.
# NOTE: absolute, machine-specific path; pd.read_pickle can execute arbitrary
# code, so only open files you trust.
ipynb_df = pd.read_pickle('C:\\Users\\vhorowit\\Desktop\\Rebecca Dalphin and Alex Axton\\ipynb summary.pkl')

# Convert both columns to lowercase before merging
ipynb_df['match_column'] = ipynb_df['ipynb file'].str.lower()
# Harmonise wording differences between the two sources.
ipynb_df['match_column'] = ipynb_df['match_column'].str.replace('200 peg and 2000 peg', '200 and 2000 peg')
ipynb_df['match_column'] =  ipynb_df['match_column'].str.replace('8000 peg and 20000 peg', '8000 and 20000 peg')
# NOTE(review): the pattern and the replacement on the next line are identical,
# so this call changes nothing — confirm what the intended target spelling was.
ipynb_df['match_column'] =  ipynb_df['match_column'].str.replace('8000 peg @ 25mg per ml', '8000 peg @ 25mg per ml')
ipynb_df['match_column'] = ipynb_df['match_column'].str.strip()
# Same normalisation for combined_df: lower-case, double spaces replaced by a single space, stripped.
combined_df['match_column'] = combined_df['control_tracer.pkl'].str.lower().str.replace('  ', ' ')
combined_df['match_column'] = combined_df['match_column'].str.strip()

# Outer merge keeps unmatched rows from both sides; indicator=True adds a '_merge'
# column telling which side each row came from.
merged_df = pd.merge(ipynb_df, combined_df, on='match_column', how='outer', indicator=True)

display(merged_df[['control_tracer.pkl', 'n', 'A', 'power n', 'coef A', 'alpha']])
#merged_df[ 'n'] - merged_df['power n'] ## no difference


## *** to do:
## Figure out why powers aren't the same numbers.
## n is from the ipynb
## power n is from Rebecca and Alex's spreadsheet. It matches the ipynb!
## alpha is my own calculations here. it does NOT match.

### 2023-08-10 I just fixed the spreadsheet file and I need to make sure to reload the correct csv

In [None]:
merged_df[['kurtosis (15 frames)','excess kurtosis (from cell 47) (frame 15)' ]]

In [None]:
merged_df

In [None]:
# Replace missing values in the 'color' column with 'C3'.
# fillna is vectorised and does not depend on merged_df having a default
# 0..n-1 index, unlike label-based assignment with an enumerate counter.
merged_df['color'] = merged_df['color'].fillna('C3')

In [None]:
def _scatter_from_merged(xcolumn, ycolumn, x_units=''):
    """
    Scatter merged_df[ycolumn] against merged_df[xcolumn] on a new 3x3 inch figure.

    Points are coloured per row from merged_df.color. The x axis is labelled
    with the column name plus x_units, the y axis with the column name.

    @return (figure, axes) of the new plot
    """
    figure, axes = plt.subplots(1, 1, figsize = [3,3])
    axes.scatter(merged_df[xcolumn], merged_df[ycolumn], color = merged_df.color)
    axes.set_xlabel(xcolumn + x_units)
    axes.set_ylabel(ycolumn)
    return figure, axes


# Exponents from the three sources plotted against PEG molecular mass ...
fig0, ax1 = _scatter_from_merged('avg PEG molec mass (g/mol)', 'power n')
fig0, ax3 = _scatter_from_merged('avg PEG molec mass (g/mol)', 'alpha')
fig0, ax4 = _scatter_from_merged('avg PEG molec mass (g/mol)', 'n')
# ... and the spreadsheet exponent against PEG 20000 concentration.
fig0, ax2 = _scatter_from_merged('PEG 20000 concentration', 'power n', x_units=' (mg/mL)')


# Reuse the legend entries built in the eMSD cell, if that cell has been run.
if 'new_handles' in dir():
    # A dedicated loop name so the notebook-level `ax` is not overwritten.
    for legend_ax in [ax1, ax2]:
        # Set the modified handles and labels in the legend
        legend = legend_ax.legend(handles=new_handles, labels=new_labels, bbox_to_anchor=(1, 1), loc='upper left')

        # Set the colors of legend lines based on label_colors dictionary
        for line, label in zip(legend.get_lines(), new_labels):
            line.set_color(label_colors[label])

        # Set the alpha value of legend lines to 1.0 (no transparency)
        for line in legend.get_lines():
            line.set_alpha(1.0)

In [None]:
# Compare the spreadsheet exponent with the exponent fitted in this notebook.
fig0, ax0 = plt.subplots(1, 1, figsize=[3, 3])
xlabel = 'PEG 20000 concentration'
concentration = combined_df[xlabel]

ylabel = 'power n'
ax0.scatter(concentration, combined_df[ylabel], color='red', label="R&A's spreadsheet")
ylabel = 'alpha'
ax0.plot(concentration, combined_df[ylabel], '.', color='blue', label="Viva's analysis")

ax0.set_xlabel(xlabel + ' (mg/mL)')
ax0.set_ylabel('eMSD Exponent')
ax0.legend(bbox_to_anchor=(1, 1), loc='upper left');

In [None]:
combined_df.columns

In [None]:
combined_df[['control_tracer.pkl','power n', 'alpha','excess kurtosis (from cell 47) (frame 15)']] # There's no difference between the exponent Bess obtained and the exponent I obtained.

In [None]:
len(emsd_list) == len(shortfiles)

In [None]:
## Plot derivative of MSD plot.

# Horizontal reference line at a log-log slope of 1.
plt.axhline(1, lw = 1 ,color = 'k')
for this_emsd, color, description in zip(emsd_list, combined_df.color, combined_df.Description):

    # Calculate the discrete derivative
    # (slope between neighbouring points in log10-log10 space)
    x = np.log10(this_emsd.index)
    y = np.log10(this_emsd)
    dy_dx = np.diff(y) / np.diff(x)

    # Plot the derivative
    # np.diff is one element shorter than its input, hence x[1:].
    plt.plot(x[1:], dy_dx, '.', color = color, lw = 0.8)
    plt.plot(x[1:], dy_dx, color = color, lw = 0.8, alpha = 0.3)

    # Apply smoothing to the derivative
    smoothed_dy_dx = savgol_filter(dy_dx, window_length=9, polyorder=3)
    plt.plot(x[1:], smoothed_dy_dx, label=description, color=color, lw = 1) # Plot the smoothed derivative

# Axis labels do not depend on the movie, so they are set once after the loop.
plt.xlabel('log10 (time (s))')
plt.ylabel('d log10 eMSD /d logt')

#plt.xlim(xmax = 0.25)
#plt.ylim(ymin = 0, ymax = 1.8)
plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

In [None]:
combined_df['control_tracer.pkl']

In [None]:
## plot smoothed derivative
# (savgol_filter is already imported in the notebook's first cell.)

# Horizontal reference line at a log-log slope of 1.
plt.axhline(1, lw=0.5, color='gray')

palette_name = "tab20b" # Set the color palette
palette = sns.color_palette(palette_name) # Get the color palette

for i, (this_emsd, data_file) in enumerate(zip(emsd_list, combined_df['movie'])):
    # Discrete slope between neighbouring points in log10-log10 space.
    x = np.log10(this_emsd.index)
    y = np.log10(this_emsd)
    dy_dx = np.diff(y) / np.diff(x)

    # Apply smoothing to the derivative
    smoothed_dy_dx = savgol_filter(dy_dx, window_length=30, polyorder=3)

    # Plot the smoothed derivative with the corresponding color from the palette
    # (colours repeat when there are more movies than palette entries)
    plt.plot(x[1:], smoothed_dy_dx, label=data_file, color=palette[i % len(palette)], alpha=1, lw=3)


plt.xlabel('log10 (time (s))')
# The plotted quantity is d log10(eMSD) / d log10(t), not a derivative with respect to t.
plt.ylabel('d log10 eMSD /d logt')
plt.xlim(xmax = 0)
plt.ylim(ymin=0.39, ymax=1.1)
plt.title('smoothed')

plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

In [None]:
combined_df

# Van Hove

In [None]:
def create_kurtosis_df_and_van_hove(file, filepath, df, fps, this_description, maxlagtime, skip, scaling, binsequence):
    """
    Compute the x and y van Hove histograms of one movie for a range of lag
    times, together with the kurtosis of each histogram.

    @param file - not used inside the function; kept so existing calls still work
    @param filepath - stored in the 'File' column of every output row
    @param df - trajectory table with 'frame', 'particle', 'x' and 'y' columns
    @param fps - frames per second, used to convert lag frames to seconds
    @param this_description - stored in the 'Description' column
    @param maxlagtime - largest lag time in frames (inclusive)
    @param skip - step between successive lag times, in frames
    @param scaling - passed to trackpy as mpp
    @param binsequence - passed to trackpy as bins

    @return (one_movie_kurtosis_df, trackshistlistx, trackshistlisty, this_lagtimelist):
            a table with two rows (x and y) per lag time, the lists of x and y
            histograms, and the lag times in seconds
    """
    indexed = df.set_index(['frame', 'particle'])
    tracksbyframex = indexed['x'].unstack()
    tracksbyframey = indexed['y'].unstack()
    trackshistlistx = []
    trackshistlisty = []
    dataframes = []
    this_lagtimelist = []

    warnings.filterwarnings("ignore", category=RuntimeWarning)
    try:
        for vanhove_lagtime_inframes in range(1, maxlagtime + 1, skip):
            this_time = vanhove_lagtime_inframes / fps
            this_lagtimelist.append(this_time)

            # calculate vanhove / probability distribution function
            trackshistx = manualnorm(tp.motion.vanhove(
                tracksbyframex, lagtime=vanhove_lagtime_inframes, mpp=scaling, bins=binsequence, ensemble=True))
            trackshistx.name = 'x Probability distribution'
            trackshisty = manualnorm(tp.motion.vanhove(
                tracksbyframey, lagtime=vanhove_lagtime_inframes, mpp=scaling, bins=binsequence, ensemble=True))
            trackshisty.name = 'y Probability distribution'

            trackshistlistx.append(trackshistx)
            trackshistlisty.append(trackshisty)

            # NOTE(review): .kurtosis() is taken over the normalized histogram
            # values themselves — confirm this is the intended statistic.
            kx = trackshistx.kurtosis()
            ky = trackshisty.kurtosis()

            dict_row_pair = {'File': [filepath]*2,
                'Description': [this_description]*2,
                'time (s)': [this_time]*2,
                'kurtosis': [kx, ky],
                'xy': ['x', 'y'],
                'histogram': [trackshistx, trackshisty],
                }

            dataframes.append(pd.DataFrame(dict_row_pair))
    finally:
        # Always restore the warning filter, even if a lag time fails, so
        # RuntimeWarnings are not left silenced for the rest of the session.
        warnings.filterwarnings("once", category=RuntimeWarning)

    one_movie_kurtosis_df = pd.concat(dataframes)
    return one_movie_kurtosis_df, trackshistlistx, trackshistlisty, this_lagtimelist

def create_sigma_df(this_description, filepath, this_lagtimelist, trackshistlistx, trackshistlisty):
    """
    Fit a Gaussian to every x and y van Hove histogram of one movie.

    @param this_description - stored in the 'Description' column
    @param filepath - stored in the 'File' column
    @param this_lagtimelist - lag times in seconds, one per histogram pair
    @param trackshistlistx - x histograms (index = bin position, values = height)
    @param trackshistlisty - y histograms, same layout

    @return (one_movie_sigma_df, gaussian_fit_paramsxlist, gaussian_fit_paramsylist,
             gaussian_fit_covmxlist, gaussian_fit_covmylist): a table with two rows
             (x and y) per lag time, followed by the raw fit parameters and
             covariance matrices for x and y
    """
    per_lag_frames = []
    params_x_all, params_y_all = [], []
    cov_x_all, cov_y_all = [], []

    for lag_seconds, hist_x, hist_y in zip(this_lagtimelist, trackshistlistx, trackshistlisty):
        params_x, cov_x = sco.curve_fit(gaussian, hist_x.index, hist_x.values)
        params_y, cov_y = sco.curve_fit(gaussian, hist_y.index, hist_y.values)

        # Standard error of sigma: square root of its diagonal covariance entry.
        stderr_pair = [np.sqrt(cov_x[sigma_index, sigma_index]),
                       np.sqrt(cov_y[sigma_index, sigma_index])]

        row_pair = pd.DataFrame({
            'File': [filepath]*2,
            'Description': [this_description]*2,
            'time (s)': [lag_seconds]*2,
            # Gaussian width should be positive:
            'width sigma': [abs(params_x[sigma_index]), abs(params_y[sigma_index])],
            'sigma stderr': stderr_pair,
            'xy': ['x', 'y'],
            'gaussian_param_height': [params_x[height_index], params_y[height_index]],
            'gaussian_param_center': [params_x[center_index], params_y[center_index]],
            'gaussian_fit_cov': [cov_x, cov_y],
        })
        per_lag_frames.append(row_pair)

        # Raw fit results, returned as lists; sigma here may still be negative.
        params_x_all.append(params_x)
        params_y_all.append(params_y)
        cov_x_all.append(cov_x)
        cov_y_all.append(cov_y)

    one_movie_sigma_df = pd.concat(per_lag_frames)

    return (one_movie_sigma_df,
            params_x_all, params_y_all, cov_x_all, cov_y_all)

def remove_dup_columns(df):
    """Return `df` without columns whose label repeats an earlier column.

    The first column carrying each label is kept and the original column
    order is preserved; `df` itself is not modified.
    """
    # True for every column that is the first one with its label.
    is_first_occurrence = ~df.columns.duplicated()
    return df.iloc[:, is_first_occurrence]

In [None]:
# Switch read by the van Hove cell below: True recomputes everything from the
# tracks (slow); False opens a file dialog to load a previously saved pickle.
calculate_van_hove_slow = False
# It takes about two minutes to do 5 lagtimes.
# 120 lagtimes takes 73 minutes or 117 minutes.

In [None]:
## Calculate probability distribution function (van hove)
# If calculate_van_hove_slow is True: for every movie, compute the van Hove
# histograms and kurtosis, fit Gaussians to the histograms, combine everything
# into big_data_df and pickle the results into savefolder.
# Otherwise: ask for a previously saved pickle and restore the same variables.

if calculate_van_hove_slow:
    maxlagtime = 120#40 # in number of frames  
            # for 120, maybe vanhove_max of 7. for 40, perhaps 4.  (120 took 99 minutes.)
    skip = 1#int(maxlagtime/3)
    vanhove_max_x = 7
    binwidth = 0.04
    figsize = [8, 8]

    start_time = time.time()

    # initializations before for loop
    binsequence = np.arange(-vanhove_max_x, vanhove_max_x, binwidth)
    trackshistlistlist = []
    trackshistlistlisty = []
    kurtosisdflist = []
    color_mapping = {}
    numplots = int(maxlagtime / skip)
    i = 0  # number of distinct descriptions that have been assigned a color so far

    gaussian_df_list = []
    gaussian_fit_paramsxlistlist = []
    gaussian_fit_paramsylistlist = []
    gaussian_fit_covmxlistlist = []
    gaussian_fit_covmylistlist = []
    lagtimelistlist = []
    colorlist = []

    ## Calculate the Gaussian fitting parameters and store the information
    do_curvefit = True

    assert(len(shortfiles) ==len(df_list_nodrift)) 

    # Colors of the active matplotlib property cycle, used when combined_df has no color column.
    prop_cycle_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ## Iterate through each movie
    for (row_tuple, file, filepath, df, fps, ii) in tqdm_notebook(zip(combined_df.itertuples(), 
                                                             combined_df['control_tracer.pkl'], 
                                                             combined_df['Analysis file'], 
                                                             df_list_nodrift, 
                                                             combined_df.fps, 
                                                             range(len(combined_df))),
                                                                 total = len(df_list_nodrift)):
        if AlexRebecca:
            # Strip a trailing "(n)" replicate marker from the file name.
            this_description = re.sub(r'\(\d\)$', '', file).strip()
        else:
            this_description = row_tuple.Description

        try:
            color = row_tuple.color # maybe combined_df already has a color column
        except AttributeError:
            # No color column: give each distinct description the next cycle color.
            if this_description not in color_mapping:
                color_mapping[this_description] = prop_cycle_colors[i % len(prop_cycle_colors)]
                i += 1

            color = color_mapping[this_description]

        ## Calculate van hove and kurtosis for a range of lagtimes
        one_movie_kurtosis_df, trackshistlist, trackshistlisty, this_lagtimelist = \
            create_kurtosis_df_and_van_hove(file, filepath, df, fps, this_description, 
                               maxlagtime, skip, scaling, binsequence)

        kurtosisdflist.append(one_movie_kurtosis_df)
        trackshistlistlist.append(trackshistlist)
        trackshistlistlisty.append(trackshistlisty)
        colorlist.append(color)

        if do_curvefit:
            one_movie_sigma_df, \
                gaussian_fit_paramsxlist, gaussian_fit_paramsylist, gaussian_fit_covmxlist, gaussian_fit_covmylist = \
                create_sigma_df(this_description, filepath, this_lagtimelist, trackshistlist, trackshistlisty)

            # Store the lists for each file and lag time
            gaussian_df_list.append(one_movie_sigma_df)
            lagtimelistlist.append(this_lagtimelist)
            gaussian_fit_paramsxlistlist.append(gaussian_fit_paramsxlist)
            gaussian_fit_paramsylistlist.append(gaussian_fit_paramsylist)
            gaussian_fit_covmxlistlist.append(gaussian_fit_covmxlist)
            gaussian_fit_covmylistlist.append(gaussian_fit_covmylist)

    # Combine the per-movie tables; both are keyed by (lag time, file, axis).
    big_kurtosis_df = pd.concat(kurtosisdflist)
    big_kurtosis_df = big_kurtosis_df.set_index(["time (s)", "File", "xy"])

    big_gaussian_df = pd.concat(gaussian_df_list)
    big_gaussian_df = big_gaussian_df.set_index(["time (s)", "File", "xy"])

    big_data_df = pd.concat([big_kurtosis_df, big_gaussian_df], axis = 1)

    ## Saving results
    # Create a dictionary to save
    data_dict_vanhove = {
        'filename_list': filename_list,
        'binsequence': binsequence,
        'trackshistlistlist': trackshistlistlist,
        'trackshistlistlisty': trackshistlistlisty,
        'kurtosisdflist': kurtosisdflist,
        #'kurtosisdflisty': kurtosisdflisty,
        'gaussian_fit_paramsxlistlist': gaussian_fit_paramsxlistlist,
        'gaussian_fit_paramsylistlist': gaussian_fit_paramsylistlist,
        'gaussian_fit_covmxlistlist': gaussian_fit_covmxlistlist,
        'gaussian_fit_covmylistlist': gaussian_fit_covmylistlist,
        'lagtimelistlist': lagtimelistlist,
        'maxlagtime': maxlagtime,
        'skip': skip,
        'scaling': scaling,
        'fps': fps,  # NOTE: loop variable, i.e. the fps of the last movie processed
        'colorlist': colorlist,
        'big_data_df': big_data_df, # added 2023-06-22
    }

    today = date.today().isoformat() # Get today's date in ISO format
    pickle_file = f'data_dict_vanhove_{today}_{len(this_lagtimelist)}lagtimes.pkl' # Create the pickle file name

    os.chdir(savefolder)
    # Save the dictionary to a pickle file
    with open(pickle_file, 'wb') as f:
        pickle.dump(data_dict_vanhove, f)

    print("Saved:", os.path.join(savefolder, pickle_file))   
    beep()

    #calculate_van_hove_slow = False

    end_time = time.time()
    execution_time_minutes = (end_time - start_time) / 60
    print("Execution time:", execution_time_minutes, "minutes")
else: # open saved file
    root = tk.Tk() # Create a Tkinter root window
    root.lift()
    root.withdraw()  # Hide the root window

    print('Look for the file dialogue window!')
    beep()
    beep()

    try:
        # Open the file dialog to select the pickle file
        file_path = filedialog.askopenfilename(filetypes=[("Pickle Files", "*.pkl")])
    finally:
        # Release the hidden root window so it does not linger after the dialog closes.
        root.destroy()

    # Check if a file was selected
    if file_path:
        # SECURITY: unpickling executes code stored in the file; only open pickles you created.
        with open(file_path, 'rb') as f:
            data_dict_vanhove = pickle.load(f)
        # Extract the variables from the dictionary
        filename_list = data_dict_vanhove['filename_list']
        binsequence = data_dict_vanhove['binsequence']
        trackshistlistlist = data_dict_vanhove['trackshistlistlist']
        trackshistlistlisty = data_dict_vanhove['trackshistlistlisty']
        kurtosisdflist = data_dict_vanhove['kurtosisdflist']
        # Optional key: the saving code above no longer writes it.
        if 'kurtosisdflisty' in data_dict_vanhove:
            kurtosisdflisty = data_dict_vanhove['kurtosisdflisty']
            print('Using kurtosisdflisty')
        gaussian_fit_paramsxlistlist = data_dict_vanhove['gaussian_fit_paramsxlistlist']
        gaussian_fit_paramsylistlist = data_dict_vanhove['gaussian_fit_paramsylistlist']
        gaussian_fit_covmxlistlist = data_dict_vanhove['gaussian_fit_covmxlistlist']
        gaussian_fit_covmylistlist = data_dict_vanhove['gaussian_fit_covmylistlist']
        # Files hold either a single 'lagtimelist' or a per-movie 'lagtimelistlist'.
        if 'lagtimelist' in data_dict_vanhove:
            lagtimelist = data_dict_vanhove['lagtimelist']  # in seconds
        else:
            lagtimelistlist = data_dict_vanhove['lagtimelistlist']  # in seconds
        maxlagtime = data_dict_vanhove['maxlagtime']  # in number of frames
        skip = data_dict_vanhove['skip']  # in number of frames
        scaling = data_dict_vanhove['scaling']  # in microns per pixel
        fps = data_dict_vanhove['fps']  # in Hz
        if 'big_data_df' in data_dict_vanhove:
            ## If big_data_df has duplicated columns, this will remove them.
            big_data_df = remove_dup_columns(data_dict_vanhove['big_data_df'])
        else:
            print('Could not extract big_data_df; this must be an older file.')
        if 'colorlist' in data_dict_vanhove:
            colorlist = data_dict_vanhove['colorlist']
        else:
            print('Could not extract colorlist; this is a file from June 29 or earlier.')

        print('Loaded file', file_path)
    else:
        print("No file selected.")
        
# Flatten big_data_df for plotting: index levels become ordinary columns, repeated
# columns are dropped, Gaussian widths are made positive, and every row gets the
# color of its movie from combined_df. Each step is guarded so that running this
# again on an already-processed big_data_df leaves it unchanged.
try:
    # Only a named index carries data worth keeping; resetting a plain default
    # index would just add a spurious 'index' column on every re-run.
    if any(level_name is not None for level_name in big_data_df.index.names):
        big_data_df = big_data_df.reset_index()
    big_data_df = remove_dup_columns(big_data_df)

    big_data_df['width sigma'] = big_data_df['width sigma'].abs() # width should be positive

    ## Add color column to big_data_df.
    # Merging twice would produce color_x / color_y and break big_data_df.color.
    if 'color' not in big_data_df.columns:
        big_data_df = big_data_df.merge(combined_df[['Analysis file', 'color']], left_on='File', right_on='Analysis file', how='left')

except NameError:
    print('No big_data_df')

In [None]:
# Inspect the column labels of big_data_df.
big_data_df.columns

In [None]:
# Display big_data_df itself.
big_data_df

In [None]:
# Show the 'File' entry in the first row of the first table in kurtosisdflist.
kurtosisdf = kurtosisdflist[0]
kurtosisdf.File.iloc[0]

In [None]:
# Show the 'Analysis file' entry of combined_df at index label 0.
combined_df['Analysis file'][0]

In [None]:
# Number of rows in big_data_df.
len(big_data_df)

In [None]:
# Display the current maxlagtime value.
maxlagtime

In [None]:
# Sanity check: the number of lag times should equal int(maxlagtime/skip).
# `lagtimelist` is checked when that name exists; only if that assertion fails
# is `this_lagtimelist` (when it exists) checked instead, and a failure of that
# second assertion propagates.
try:
    if 'lagtimelist' in dir():
        assert  (int(maxlagtime/skip) == len(lagtimelist))
except AssertionError:
    if 'this_lagtimelist' in dir():
        assert ((int(maxlagtime/skip) == len(this_lagtimelist)))

In [None]:
#assert(len(big_gaussian_df) == len(shortfiles) * len(this_lagtimelist) * 2) # 2 for xy

In [None]:
# Summary statistics of big_data_df.
big_data_df.describe()

In [None]:
# Scatter kurtosis against the inverse Gaussian width, one point per row of
# big_data_df, colored by that row's 'color' entry.
plt.scatter(1/(big_data_df['width sigma']), big_data_df['kurtosis'], color=big_data_df['color'], alpha=.15)
plt.ylabel('kurtosis')
# Raw string: '\m' is not a valid Python escape sequence; the backslash is for mathtext.
plt.xlabel(r'1/Gaussian Sigma (1/$\mu$m)')
#plt.yscale("log")
#plt.xscale("log")

if saving:
    datestr = datestring()
    savefigure(datestr + 'kurtosis vs gaussian width')
plt.show()

In [None]:
# Plot the 'time (s)' column against row position.
big_data_df['time (s)'].plot()

In [None]:
# Distinct lag-time values, in order of first appearance.
big_data_df['time (s)'].unique()

In [None]:
# Inspect the column labels of big_data_df.
big_data_df.columns

In [None]:
# Distinct lag-time values in ascending order.
np.sort(big_data_df['time (s)'].unique())

In [None]:
# Preview the distinct lag times at positions 5, 10 and -20 of the unique-value array.
big_data_df['time (s)'].unique()[[5,10, -20]]

In [None]:
# For a few selected lag times, draw one figure per quantity (Gaussian width,
# kurtosis): a violin + narrow box plot per Description, ordered by descending
# mean, with the individual x and y values overlaid as a swarm.
saving = True

## key, plot title, plot ylabel
# Raw string: '\m' is not a valid Python escape sequence; the backslash is for mathtext.
plotable1 = ('width sigma', 'Gaussian Sigma', r'Gaussian Sigma ($\mu$m)' )
plotable2 = ('kurtosis', 'kurtosis', 'kurtosis')


# Positions 5, 10, -20 and -1 of the distinct lag times (needs at least 20 of them).
for time_value in big_data_df['time (s)'].unique()[[5,10, -20,-1]]:
#for time_value in [0.05784025, 0.40488172,0.40976889, 3.065533,2.9913129, 3.07326668 ]:#np.sort(big_data_df['time (s)'].unique()):
#for time_value in np.sort(big_data_df['time (s)'].unique()):
    for plotable in [plotable1, plotable2]:
        key, plottitle, ylabel = plotable

        # Filter the DataFrame based on approximate equality
        data = big_data_df[np.isclose(big_data_df['time (s)'], time_value)]

        # Define the order for the horizontal axis (largest to smallest)
        order = data.groupby('Description')[key].mean().sort_values(ascending=False).index

        if debug:
            display(data[['Description', 'color']])
            display(list(data['color'].unique()))
            display(order)

        # Create a dictionary to map description to color
        color_dict = dict(zip(data['Description'], data['color']))

        # Generate color_palette in the desired order
        color_palette = [color_dict[desc] for desc in order if desc in color_dict]

        # Create a figure and axes
        fig, ax = plt.subplots()

        # Plot the violin plot with the custom color palette
        sns.violinplot(x='Description', y=key, data=data, 
                       palette=color_palette,
                       inner = None, # no automatic boxplot
                       split=False, order=order, ax=ax)
        # Only the violins exist on the axes at this point: fade them and keep them at the back.
        for violin in ax.collections:
            violin.set_alpha(0.5)
            violin.set_zorder(1)

        sns.boxplot(x='Description', y=key, data=data, order = order,palette=color_palette,
                    showfliers=False, # don't show outliers
                    width=0.06, ax=ax,
                )

        sns.swarmplot(x='Description', y=key, hue='xy', data=data, order=order, palette='dark', 
                  dodge=True, size=5, edgecolor='gray', linewidth=0.5, ax=ax)

        ax.set_xlabel('')
        ax.set_ylabel(ylabel)
        ax.set_title(plottitle + ' at Time ' + str(time_value) + ' s')
        plt.xticks(rotation=90)

        # NOTE(review): machine-specific output folder; consider a configurable path.
        os.chdir(r'C:\Users\vhorowit\Documents\fig-expt')
        datestr = datestring()
        if saving:
            savefigure(datestr + ' ' +  plottitle + ' violin')
        plt.show()

In [None]:
# Number of entries in filename_list.
len(filename_list)

In [None]:
# Number of tables in kurtosisdflist.
len(kurtosisdflist)

In [None]:
# Plot previously calculated probability distribution function
# This part sets the plotting options and creates the figures:
#   fig / axs   : grid of x-direction distributions (only if showprobdist)
#   fig3 / axsy : grid of y-direction distributions (only if showprobdist)
#   fig2 / ax2  : kurtosis versus lag time
#   fig4 / ax4  : Gaussian width versus lag time

showprobdist = False
show_curvefit = True
plots_per_row = 5
debug = False


if showprobdist:
    try:
        numplots =  big_data_df['time (s)'].nunique()
    except (NameError, KeyError):
        # No usable big_data_df: fall back to the nominal number of lag times.
        numplots = int(maxlagtime / skip)
    plots_per_column = int(math.ceil(numplots / plots_per_row))
    figsize = [8, max(2, plots_per_column*1.5)]
    fig, axs = plt.subplots(plots_per_column, plots_per_row, figsize=figsize, dpi=300)
    fig3, axsy = plt.subplots(plots_per_column, plots_per_row, figsize=figsize, dpi=300)
    # Same margins and row spacing on both grids.
    for grid_fig in (fig, fig3):
        grid_fig.subplots_adjust(top=.96, hspace=0.4)

    # Every row except the last: clear the x tick labels, then set the tick params.
    for i in range(plots_per_column-1):
        for j in range(plots_per_row):
            for grid_axes in (axs, axsy):
                grid_axes[i, j].set_xticklabels([])
                grid_axes[i, j].tick_params(bottom=True, labelbottom=True)

    fig.suptitle('Probability distribution in x direction')
    fig3.suptitle('Probability distribution in y direction')

fig2, ax2 = plt.subplots(1, 1, )
fig4, ax4 = plt.subplots(1, 1, )

color_mapping = {}
if showprobdist and not show_curvefit:
    # Per-subplot axis limits recorded while plotting without fits; a later run
    # with show_curvefit = True re-applies them if these names still exist.
    xlimx = np.zeros_like(axs.flat)
    xlimy = np.zeros_like(axs.flat)
    ylimx = np.zeros_like(axsy.flat)
    ylimy = np.zeros_like(axsy.flat)

warnings.filterwarnings("ignore", category=DeprecationWarning) # Not my fault. There's some dependency.


i = 0  # color counter for the legacy (kurtosisdflisty) branch
j = 0  # index of the subplot for the current lag time

def _apply_saved_limits(target_ax, saved_xlim, saved_ylim):
    """Re-apply previously recorded axis limits to `target_ax`.

    `saved_xlim` and `saved_ylim` are (lower, upper) pairs. When a lower bound
    is negative only the upper bound is restored; otherwise the whole pair is.
    """
    if saved_xlim[0] < 0:
        target_ax.set_xlim(xmax=saved_xlim[1])
    else:
        target_ax.set_xlim(saved_xlim)
    if saved_ylim[0] < 0:
        target_ax.set_ylim(ymax=saved_ylim[1])
    else:
        target_ax.set_ylim(saved_ylim)


# Two data layouts are supported: the combined big_data_df (current) and the
# older separate x / y kurtosis lists (kurtosisdflisty).
if 'big_data_df' in dir():

    # Sort the DataFrame by 'time (s)'
    big_data_df_time_sorted = big_data_df.sort_values('time (s)')
    prevtime = big_data_df_time_sorted['time (s)'].iloc[0]

    for index, row in tqdm_notebook(big_data_df_time_sorted.iterrows(), total = len(big_data_df_time_sorted)):
        color = row.color #color_mapping[row.Description]

        # Rows are time-sorted: move to the next subplot whenever the lag time changes.
        if not np.isclose(row['time (s)'], prevtime):
            if debug:
                print('New time:',1000*row['time (s)'], 'ms. Index:', index, '. Previous time:', prevtime*1000, 'ms' )
            j += 1
        prevtime = row['time (s)']

        if showprobdist:
            if row.xy == 'x':
                ax = axs.flat[j]
            else:
                ax = axsy.flat[j]

            if show_curvefit:
                try:
                    gaussian_fit = gaussian(row.histogram.index, row.gaussian_param_height,
                                             row.gaussian_param_center, row['width sigma'])

                    ax.plot(row.histogram.index, gaussian_fit, color=color, linewidth=0.5, alpha=0.3)

                    # Limits recorded on an earlier run with show_curvefit = False, if any.
                    if 'xlimx' in dir() and xlimx[j] != 0:
                        _apply_saved_limits(axs.flat[j], xlimx[j], ylimx[j])
                        # The y-direction limits belong to the y-direction grid.
                        _apply_saved_limits(axsy.flat[j], xlimy[j], ylimy[j])
                except Exception as fit_error:
                    # Best effort: a failed overlay must not stop the remaining plots.
                    print('Failed to show Gaussian fits.', fit_error)
            #ax4.scatter(x=vanhove_lagtime_insecs,y=gaussian_fit_paramsx[2], color = color, alpha = 0.3)

            # Plot vanhove datapoints 
            row.histogram.plot(
                marker=".",
                markersize=1,
                linestyle="",
                ax=ax,
                label=row.Description,  # label with this row's own description
                color=color,
                alpha=0.3,
            )

            ax.set_title("{:.2f} ms".format(1000 * row['time (s)']))
            ax.set_xticks([int(row.histogram.index.min())+1, 0, int(row.histogram.index.max())])
            ax.set_yscale("log")

            if not show_curvefit:
                xlimx[j] = axs.flat[j].get_xlim()
                ylimx[j] = axs.flat[j].get_ylim()
                xlimy[j] = axsy.flat[j].get_xlim()
                ylimy[j] = axsy.flat[j].get_ylim()


    ## Plot gaussian widths versus lag time
    for lagtimelist, gaussian_fit_paramsxlist, gaussian_fit_paramsylist, color in \
        zip(lagtimelistlist,gaussian_fit_paramsxlistlist,gaussian_fit_paramsylistlist, colorlist):
        # Gaussian widths (third fit parameter), made positive
        ax4.plot(lagtimelist, np.abs(np.array(gaussian_fit_paramsxlist)[:, 2]), alpha = .5, color = color)
        ax4.plot(lagtimelist, np.abs(np.array(gaussian_fit_paramsylist)[:, 2]),  alpha = .5, color = color)

    ## Plot kurtosis versus lag time
    # Dictionary to map labels to colors (insertion order = order of first appearance)
    label_colors = {}

    for lagtimelist, kurtosisdf, color in zip(lagtimelistlist, kurtosisdflist, colorlist):
        # Look up this movie's color and description in combined_df via its file name.
        index = combined_df[combined_df['Analysis file'] == kurtosisdf.File.iloc[0]].index[0]
        color = combined_df.loc[index, 'color']
        label = combined_df.loc[index, 'Description']

        for dim in ['x', 'y']:
            # Plot Kurtosis
            kurtosisdfslice = kurtosisdf[kurtosisdf.xy == dim]
            kurtosisdfslice.set_index("time (s)").plot(ax=ax2, color=color, alpha=0.3, label=f'{label} {dim} kurtosis')

        # Map label to color
        if label not in label_colors:
            label_colors[label] = color

    # Consolidate the figure legend to just the unique labels: one opaque, solid
    # proxy line per description.
    new_labels = list(label_colors)
    new_handles = [mlines.Line2D([], [], color=label_colors[label], linestyle='-') for label in new_labels]

    for ax in [ax2, ax4]:
        ax.legend(handles=new_handles, labels=new_labels, bbox_to_anchor=(1, 1), loc='upper left')


elif 'kurtosisdflisty' in dir():

    assert len(filename_list) == len(trackshistlistlist)
    assert len(filename_list) == len(trackshistlistlisty)
    assert len(filename_list) == len(kurtosisdflist)
    assert len(filename_list) == len(kurtosisdflisty)

    for file, lagtimelist, trackshistlist, trackshistlisty, \
        kurtosisdf, kurtosisdfy, gaussian_fit_paramsxlist, gaussian_fit_paramsylist \
        in tqdm_notebook(
            zip(shortfiles,lagtimelistlist, trackshistlistlist, trackshistlistlisty, \
                kurtosisdflist, kurtosisdflisty, gaussian_fit_paramsxlistlist, gaussian_fit_paramsylistlist),
        total=len(shortfiles),
    ):
        # Vary colors if base_filename is different
        base_filename = re.sub(r"\(\d\)$", "", file).strip()
        if base_filename not in color_mapping:
            color_mapping[base_filename] = plt.rcParams["axes.prop_cycle"].by_key()["color"][
                i % len(plt.rcParams["axes.prop_cycle"])
            ]
            i += 1
        color = color_mapping[base_filename]

        j = 0  # subplot index restarts for every movie

        for vanhove_lagtime_insecs, trackshist, trackshisty, gaussian_fit_paramsx, gaussian_fit_paramsy in zip(
            lagtimelist, trackshistlist, trackshistlisty, gaussian_fit_paramsxlist, gaussian_fit_paramsylist
        ):
            if showprobdist:
                if show_curvefit:
                    try:
                        gaussian_fitx = gaussian(trackshist.index, gaussian_fit_paramsx[0],
                                                 gaussian_fit_paramsx[1], gaussian_fit_paramsx[2])
                        gaussian_fity = gaussian(trackshisty.index, gaussian_fit_paramsy[0],
                                                 gaussian_fit_paramsy[1], gaussian_fit_paramsy[2])

                        (axs.flat[j]).plot(trackshist.index, gaussian_fitx, color=color, linewidth=0.3, alpha=0.3)
                        (axsy.flat[j]).plot(trackshisty.index, gaussian_fity, color=color, linewidth=0.3, alpha=0.3)

                        try:
                            (axs.flat[j]).set_xlim(xlimx[j])
                            (axs.flat[j]).set_ylim(ylimx[j])
                            (axsy.flat[j]).set_xlim(xlimy[j])
                            (axsy.flat[j]).set_ylim(ylimy[j])
                        except Exception:
                            # No limits recorded on an earlier run: keep the automatic ones.
                            pass
                    except Exception as fit_error:
                        # Best effort: a failed overlay must not stop the remaining plots.
                        print('Failed to show Gaussian fits.', fit_error)
                    #ax4.scatter(x=vanhove_lagtime_insecs,y=gaussian_fit_paramsx[2], color = color, alpha = 0.3)

                # Plot vanhove datapoints 
                trackshist.plot(
                    marker=".",
                    markersize=1,
                    linestyle="",
                    ax=axs.flat[j],
                    label=file + " x",
                    color=color,
                    alpha=0.3,
                )

                trackshisty.plot(
                    marker=".",
                    markersize=1,
                    linestyle="",
                    ax=axsy.flat[j],
                    label=file + " y",
                    color=color,
                    alpha=0.3,
                )

                for ax in [axs.flat[j], axsy.flat[j]]:
                    ax.set_title("{:.2f} ms".format(1000 * vanhove_lagtime_insecs))
                    ax.set_xticks([int(trackshist.index.min())+1, 0, int(trackshist.index.max())])
                    ax.set_yscale("log")
                if not show_curvefit:
                    xlimx[j] = axs.flat[j].get_xlim()
                    ylimx[j] = axs.flat[j].get_ylim()
                    xlimy[j] = axsy.flat[j].get_xlim()
                    ylimy[j] = axsy.flat[j].get_ylim()
                j += 1
        # end showprobdist

        # Gaussian widths
        ax4.plot(lagtimelist, np.abs(np.array(gaussian_fit_paramsxlist)[:, sigma_index]), color=color, alpha = .5)
        ax4.plot(lagtimelist, np.abs(np.array(gaussian_fit_paramsylist)[:, sigma_index]), color=color, alpha = .5)

        # Kurtosis
        kurtosisdf.set_index("time (s)").plot(ax=ax2, color=color, label=file + " x", alpha = .3)
        kurtosisdfy.set_index("time (s)").plot(ax=ax2, color=color, label=file + " y", alpha = .3)

    # Consolidate the figure legend to just the unique filenames (not counting (1), (2), (3))

    handles, legend_labels = ax2.get_legend_handles_labels()
    short_labels = []
    handle_mapping = {}
    for handle, label in zip(handles, legend_labels):
        match = re.search(r'(.+?) \(\d+\) kurtosis', label)  # Extract the short file names using regular expressions
        if match:
            short_label = match.group(1)
            if short_label not in handle_mapping:
                handle_mapping[short_label] = handle
                short_labels.append(short_label)
        else:
            if label not in handle_mapping:
                handle_mapping[label] = handle
                short_labels.append(label)

    new_handles = [] # Create new handles
    for label in short_labels:
        handle = handle_mapping[label]
        # Create new Line2D objects with solid lines and no alpha
        new_handle = mlines.Line2D([], [], color=handle.get_color(), linestyle='-', linewidth=handle.get_linewidth())
        new_handles.append(new_handle)

    # Set the modified handles and labels
    ax2.legend(handles=new_handles, labels=short_labels, bbox_to_anchor=(1, 1), loc='upper left')  # Move the legend to the right        
        

        
        
# Finish the figures: restore warning filters, label the summary axes and
# tighten the layout of the distribution grids.
warnings.resetwarnings()

datestr = datestring()  # reused by the figure-saving cell

# Label the Gaussian-width axes first, then the kurtosis axes (which end up current).
for summary_ax, summary_ylabel in ((ax4, 'Gaussian width (um)'), (ax2, "Kurtosis")):
    plt.sca(summary_ax)
    plt.xlabel('Time (s)')
    plt.ylabel(summary_ylabel)

if showprobdist:
    for grid_fig in (fig3, fig):
        grid_fig.tight_layout()



beep()

In [None]:
# Save the figures made by the plotting cell; each file name is datestr plus a suffix.
saving = True

if saving:
    ## Save figures
    # (figure, file-name suffix) pairs in the order they are written out;
    # the distribution grids only exist when showprobdist is on.
    if showprobdist:
        figures_to_save = [(fig, 'PDx'), (fig2, 'kurtosis'), (fig3, 'PDy'), (fig4, 'Gaussian width')]
    else:
        figures_to_save = [(fig2, 'kurtosis'), (fig4, 'Gaussian width')]
    for figure_to_save, name_suffix in figures_to_save:
        plt.figure(figure_to_save)  # make it the current figure before saving
        savefigure(datestr + name_suffix)
os.getcwd()

In [None]:

# Scatter the kurtosis of every movie at one lag time.
# NOTE(review): this rebinds `fig` and `ax`, which the earlier plotting cell also uses.
fig, ax = plt.subplots(1,1)


if 'kurtosisdflisty' in dir():
    # Older layout: separate x and y tables.
    row = 2  # Select the desired row
    shortnames = []  # List to store the modified short names
    kurtosis_values = []  # List to store the kurtosis values

    for kurtosisdf in kurtosisdflist + kurtosisdflisty:
        shortname = kurtosisdf.columns[1]  # Extract the short name from the column name
        shortname = re.sub(r'\(\d\)\s*kurtosis', '', shortname)  # Remove the "(1) kurtosis" or "(2) kurtosis" part
        # Second column of the chosen row; .iloc makes the positional access explicit.
        kurtosis_value = kurtosisdf.loc[row].iloc[1]

        shortnames.append(shortname)
        kurtosis_values.append(kurtosis_value)

    # Create plot
    ax.scatter(shortnames, kurtosis_values)
    plt.xlabel('Short Names')
    plt.ylabel('Kurtosis')
    plt.title('Kurtosis Values at Time ' + str(kurtosisdflist[0].loc[row].iloc[0]) + ' s')  # Use the time from the first DataFrame
    plt.xticks(rotation=90)  # Rotate x-axis labels if needed
    plt.show()

else:
    assert len(big_data_df.Description) == len(big_data_df['kurtosis'])

    # Second distinct lag time, in order of appearance.
    timechoice = big_data_df['time (s)'].unique()[1]

    big_data_df_time_slice = big_data_df[big_data_df['time (s)'] == timechoice ]

    ax.scatter(big_data_df_time_slice.Description, big_data_df_time_slice['kurtosis'])
    plt.ylabel('Kurtosis')
    plt.xticks(rotation=90)  # Rotate x-axis labels if needed

    plt.title('Kurtosis Values at Time ' + str(timechoice) + ' s')
    plt.show()

In [None]:
# Older data layout only: re-plot the kurtosis values sorted by size, then as a
# violin plot. Both names must exist; dir() lists names, so they are tested as strings.
if 'kurtosisdflisty' in dir() and 'kurtosis_values' in dir():
    # Combine the shortnames and kurtosis_values into a list of tuples
    data = list(zip(shortnames, kurtosis_values))

    # Sort the list of tuples based on the kurtosis values
    sorted_data = sorted(data, key=lambda x: x[1])  # Sort by the second element of each tuple

    # Unzip the sorted list of tuples back into separate lists
    shortnames, kurtosis_values = zip(*sorted_data)

    # Create a scatter plot
    plt.scatter(shortnames, kurtosis_values)
    #plt.xlabel('Short Names')
    plt.ylabel('Kurtosis')
    # .iloc makes the positional access to the first column explicit.
    plt.title('Kurtosis Values at Time ' + str(kurtosisdflist[0].loc[row].iloc[0]) + ' s')  # Use the time from the first DataFrame
    plt.xticks(rotation=90)  # Rotate x-axis labels if needed
    plt.show()

    # Create a violin plot
    data_df = pd.DataFrame({'Short Names': shortnames, 'Kurtosis': kurtosis_values})

    sns.violinplot(x='Short Names', y='Kurtosis', data=data_df, color='skyblue')
    sns.stripplot(x='Short Names', y='Kurtosis', data=data_df, color='black', jitter=True, size=5)

    plt.xlabel('')
    plt.ylabel('Kurtosis')
    plt.title('Kurtosis Values at Time ' + str(kurtosisdf.loc[row].iloc[0]) + ' s')
    plt.xticks(rotation=90);

    os.chdir(savefolder)
    saving = False  # saving is switched off here
    if saving:
        datestr = datestring()
        savefigure(datestr + 'kurtosis violin plot')


In [None]:
# Scatter the kurtosis at one row of every table, with the tables in
# kurtosisdflist and (when it exists) kurtosisdflisty as separate series.
row = 2  # Select the desired row

shortnamesx = []  # List to store the modified short names
shortnamesy=[]
kurtosis_xvalues = []  # List to store the kurtosis values
kurtosis_yvalues = []  # List to store the kurtosis values

# (tables to read, list receiving the names, list receiving the values)
kurtosis_sources = [(kurtosisdflist, shortnamesx, kurtosis_xvalues)]
if 'kurtosisdflisty' in dir():
    kurtosis_sources.append((kurtosisdflisty, shortnamesy, kurtosis_yvalues))

for source_tables, names_out, values_out in kurtosis_sources:
    for kurtosisdf in source_tables:
        shortname = kurtosisdf.columns[1]  # Extract the short name from the column name
        shortname = re.sub(r'\(\d\)\s*kurtosis', '', shortname)  # Remove the "(1) kurtosis" or "(2) kurtosis" part
        # Second column of the chosen row; .iloc makes the positional access explicit.
        kurtosis_value = kurtosisdf.loc[row].iloc[1]

        names_out.append(shortname)
        values_out.append(kurtosis_value)

# Create a scatter plot
plt.scatter(shortnamesx, kurtosis_xvalues)
plt.scatter(shortnamesy, kurtosis_yvalues)
#plt.xlabel('Short Names')
plt.ylabel('Kurtosis')
# Time taken from the first column of the last table visited above.
plt.title('Kurtosis Values at Time ' + str(kurtosisdf.loc[row].iloc[0]) + ' s')
plt.xticks(rotation=90);  # Rotate x-axis labels