# Unpack Packages

In [1]:
import os
import csv
import sys
# Put the directory that presumably holds MSD_utils on the import path.
# NOTE(review): this is an absolute, machine-specific Windows path and has to
# be edited before the notebook can run on another machine.
module_path = os.path.abspath(
    'C:\\Users\\koolk\\Desktop\\brain-diffusion\\Chad_functions_and_unittests'
)
if module_path not in sys.path:
    sys.path.append(module_path)
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import scipy.optimize as opt
import scipy.stats as stat
from operator import itemgetter
import random
import numpy as np
import numpy.ma as ma
import numpy.linalg as la

# Short aliases for the NumPy constant / ufuncs.
pi, sin, cos = np.pi, np.sin, np.cos

In [2]:
from MSD_utils import get_data_pups, build_time_array, return_average, avg_all, graph_single_variable
from MSD_utils import SD_all, return_SD, range_and_ticks, choose_y_axis_params, data_prep_for_plotting_pups
from MSD_utils import fill_in_and_split, plot_traj_length_histogram, plot_traj, filter_out_short_traj
from MSD_utils import plot_trajectory_overlay, quality_control

from MSD_utils import diffusion_coefficient_point_derivative, diffusion_coefficient_linear_regression
from MSD_utils import calculate_diffusion_coefficients, diffusion_bar_chart, summary_barcharts
from MSD_utils import calculate_MMSDs, plot_general_histogram, plot_MSD_histogram, plot_all_MSD_histograms
from MSD_utils import fillin2, MSD_iteration, vectorized_MMSD_calcs
from MSD_utils import get_data_gels, data_prep_for_plotting_gels, plot_all_MSD_histograms_gels, quality_control_gels
from MSD_utils import calculate_diffusion_coefficients_gels

In [3]:
# Path templates; the placeholders are filled in with str.format().
folder = "./{functionality}/{slic}/"
path = "./{functionality}/{slic}/geoM2xy_{sample_name}.csv"
path2 = "./{functionality}/{slic}/Traj_{sample_name}.tif.csv"

# Trajectory loading / MSD calculation settings (arguments of MSD_iteration
# and vectorized_MMSD_calcs).
frames = 480
frame_m = 480  # atm I can't go lower than the actual value.
cut = 4
totvids = 1
# conversion[0] scales the x/y columns and conversion[2] the column after
# them in MSD_iteration; conversion[1] is not read by this notebook's own
# code.  An earlier setting was (0.3, 3.95, 1).
conversion = (0.16, 9.91, 1)

# Settings handed to the plotting / diffusion-coefficient helpers.
SD_frames = [1, 10, 19, 28]
to_frame = 60
dimension = "2D"
time_to_calculate = 1
interv = 50
test_bins = np.linspace(0, 75, 76)
y_range, ticks_y, dec_y, x_range, ticks_x, dec_x = 8, 2, 1, 3, 1, 1

# Sample naming.
base = "0-4p_agarose"
base_name = "RED"

# Experimental design; the dict as a whole is passed to the MSD_utils helpers.
parameters = {
    "channels": ["RED"],
    # "genotypes": ["WT"],
    # "pups": ["P1", "P2", "P3"],
    "surface functionalities": ["PEG", "nPEG"],
    "slices": ["1", "2", "3", "4", "5", "6", "7", "8"],
    "regions": ["cortex"],
    "replicates": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "slice suffixes": ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
}

# Convenience names for the entries used directly in the cells below.
channels = parameters["channels"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
replicates = parameters["replicates"]
suffixes = parameters["slice suffixes"]

In [None]:
def MSD_iteration(folder, name, cut, totvids, conversion, frames):
    """
    Cleans up data for MSD analysis from csv files.

    Reads ``Traj_{name}_{num}.tif.csv`` from ``folder`` for every ``num`` in
    ``1..totvids``, stacks the videos into one table with unique particle
    IDs, rescales the coordinate columns and writes every sufficiently long
    trajectory into one column of ``frames``-row arrays.

    Table layout after the first file column has been removed (11 columns
    are required): column 0 is the particle ID, column 1 is used as the row
    (frame) index of the output arrays, columns 2-3 are x and y.

    Parameters
    ----------
    folder : str
        Prefix prepended verbatim to the file name (include the trailing
        path separator).
    name : str
        Sample name substituted into the file name.
    cut : int
        A particle is dropped when the distance between its first and last
        row in the merged table is smaller than ``cut``.
    totvids : int
        Number of video files to merge.
    conversion : sequence
        ``conversion[0]`` multiplies columns 2-3 and ``conversion[2]``
        multiplies column 4; ``conversion[1]`` is not used here.
    frames : int
        Number of rows of the output arrays.

    Returns
    -------
    total1 : int
        Number of particles that passed the length filter.
    xs_m, ys_m : ndarray, shape (frames, max(total1 - 1, 0))
        x / y of each trajectory shifted so that it starts at row 0.
    x_m, y_m : ndarray, shape (frames, max(total1 - 1, 0))
        x / y placed at the rows given by column 1 of the table.

    Rows without data are left at 0.
    """

    trajectory = dict()
    tots = dict()  # Total particles in each video
    newtots = {0: 0}  # Cumulative total particles.
    tlen = dict()  # Table rows in each video
    tlength = {0: 0}  # Cumulative table rows.

    for num in range(1, totvids + 1):
        trajectory[num] = np.genfromtxt(folder+'Traj_{}_{}.tif.csv'.format(name, num), delimiter=",")
        # The first column of the file is not used.
        trajectory[num] = np.delete(trajectory[num], 0, 1)

        # The ID found in the last row is taken as the video's particle count.
        tots[num] = trajectory[num][-1, 0].astype(np.int64)
        newtots[num] = newtots[num-1] + tots[num]

        tlen[num] = trajectory[num].shape[0]
        tlength[num] = tlength[num-1] + tlen[num]

    placeholder = np.zeros((tlength[totvids], 11))

    for num in range(1, totvids + 1):
        rows = slice(tlength[num-1], tlength[num])
        placeholder[rows, :] = trajectory[num]
        # Offset the IDs so they stay unique across videos.
        placeholder[rows, 0] += newtots[num-1]

    particles = placeholder[:, 0]
    total = int(max(particles))

    fixed = np.zeros(placeholder.shape)
    fixed[:, 0:2] = placeholder[:, 0:2]
    fixed[:, 2:4] = conversion[0] * placeholder[:, 2:4]
    fixed[:, 4] = conversion[2] * placeholder[:, 4]

    x = np.zeros((frames, total))
    y = np.zeros((frames, total))
    xs = np.zeros((frames, total))
    ys = np.zeros((frames, total))

    nones = 0  # Particles skipped so far; keeps the kept columns contiguous.
    for num in range(1, total + 1):
        itindex = np.where(particles == num)[0]

        # An ID that never occurs in the table is treated like a filtered
        # trajectory instead of crashing on min()/max() of an empty array.
        if itindex.size == 0:
            nones = nones + 1
            continue

        min1 = itindex.min()
        max1 = itindex.max()
        if max1 - min1 < cut:
            nones = nones + 1
            continue

        holdplease = fillin2(fixed[min1:max1+1, 0:5])
        first = int(holdplease[0, 1])
        last = int(holdplease[-1, 1])
        column = num - nones - 1

        x[first:last+1, column] = holdplease[:, 2]
        y[first:last+1, column] = holdplease[:, 3]

        xs[0:last+1-first, column] = holdplease[:, 2]
        ys[0:last+1-first, column] = holdplease[:, 3]

    total1 = total - nones

    # NOTE(review): only the first total1 - 1 of the total1 filled columns are
    # returned, i.e. the last retained trajectory is left out.  Downstream
    # code may rely on this shape, so it is preserved here -- confirm whether
    # the slice should be [:, :total1] and change callers together with it.
    # The max() keeps the "nothing retained" case from turning into [:, :-1].
    kept = max(total1 - 1, 0)
    x_m = x[:, :kept]
    y_m = y[:, :kept]
    xs_m = xs[:, :kept]
    ys_m = ys[:, :kept]

    print('Total particles after merging datasets and filtering short trajectories:', total1)
    return total1, xs_m, ys_m, x_m, y_m

In [None]:
def vectorized_MMSD_calcs(frames, total1, xs_m, ys_m, x_m, y_m, frame_m):
    """
    Computes per-particle time-averaged MSDs and their geometric mean.

    For every lag ``frame`` in ``1..frame_m - 1`` the squared displacement
    ``(p[t + frame] - p[t])**2`` is averaged over all start rows ``t`` for
    each column of ``xs_m`` / ``ys_m``; the geometric mean of those
    per-column averages is then taken across columns.  Entries equal to 0
    are masked, i.e. treated as missing.

    Parameters
    ----------
    frames : int
        Number of rows of the per-particle output arrays.
    total1 : int
        Not used; kept so existing calls keep working.  The number of
        particles is taken from ``xs_m`` itself.
    xs_m, ys_m : array-like, shape (rows, particles)
        Positions, one trajectory per column.
    x_m, y_m : array-like
        Not used; kept so existing calls keep working.
    frame_m : int
        One more than the largest lag evaluated; must not exceed ``frames``.

    Returns
    -------
    geoM1x, geoM1y : ndarray, shape (frame_m,)
        Geometric mean over particles of the x / y MSD at each lag.
    geoM2xy : ndarray, shape (frame_m,)
        ``geoM1x + geoM1y``.
    SM1x, SM1y : ndarray, shape (frames, particles)
        Per-particle x / y MSD at each lag.
    SM2xy : ndarray, shape (frames, particles)
        ``SM1x + SM1y``.

    Row 0 of every output stays 0 (no lag-0 value is computed).
    """

    xs_m = ma.masked_equal(xs_m, 0)
    ys_m = ma.masked_equal(ys_m, 0)

    n_particles = xs_m.shape[1]
    SM1x = np.zeros((frames, n_particles))
    SM1y = np.zeros((frames, n_particles))

    geoM1x = np.zeros(frame_m)
    geoM1y = np.zeros(frame_m)

    for frame in range(1, frame_m):
        # Pair every row t + frame with row t.  A single row xs_m[frame, :]
        # here would compare one fixed position against the whole window and
        # not yield the displacement at this lag.
        Mx = (xs_m[frame:, :] - xs_m[:-frame, :])**2
        Mxa = np.mean(Mx, axis=0)
        geoM1x[frame] = stat.gmean(Mxa, axis=0)

        My = (ys_m[frame:, :] - ys_m[:-frame, :])**2
        Mya = np.mean(My, axis=0)
        geoM1y[frame] = stat.gmean(Mya, axis=0)

        SM1x[frame, :] = Mxa
        SM1y[frame, :] = Mya

    geoM2xy = geoM1x + geoM1y
    SM2xy = SM1x + SM1y

    return geoM1x, geoM1y, geoM2xy, SM1x, SM1y, SM2xy

In [6]:
# Load a single sample with a cut-off of 20 instead of the configured `cut`.
# NOTE(review): DIR and sample_name are not assigned in any cell above this
# one; they are set inside the batch-loop cell further down, so this cell
# depends on the order in which the notebook cells were executed.
total1, xs_m, ys_m, x_m, y_m = MSD_iteration(DIR, sample_name, 20, totvids, conversion, frames)

Total particles after merging datasets and filtering short trajectories: 787


In [7]:
# MSD curves for the cut-off-20 arrays loaded in the previous cell; the
# combined x+y curve is stored under its own name, geoM2xy20.
geoM1x, geoM1y, geoM2xy20, SM1x, SM1y, SM2xy = vectorized_MMSD_calcs(frames, total1, xs_m, ys_m, x_m, y_m, frame_m)

  log_a = np.log(a)


In [8]:
# Inspect the first 50 rows of five neighbouring columns of xs_m;
# `yes` is a column offset for choosing which five columns are shown.
yes = 40

xs_m[0:50, 5+yes:10+yes]

array([[ 14.2184 ,  35.87504,  25.73984,  31.5776 ,  27.00608],
       [ 13.8632 ,  35.84   ,  25.77648,  30.98896,  27.75824],
       [ 13.8632 ,  35.76448,  25.69584,  30.52384,  27.54032],
       [ 14.27872,  35.76112,  25.72912,  30.97344,  27.60432],
       [ 14.24048,  35.85296,  25.70032,  31.30144,  27.76816],
       [ 14.116  ,  35.52608,  25.71904,  31.49296,  28.54016],
       [ 14.1232 ,  35.7392 ,  25.60688,  31.44576,  28.54016],
       [ 14.09072,  35.70096,  25.57968,  31.40272,  29.11664],
       [ 14.01296,  35.69088,  25.55536,  31.43088,  28.73232],
       [ 14.0448 ,  35.81056,  25.58848,  31.43296,  28.2992 ],
       [ 14.00288,  35.6912 ,  25.5616 ,  31.45536,  28.67168],
       [ 14.     ,  35.72032,  25.5376 ,  31.43504,  27.77712],
       [ 14.01408,  35.74816,  25.5328 ,  31.43616,  27.74448],
       [ 14.0488 ,  35.73776,  25.60432,  31.47696,  27.74448],
       [ 14.0064 ,  35.71552,  25.55888,  31.42768,  28.964  ],
       [ 14.04608,  35.71824,  25.5608 ,

In [None]:
# Overlay several MSD curves on one set of axes.
# NOTE(review): geoM2xy6, geoM2xy8 and geoM2xy10 are not assigned in any
# visible cell -- presumably results for cut-offs 6, 8 and 10 (compare
# geoM2xy20 above); confirm before re-running.
for curve in (geoM2xy, geoM2xy6, geoM2xy8, geoM2xy10):
    plt.plot(curve)

axes = plt.gca()
axes.set_xlim([0, 12])
axes.set_ylim([0, 4])
plt.show()


In [None]:
# Display the values of geoM2xy10.
# NOTE(review): geoM2xy10 is not assigned in any visible cell.
geoM2xy10

In [None]:
# Plot the y-direction curve.  The axis limits are applied before
# plt.show() so that they affect the figure that is actually displayed.
plt.plot(geoM1y)
plt.gca().set_xlim([0, 40])
plt.gca().set_ylim([0, 6])
plt.show()

In [5]:
# Result stores for the batch run, keyed by sample name.
geoM1x = {}
geoM1y = {}
geoM2xy = {}
SM1x = {}
SM1y = {}
SM2xy = {}

# Process every channel / surface functionality / slice combination and
# write the combined x+y results next to the input data.
for channel in channels:
    for surface_functionality in surface_functionalities:
        for slic in slices:
            # NOTE(review): "0-4p_agarose" repeats the value of `base`.
            sample_name = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
            DIR = folder.format(functionality=surface_functionality, slic=slic)

            total1, xs, ys, x, y = MSD_iteration(DIR, sample_name, cut, totvids, conversion, frames)
            results = vectorized_MMSD_calcs(frames, total1, xs, ys, x, y, frame_m)

            # `results` is ordered exactly like the stores listed here.
            stores = (geoM1x, geoM1y, geoM2xy, SM1x, SM1y, SM2xy)
            for store, values in zip(stores, results):
                store[sample_name] = values

            np.savetxt(DIR+'geoM2xy_{}.csv'.format(sample_name), geoM2xy[sample_name], delimiter=',')
            np.savetxt(DIR+'SM2xy_{}.csv'.format(sample_name), SM2xy[sample_name], delimiter=',')

Total particles after merging datasets and filtering short trajectories: 2938
Total particles after merging datasets and filtering short trajectories:

  log_a = np.log(a)


 5814
Total particles after merging datasets and filtering short trajectories: 886
Total particles after merging datasets and filtering short trajectories: 3349
Total particles after merging datasets and filtering short trajectories: 2650
Total particles after merging datasets and filtering short trajectories: 2933
Total particles after merging datasets and filtering short trajectories: 3765
Total particles after merging datasets and filtering short trajectories: 2411
Total particles after merging datasets and filtering short trajectories: 192
Total particles after merging datasets and filtering short trajectories: 4518
Total particles after merging datasets and filtering short trajectories: 946
Total particles after merging datasets and filtering short trajectories: 2226
Total particles after merging datasets and filtering short trajectories: 995
Total particles after merging datasets and filtering short trajectories: 5025
Total particles after merging datasets and filtering short tra

In [None]:
# Display the current value of sample_name (the last one assigned by the
# batch loop, if that was the most recently executed cell).
sample_name

In [None]:
frames = 480

# Load the saved results for plotting.
# NOTE(review): the meaning of the six returned objects is defined by
# data_prep_for_plotting_gels in MSD_utils and is not visible here.
(data, avg_over_slices, time, time_SD,
 average_over_slices, all_SD_over_slices) = data_prep_for_plotting_gels(
    path, frames, SD_frames, conversion, to_frame, parameters, base)

In [None]:
# Histograms built from the per-particle results in SM2xy, with both axes
# capped explicitly.
plot_all_MSD_histograms_gels(
    parameters, base, folder, SM2xy, time, test_bins, 1,
    set_y_limit=True, y_range=5000,
    set_x_limit=True, x_range=60,
)

In [None]:


# Run the MSD_utils quality-control helper on the raw trajectory files
# (path2 is the Traj_*.tif.csv template).
quality_control_gels(path2, folder, frames, conversion, parameters, base, interv, cut)

# Plot Datasets Together

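Because my PEG and nPEG datasets had different frame rates, I had to analyze them separately.  My code requires all datasets in one run to share the same frame rate, and thus they can't be plotted together by the standard helper functions; the combined figure below is assembled by hand from the two separate analyses.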

In [None]:
# Divide every spread by sqrt(number of slices) -- 8 in the current
# configuration -- presumably turning a standard deviation over slices into
# a standard error; TODO confirm against data_prep_for_plotting_gels.
# NOTE: the dict is updated in place, so re-running this cell scales the
# values again.
sem_divisor = np.sqrt(len(slices))
for key in all_SD_over_slices:
    all_SD_over_slices[key] = all_SD_over_slices[key] / sem_divisor

In [None]:
in_name1 = "agarose"
in_name2 = "RED"

# Plot the averaged curves with error bars at the SD_frames positions, using
# the axis ranges and tick settings from the configuration cell.
graph_single_variable(
    average_over_slices, all_SD_over_slices, time, time_SD, SD_frames,
    in_name1, in_name2,
    to_frame=to_frame,
    y_range=y_range, ticks_y=ticks_y, dec_y=dec_y,
    x_range=x_range, ticks_x=ticks_x, dec_x=dec_x,
    modify_labels=False, label_identifier="RED_", x_manual=True,
    base_name=base_name,
)
plt.show()

In [None]:
# Creates figure: one curve with error bars per dataset on shared axes.
# NOTE(review): average_over_slices2, all_SD_over_slices2, time2, time_SD2
# and SD_frames2 are not assigned in any visible cell -- presumably the
# results of a second, separate analysis run; confirm before re-running.

# Styling
line_colors = ['g', 'r', 'b', 'c', 'm', 'k']
line_kind = '-'
labels = ['PEG', 'nPEG']
label_size = 95
legend_size = 40
tick_size = 50
line_width = 10
fig_size = (20, 18)
filename = 'test.png'

# Running index -> sample key: keys of the first dataset, then of the second.
# NOTE(review): using to_graph[1] for the second dataset assumes the first
# dataset holds exactly one key -- TODO confirm.
to_graph = dict(enumerate(list(average_over_slices) + list(average_over_slices2)))

fig = plt.figure(figsize=fig_size, dpi=80)
ax = fig.add_subplot(111)

line_type = [color + line_kind for color in line_colors]

# (time axis, error-bar times, error-bar row indices, means, spreads)
series = (
    (time, time_SD, SD_frames, average_over_slices, all_SD_over_slices),
    (time2, time_SD2, SD_frames2, average_over_slices2, all_SD_over_slices2),
)
for idx, (t_axis, t_err, err_rows, means, spreads) in enumerate(series):
    key = to_graph[idx]
    ax.plot(t_axis[0:to_frame], means[key][0:to_frame], line_type[idx],
            linewidth=line_width, label=labels[idx])
    ax.errorbar(t_err, means[key][err_rows], spreads[key], fmt='', linestyle='',
                capsize=7, capthick=2, elinewidth=2, color=line_colors[idx])

# A few adjustments to prettify the graph
text_items = [ax.xaxis.label, ax.yaxis.label]
text_items = text_items + ax.get_xticklabels() + ax.get_yticklabels()
for item in text_items:
    item.set_fontsize(tick_size)

xmajor_ticks = np.arange(0, x_range+0.0001, ticks_x)
ymajor_ticks = np.arange(0, y_range+0.0001, ticks_y)

ax.set_xticks(xmajor_ticks)
plt.xticks(rotation=-30)
ax.set_yticks(ymajor_ticks)
ax.title.set_fontsize(tick_size)
ax.set_xlabel('Time (s)', fontsize=label_size)
ax.set_ylabel(r'MSD ($\mu$m$^2$)', fontsize=label_size)
ax.tick_params(direction='out', pad=16)
ax.legend(loc=(0.02, 0.75), prop={'size': legend_size})
ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_x)))
ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_y)))

# plt.yscale('log')
# plt.xscale('log')
ax.set_xlim([0, x_range+0.0001])
ax.set_ylim([0, y_range+0.0001])

# Save your figure
plt.savefig(filename, bbox_inches='tight')
plt.show()

In [None]:
def calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension):

    """
    Loads one MSD csv file per sample and estimates diffusion coefficients.

    A sample is processed for every combination of channel, surface
    functionality and slice.  Its name is
    "{channel}_{surface_functionality}_0-4p_agarose_{slice}".

    Parameters:
    channels, surface_functionalities, slices: iterables whose items are
        substituted into the sample name.
    path: format string with the placeholders {functionality}, {slic} and
        {sample_name}; it is formatted per sample and read with
        np.genfromtxt (comma delimited).
    time, time_to_calculate, dimension: passed unchanged, together with the
        loaded data, to diffusion_coefficient_point_derivative.
    time, to_frame, dimension: passed unchanged, together with the loaded
        data, to diffusion_coefficient_linear_regression.

    Returns:
    (diffusion_coef_point_derivative, diffusion_coef_linear_fit): two
        dictionaries keyed by sample name that hold whatever the two helper
        functions return for that sample.

    Example:
    path = "./{functionality}/{slic}/geoM2xy_{sample_name}.csv";
    calculate_diffusion_coefficients_gels(["RED"], ["PEG", "nPEG"],
    ["1", "2"], path, time, 1, 60, "2D")
    """

    diffusion_coef_point_derivative = {}
    diffusion_coef_linear_fit = {}

    for channel in channels:
        for surface_functionality in surface_functionalities:
            for slic in slices:
                sample_name = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
                filename = path.format(functionality=surface_functionality, slic=slic, sample_name=sample_name)
                msd = np.genfromtxt(filename, delimiter=",")

                diffusion_coef_point_derivative[sample_name] =\
                    diffusion_coefficient_point_derivative(msd, time, time_to_calculate, dimension)
                diffusion_coef_linear_fit[sample_name] =\
                    diffusion_coefficient_linear_regression(msd, time, to_frame, dimension)

    return diffusion_coef_point_derivative, diffusion_coef_linear_fit

In [None]:
# Compute both diffusion-coefficient dictionaries for the configured samples;
# the returned tuple is not stored, only shown as the cell's output.
calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension)