In [None]:
#trying DBScan
# Cluster the vesicle localizations with DBSCAN and overlay the clusters on all points.

#define intensity threshold. Points which have a lower intensity will be excluded from the clustering.
int_threshold=0

#apply the threshold on the vesicle and synaptic marker datasets
# (column 3 is compared against the threshold; columns 0-2 are kept as coordinates)
vesicles_thresh = vesicles[vesicles[:,3]>int_threshold,0:3]
synapse_marker_thresh = synapse_marker[synapse_marker[:,3]>int_threshold, 0:3]

#compute the percentage of points included in the clustering.
frct_points_included = vesicles_thresh.shape[0]/vesicles.shape[0]*100
print('% points above threshold:', frct_points_included)

# Normalize the data to have zero mean and unit variance.
# NOTE(review): data_norm is NOT what DBSCAN is fitted on below; it is kept only
# in case another cell reads it.
data_norm = (vesicles_thresh - np.mean(vesicles_thresh, axis=0)) / np.std(vesicles_thresh, axis=0)

# Apply DBSCAN to the thresholded, un-normalized coordinates, so `eps` is
# expressed in the same units as the coordinates themselves.
dbscan = DBSCAN(eps=800, min_samples=150)
labels = dbscan.fit_predict(vesicles_thresh)

#compute the number of clusters rendered by DBSCAN (label -1 marks noise and is excluded)
unique_labels = np.unique(labels[labels >= 0])
n_clusters = len(unique_labels)
print('number of clusters:', n_clusters)

# All localizations as a faint background, then one labelled scatter per cluster.
plt.scatter(vesicles[:,0], vesicles[:,1], s=0.1, alpha=0.5)
for label in unique_labels:
    mask = labels == label
    plt.scatter(vesicles_thresh[mask, 0], vesicles_thresh[mask, 1], s=20, label=f'cluster {label}')

plt.title("DBSCAN Clusters")
# Only draw a legend when there is at least one labelled cluster to list.
if n_clusters:
    plt.legend()

# Lay out the figure before showing it; after plt.show() it would have no effect on the shown figure.
plt.tight_layout()
plt.show()

In [None]:
# For every keyword, run fcts.calc_distance_squared_two on each of its entries
# (the entry is passed as both arguments) and pool the results per keyword.
min_distances = {}
for i in keywords:
    for j in range(len(file_dic[i])):
        min_dist = fcts.calc_distance_squared_two(file_dic[i][j], file_dic[i][j])
        if i not in min_distances:
            # first entry seen for this keyword: start a new array
            min_distances[i] = np.array([min_dist])
        else:
            # np.append flattens both inputs, so the pooled array becomes 1-D
            min_distances[i] = np.append(min_distances[i], min_dist)

In [None]:
#point pattern analysis in 2D
# Compare the observed G function of one point set with simulated patterns.

import pointpats

# First two columns (x, y) of the selected array for this cell zone, as a DataFrame.
coordinates=pd.DataFrame(vesicle_clusters['210404 SPON647_PSD680 10DIV_CellZone1'][1][:,0:2], columns = ['x', 'y'])

# keep_simulations=True so the individual simulated curves can be plotted below.
g_test = pointpats.distance_statistics.g_test(coordinates, support=40, keep_simulations=True)

f, ax = plt.subplots(
    1, 2, figsize=(9, 3), gridspec_kw=dict(width_ratios=(6, 3))
)
# plot all the simulations with very fine lines
ax[0].plot(
    g_test.support, g_test.simulations.T, color="k", alpha=0.01
)
# and show the median of the simulations
ax[0].plot(
    g_test.support,
    np.median(g_test.simulations, axis=0),
    color="cyan",
    label="median simulation",
)


# and the observed pattern's G function
ax[0].plot(
    g_test.support, g_test.statistic, label="observed", color="red"
)

# clean up labels and axes
ax[0].set_xlabel("distance")
ax[0].set_ylabel("% of nearest neighbor\ndistances shorter")
ax[0].legend()
#ax[0].set_xlim(0, 2000)
ax[0].set_title(r"Ripley's $G(d)$ function")

# plot the pattern itself on the next frame.
# The columns are selected explicitly: unpacking a DataFrame with `*coordinates`
# yields its column labels ('x', 'y'), not the coordinate values.
ax[1].scatter(coordinates['x'], coordinates['y'])

# and clean up labels and axes there, too
ax[1].set_xticks([])
ax[1].set_yticks([])
ax[1].set_xticklabels([])
ax[1].set_yticklabels([])
ax[1].set_title("Pattern")
f.tight_layout()
plt.show()

In [None]:
#point density analysis in 2d

# Set up figure and axis
f, ax = plt.subplots(1, figsize=(6, 6))
# Draw a filled 2-D KDE of the x/y coordinates with 50 contour levels,
# alpha=0.55 and the reversed viridis colormap.
# `fill=` / `levels=` replace the deprecated `shade=` / `n_levels=` keywords,
# and the axes is passed explicitly instead of relying on the current axes.
seaborn.kdeplot(
    data=coordinates, x='x', y='y',
    levels=50,
    fill=True,
    alpha=0.55,
    cmap="viridis_r",
    ax=ax,
)

# Remove axes
ax.set_axis_off()

In [None]:
#loading a single file as example using outdated fct.
# Reads one pair of localization tables, renders the first one with
# fcts.get_wide_field and overlays `coords` on an image.

# Hard-coded example pair of CSV paths.
# NOTE(review): this list is immediately overwritten by list_of_files[0] below,
# so these two paths are never read.
ex_zone = ['/Volumes/STORM_Nathalie/STORM DeMixing/210414 DEP647_PSD680 8DIV/CellZone4/Demix/CoordTable_SAFE360_MULTIPLEXING_demixed_w1_UncertaintyFiltered.csv',
  '/Volumes/STORM_Nathalie/STORM DeMixing/210414 DEP647_PSD680 8DIV/CellZone4/Demix/CoordTable_SAFE360_MULTIPLEXING_demixed_w2_UncertaintyFiltered.csv']

ex_zone = list_of_files[0]

# Entry 0 is loaded as the vesicle table and entry 1 as the synapse marker table;
# only the x/y/z columns are kept, as float64 arrays.
vesicles = pd.read_csv(ex_zone[0])[['x [nm]', 'y [nm]', 'z [nm]']].to_numpy(dtype=np.float64)
synapse_marker = pd.read_csv(ex_zone[1])[['x [nm]', 'y [nm]', 'z [nm]']].to_numpy(dtype=np.float64)

# NOTE(review): `data` is not used anywhere else in this cell.
data=vesicles
# Parameters handed to fcts.get_wide_field.
# NOTE(review): presumably output image shape, blur kernel shape and blur sigma — confirm against fcts.
image_size = (730,730,16)
kernel_size = (50,50,1)
sigma = 12

wide_field_image = fcts.get_wide_field(vesicles, image_size, kernel_size, sigma)

# NOTE(review): `coords` and `image` are not defined in this cell; they must come
# from earlier kernel state. `image` may have been meant to be `wide_field_image` — confirm.
coords = np.array(coords)

# Show the first z-slice and overlay coords (column 1 on the x axis, column 0 on the y axis).
plt.imshow(image[:,:,0])
plt.scatter(coords[:,1], coords[:,0], c='red')

In [None]:
#minimum distance for clusters of vesicles to PSD95 areas - but done as point location instead of point density
# For every file pair: render wide-field images, keep the vesicle positions that
# fall inside the thresholded vesicle image, and store their distances to the
# synapse markers and to each other.

image_size = (730,730,16)
kernel_size = (50,50,1)
sigma = 15

# Thresholds are expressed as "mean + k * std" of each wide-field image.
max_threshold_ves = 12
max_threshold_mark = 4
# NOTE(review): max_area is not used in this cell.
max_area = (21,21,21)

min_dist_marker = {}
min_dist_vesicle = {}
SNR_dict = {}
for file_name in list_of_files:

    # Build the lookup key for files_infos from the path of the first file of the pair.
    if access == 'drive':
        new_file_name = f"{(file_name[0]).split('/')[4]}_{(file_name[0]).split('/')[5]}"

    elif access == 'computer':
        # NOTE(review): [0:-3] drops the last three characters of the base name,
        # which leaves a trailing '.' for a '.csv' file — confirm this matches the keys.
        new_file_name = f"{(file_name[0]).split('/')[-1][0:-3]}"

    else:
        # Fail loudly instead of hitting a NameError or silently reusing the
        # key left over from the previous iteration.
        raise ValueError(f"unknown access mode: {access!r} (expected 'drive' or 'computer')")

    print(new_file_name)

    file_info = files_infos[new_file_name]

    # file_info[0] == 0 marks a file pair that is skipped.
    if file_info[0] == 0:
        continue

    # file_info[1] is the index (0 or 1) of the vesicle file within the pair;
    # the marker file is the other one.
    vesicles = pd.read_csv(file_name[file_info[1]])[['x [nm]', 'y [nm]', 'z [nm]']].to_numpy(dtype=np.float64)
    synapse_marker = pd.read_csv(file_name[1 - file_info[1]])[['x [nm]', 'y [nm]', 'z [nm]']].to_numpy(dtype=np.float64)

    #plot the locations of both the vesicles and the synaptic marker using a large point spread function.
    wide_field_vesicles = fcts.get_wide_field(vesicles, image_size, kernel_size, sigma)
    wide_field_marker = fcts.get_wide_field(synapse_marker, image_size, kernel_size, sigma)

    #plot the locations of the vesicles using the same image size as above to a single pixel size
    image_vesicles = fcts.get_wide_field(vesicles, image_size, (1,1,1), sigma)

    #calculate the intensity threshold for the large PSF images, depending on an arbitrary intensity threshold, dependent on the mean and std of each image.
    ves_thresh = wide_field_vesicles.mean() + wide_field_vesicles.std() * max_threshold_ves
    marker_thresh = wide_field_marker.mean() + wide_field_marker.std() * max_threshold_mark

    #create a mask of the large PSF images where for the pixels above the threshold
    mask_vesicles = (wide_field_vesicles > ves_thresh) * 1

    plt.imshow(mask_vesicles[:,:,0])
    plt.show()
    # NOTE(review): mask_marker is computed but not used further in this cell.
    mask_marker = (wide_field_marker > marker_thresh) * 1

    #create an array for the vesicles which are located within the mask
    masked_vesicles = np.where(mask_vesicles, image_vesicles, 0)
    vesicle_pos_tup = np.where(masked_vesicles > 0)
    vesicle_pos = np.array([vesicle_pos_tup[0],vesicle_pos_tup[1],vesicle_pos_tup[2]]).T
    # Rescale pixel indices by 49660 / image_size along each axis.
    # NOTE(review): 49660 is presumably the field-of-view size in nm — confirm;
    # the same factor is applied to the z axis as to x and y.
    vesicle_pos = vesicle_pos/image_size * 49660

    # Fraction of occupied pixels inside the mask relative to the number of localizations.
    nb_vesicles_tot = vesicles.shape[0]
    nb_vesicles_selected = vesicle_pos.shape[0]
    # Guard against an empty localization table (would otherwise divide by zero).
    snr = nb_vesicles_selected/nb_vesicles_tot if nb_vesicles_tot else float('nan')

    #calculate the distance between vesicles within the masked images and all synapse markers
    distance_to_marker = fcts.calc_distance_squared_two(vesicle_pos, synapse_marker)
    distance_to_vesicles = fcts.calc_distance_squared_two(vesicle_pos, vesicle_pos)
    print(distance_to_vesicles)
    min_dist_marker[new_file_name] = distance_to_marker
    min_dist_vesicle[new_file_name] = distance_to_vesicles
    SNR_dict[new_file_name] = snr


print('done')

In [None]:
import numpy as np

def merge_points(points, diameter=40):
    """
    Greedily merge points that lie within `diameter` of each other.

    Starting from the first remaining point, every point closer than
    `diameter` to the current centroid is absorbed; the centroid is then
    recomputed as the mean of ALL points absorbed so far, and the search is
    repeated until no further point is in range. The result depends on the
    order of the input points.

    Parameters
    ----------
    points : array-like of shape (n_points, n_dims)
        Coordinates to merge. The input is not modified.
    diameter : float, default 40
        Distance (same units as `points`) below which a point is merged
        into the current centroid.

    Returns
    -------
    numpy.ndarray
        One centroid per merged group, shape (n_groups, n_dims). An empty
        input is returned as an empty array of the same shape.
    """
    remaining = np.asarray(points, dtype=float)
    if len(remaining) == 0:
        return remaining.copy()

    merged_points = []
    while len(remaining) > 0:
        # Seed a new group with the first remaining point.
        centroid = remaining[0]
        coord_sum = centroid.copy()
        n_members = 1
        remaining = remaining[1:]

        nearby_points = np.linalg.norm(remaining - centroid, axis=1) < diameter
        while nearby_points.any():
            # Keep a running sum and count so the centroid is the true mean of
            # every member, not a mean in which the previous centroid counts
            # as a single point.
            coord_sum = coord_sum + remaining[nearby_points].sum(axis=0)
            n_members += int(nearby_points.sum())
            centroid = coord_sum / n_members
            remaining = remaining[~nearby_points]
            nearby_points = np.linalg.norm(remaining - centroid, axis=1) < diameter
        merged_points.append(centroid)
    return np.array(merged_points)


# Example dataset: 100 random 2-D points in the unit square.
# A fixed seed makes the example reproducible on every run.
points = np.random.default_rng(0).random((100, 2))

# Merge points that lie within 0.1 of each other (same arbitrary units as the points)
merged_points = merge_points(points, diameter=0.1)

# Check the number of original points vs merged points
print(f"Original points: {points.shape[0]}, Merged points: {merged_points.shape[0]}")

# Overlay the merged centroids on the original points.
plt.scatter(points[:,0], points[:,1], alpha=0.5, label='original')
plt.scatter(merged_points[:,0], merged_points[:,1], alpha=0.5, label='merged')
plt.legend()
plt.show()

In [None]:
#permutation test

# The two samples to compare, looked up by condition key in vesicle_type_nd_DIV.
# NOTE(review): presumably arrays of per-condition measurements — confirm their shape
# and that both keys exist.
x = vesicle_type_nd_DIV['DEP647_PSD680 8DIV']
y = vesicle_type_nd_DIV['SPON647_PSD680 8DIV']


def statistic(x, y, axis):
    """Return mean(x) - mean(y), with both means taken along `axis`."""
    mean_x = np.mean(x, axis=axis)
    mean_y = np.mean(y, axis=axis)
    return mean_x - mean_y

from scipy.stats import permutation_test
# because our statistic is vectorized (it takes an `axis` argument), we pass `vectorized=True`
# `n_resamples=9999` requests a randomized test with 9999 resamples; passing
# `n_resamples=np.inf` instead would request an exact test.
# `alternative='less'` makes this a one-sided test on mean(x) - mean(y).
res = permutation_test((x, y), statistic, vectorized=True,
                       n_resamples=9999, alternative='less')
# Observed difference of means, followed by the p-value of the test.
print(res.statistic)
print(res.pvalue)
