In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datashader as ds
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter1d
from scipy import stats

# Load one traced kymograph; the first three tab-separated columns are named time, x and brightness.
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable data directory.
dataX = pd.read_table('/Users/filiproch/Downloads/monomer_diffusion/22C/traced/ch1 exp2 22C kymo1-2-1_positions.txt', header=None, usecols=[0, 1, 2])
dataX.columns = ["time", "x", "brightness"]

# Piecewise-linear interpolators of x and of brightness as functions of time.
interpolatedx = interp1d(dataX['time'], dataX['x'], kind='slinear')
interpolatedb = interp1d(dataX['time'], dataX['brightness'], kind='slinear')

# Evenly spaced grid of 100 time points on [0, 6].
# NOTE(review): the grid is not used anywhere in this cell — confirm whether it is still needed.
new_time = np.linspace(0, 6, 100)

# Row-to-row change in x. The first row has no predecessor, so diff[0] is NaN.
diff = dataX.x-dataX.x.shift(1)
# NaN-free copy used for plotting: a single NaN makes the violin's density estimate and mean NaN.
diff_valid = diff.dropna()

# Kymograph-style view: x on the horizontal axis, time on the vertical axis, grey level = brightness.
fig, ax1 = plt.subplots()
ax1.scatter(dataX["x"], dataX["time"], c=dataX["brightness"], cmap='gray', marker='s')
ax1.set_facecolor("black")
ax1.set_xlabel("x")
ax1.set_ylabel("time")

# Histogram of the row-to-row changes in x.
fig2, ax2 = plt.subplots()
# Roughly one bin per ten rows, but never fewer than one (bins=0 is rejected for short traces).
ax2.hist(diff_valid, bins=max(1, len(diff) // 10))
ax2.set_xlabel("change in x between consecutive rows")
ax2.set_ylabel("count")

# Violin plot of the same changes, with the mean marked.
fig3, ax3 = plt.subplots()
ax3.violinplot(diff_valid, showmeans=True)
ax3.set_ylabel("change in x between consecutive rows")

# One plt.show() renders all open figures; Figure.show() only emits a warning on non-GUI backends.
plt.show()


def plot_dist(data, bins=100, color='#007E94', sigma=1, scale='linear', title='distribution', xlabel = 'xaxis', ylabel='yaxis', xlim = None, ylim = None):
    """Plot a (optionally smoothed) density histogram of ``data`` as a line.

    Parameters
    ----------
    data : array-like
        Values to histogram. The input is flattened; NaN and +/-inf entries
        are ignored, because ``np.histogram`` cannot derive a bin range from
        non-finite values and would raise.
    bins : int or sequence
        Passed straight to ``np.histogram``.
    color : str
        Line colour.
    sigma : float or None
        Standard deviation of the Gaussian smoothing kernel, measured in
        histogram bins (array elements). ``0`` or ``None`` disables smoothing.
    scale : str
        x-axis scale, passed to ``plt.xscale`` (e.g. ``'linear'``, ``'log'``).
    title, xlabel, ylabel : str
        Figure title and axis labels.
    xlim, ylim : tuple or None
        Axis limits, passed to ``plt.xlim`` / ``plt.ylim``; ``None`` leaves
        the automatically chosen limits in place.

    Returns
    -------
    None
        The figure is shown and then closed.
    """
    # Flatten like np.histogram does, then drop values it cannot bin.
    values = np.asarray(data, dtype=float).ravel()
    values = values[np.isfinite(values)]

    height, bin_edges = np.histogram(values, bins=bins, density=True)

    # Plot against bin centres rather than edges so x and y have equal length.
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    if sigma:
        smoothed_height = gaussian_filter1d(height, sigma=sigma)
    else:
        smoothed_height = height

    plt.figure(figsize=(5,3))
    plt.plot(bin_centers, smoothed_height, color=color)
    plt.xscale(scale)  # set the scale before applying limits and showing
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
    plt.close()

# Smoothed density of the x column: 50 bins, Gaussian smoothing of 10 bins.
plot_dist(
    dataX["x"],
    bins=50,
    sigma=10,
)


In [None]:
# Box plot of the row-to-row changes in x (NaN rows removed), with the mean marked.
plt.boxplot(diff.dropna().to_frame(), showmeans=True)

In [None]:
# Violin plot of the row-to-row changes in x (NaN rows removed), with the mean marked.
plt.violinplot(diff.dropna().to_frame(), showmeans=True)

In [None]:
import os

# Module-level store filled by read_txt_files_in_folder: "data0", "data1", ... -> DataFrame.
data_dict = {}

def read_txt_files_in_folder(folder_path):
    """Load every ``.txt`` file in ``folder_path`` into the module-level ``data_dict``.

    Each file is read as a header-less, tab-separated table whose first three
    columns are named ``time``, ``x`` and ``brightness``. Files are processed
    in sorted name order so that the keys ``data0``, ``data1``, ... map to the
    same files on every run (``os.listdir`` itself returns entries in
    arbitrary order). The counter only advances for files that were read
    successfully, so keys are contiguous.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).

    Returns
    -------
    None
        Results are stored in ``data_dict``; existing keys with the same name
        are overwritten.
    """
    txt_names = sorted(name for name in os.listdir(folder_path) if name.endswith('.txt'))
    i = 0
    for file_name in txt_names:
        file_path = os.path.join(folder_path, file_name)
        try:
            data_dict[f"data{i}"] = pd.read_table(file_path, header=None, usecols=[0, 1, 2], names=['time', 'x', 'brightness'])
        except Exception as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Could not read {file_name}: {e}")
        else:
            i += 1

# Folder holding the traced position files (.txt) to load.
# NOTE(review): absolute, machine-specific path — confirm it exists on the machine running this notebook.
folder_pathX = '/Users/filiproch/Downloads/monomer_diffusion/22C/traced'

# Fill the module-level data_dict with one DataFrame per readable .txt file in that folder.
read_txt_files_in_folder(folder_pathX)

In [None]:
# Show which keys read_txt_files_in_folder stored in data_dict.
data_dict.keys()

In [None]:
# Per-trace row-to-row changes in x, keyed like data_dict (one single-column DataFrame each).
dif_dict = {}
# Flat 1-D pieces, one per trace, pooled into dif_array below.
dif_parts = []
for dat in data_dict.keys():
    # The first row has no predecessor, so its change is NaN and is dropped.
    dif=pd.DataFrame(data_dict[dat].x-data_dict[dat].x.shift(1)).dropna()
    dif_dict[dat] = dif
    dif_parts.append(dif.to_numpy().ravel())

# np.append returns a new array instead of growing its argument, so the pieces are
# collected in a list and joined once; np.concatenate rejects an empty list.
dif_array = np.concatenate(dif_parts) if dif_parts else np.array([])

print(np.shape(dif_array))

plt.boxplot(dif_array, showmeans=True)
#plt.violinplot(dif_array, showmeans=True)

In [1]:
import os
import seaborn as sns
from fitter import Fitter, get_common_distributions, get_distributions

def datareader(path):
    """Return the row-to-row changes in the ``x`` column of one positions file.

    The file at ``path`` is read as a header-less, tab-separated table whose
    first three columns are labelled ``time``, ``x`` and ``brightness``.

    Returns
    -------
    list
        ``x[i] - x[i-1]`` for every row that has a predecessor, in file order.
    """
    table = pd.read_table(path, header=None, usecols= [0, 1, 2])
    table.columns = ["time", "x", "brightness"]

    positions = table["x"]
    steps = positions - positions.shift(1)

    # The first row has nothing before it, so its step is NaN and is discarded.
    return list(steps.dropna())


# Folder with the traced position files that filereader scans.
folder_pathX = "/Users/filiproch/Downloads/monomer_diffusion/22C/traced"

# Module-level accumulators; distlist receives one ranked list of distribution names per filereader call.
conc, distlist = [], []

# Running sum-of-squares error per distribution name, every entry starting at 0.
sselist = dict.fromkeys(
    ['lognorm', 'norm', 'gamma', 'cauchy', 'chi2', 'exponpow', 'rayleigh', 'powerlaw', 'uniform', 'expon'],
    0,
)


def filereader(folder_path, n, m):
    """Pool the x-steps of all files for kymograph number ``n`` and summarise them.

    Every ``.txt`` file in ``folder_path`` whose name starts with
    ``'ch1 exp2 22C kymo<n>'`` — and is not followed by another digit, so that
    ``n=1`` does not also pick up kymo10..kymo19 — is passed to ``datareader``
    and the returned steps are concatenated.

    Side effects: draws a 20-bin histogram and a seaborn KDE of the pooled
    steps on the current axes, appends the fitted-distribution ranking to the
    module-level ``distlist`` and adds each distribution's sum-of-squares
    error to the module-level ``sselist``.

    Parameters
    ----------
    folder_path : str
        Directory containing the position files.
    n : int
        Kymograph number used to build the file-name prefix.
    m : any
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple or None
        ``(test, p, mean, median, range, p05, p95, ipr, skew, length, std)``
        where ``test, p`` come from ``stats.normaltest``, ``range`` is
        max - min, ``ipr`` is ``p95 - p05`` and ``std`` is ``np.std`` with its
        default arguments. ``None`` when no file matched.
    """
    prefix = f'ch1 exp2 22C kymo{n}'
    conc = []
    # Sorted so the pooled order does not depend on the arbitrary os.listdir order.
    for file in sorted(os.listdir(folder_path)):
        if not (file.endswith('.txt') and file.startswith(prefix)):
            continue
        # A digit right after the prefix belongs to a longer kymograph number (kymo1 vs kymo12).
        if file[len(prefix):len(prefix) + 1].isdigit():
            continue
        conc.extend(datareader(os.path.join(folder_path, file)))
    if not conc:
        return None
    conc = np.array(conc)

    # NOTE(review): the histogram shows counts while the KDE below is a density; both share the current axes.
    plt.hist(conc, bins=20)

    skew = stats.skew(conc)

    # Fit the common distributions and record their ranking and errors.
    f = Fitter(conc,
           distributions=get_common_distributions())
    f.fit(progress=False)
    summary = f.summary(Nbest=10, plot=False)
    distlist.append(summary.index.to_list())
    sse = summary.sumsquare_error
    for dist_name in sse.index:
        # .get keeps the accumulation working for a name that was not pre-seeded in sselist.
        sselist[dist_name] = sselist.get(dist_name, 0) + sse[dist_name]

    test, p = stats.normaltest(conc)
    mean = conc.mean()
    median = np.median(conc)
    # Named value_range so the builtin range() is not shadowed inside this function.
    value_range = np.max(conc) - np.min(conc)
    p05 = np.percentile(conc, 5)
    p95 = np.percentile(conc, 95)
    ipr = p95-p05
    length = len(conc)
    std = np.std(conc)

    sns.kdeplot(conc)
    return test, p, mean, median, value_range, p05, p95, ipr, skew, length, std

# m is only forwarded to filereader.
m = 0
# One result tuple per kymograph number that had at least one matching file.
stat = []
for n in range(20):
    #for m in range(20):
    result = filereader(folder_pathX, n, m)
    # filereader returns None when no file matched this kymograph number.
    if result is not None:
        stat.append(result)

stat_columns = ['test', 'p', 'mean', 'median', 'range', 'p05', 'p95', 'ipr', 'skew', 'length', 'std']
# Force a 2-D (rows, 11) float array so an empty result still yields an empty table
# with the right columns instead of a shape mismatch in the DataFrame constructor.
stat = np.array(stat, dtype=float).reshape(-1, len(stat_columns))
#print(stat)
statPD = pd.DataFrame(stat, columns=stat_columns)
#plt.violinplot(statPD['median'])
#plt.violinplot(statPD['median'])

NameError: name 'pd' is not defined

In [158]:
# Display the summary table built from filereader's return values (one row per kymograph number).
statPD

Unnamed: 0,test,p,mean,median,range,p05,p95,ipr,skew,length,std
0,176.564523,4.565633e-39,-0.002584,0.000564,1.200635,-0.16106,0.150674,0.311734,0.077263,1844.0,0.104053
1,45.598103,1.254578e-10,0.001141,-0.000872,0.853848,-0.164029,0.174693,0.338722,0.144862,749.0,0.101615
2,38.792093,3.770545e-09,0.000966,0.000132,0.953132,-0.155441,0.157412,0.312853,-0.00984,664.0,0.100396
3,79.283473,6.078726e-18,-0.002451,0.000897,0.877626,-0.151093,0.141136,0.292229,-0.324195,830.0,0.093714
4,104.666737,1.8702810000000003e-23,0.00041,-0.001538,0.950432,-0.145891,0.152484,0.298375,0.296536,1147.0,0.094185
5,25.456656,2.965906e-06,0.001617,0.000902,0.914651,-0.169833,0.162224,0.332057,0.200318,411.0,0.103988
6,93.120257,6.014411e-21,0.000354,0.00119,0.961699,-0.142206,0.129518,0.271724,-0.099303,826.0,0.091156


In [154]:
# Display the sum-of-squares error accumulated per distribution name by filereader.
sselist

{'lognorm': 125.7753074945723,
 'norm': 126.89881206228677,
 'gamma': 309.5122975585174,
 'cauchy': 88.41508288004007,
 'chi2': 989.4480940638679,
 'exponpow': 452.7105595968801,
 'rayleigh': 770.0520335718224,
 'powerlaw': 1032.5860812864798,
 'uniform': 1039.1700285088589,
 'expon': 1211.6895246817896}

In [137]:
from collections import Counter

# Pool the distribution names from every ranking stored in distlist into one flat list.
flat_list = [dist_name for ranking in distlist for dist_name in ranking]

# Tally how often each distribution name occurs across all rankings.
# NOTE(review): if every ranking lists the same set of names, all counts come out equal — confirm this is the intended measure.
distlistx = Counter(flat_list)

In [138]:
# Display the tally of distribution names collected from distlist.
distlistx

Counter({'lognorm': 1,
         'norm': 1,
         'gamma': 1,
         'exponpow': 1,
         'cauchy': 1,
         'rayleigh': 1,
         'powerlaw': 1,
         'uniform': 1,
         'expon': 1,
         'chi2': 1})