In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
from pathlib import Path
import h5py

import utils as utl

In [None]:
# --- configuration -----------------------------------------------------------
# Everything a reader might want to tune lives in this cell.

# sample rates: r_stim belongs to the stimulus trace, r_rec to the recording;
# both are used below to convert sample/frame indices to seconds
# NOTE(review): units are presumably Hz -- confirm.
r_stim, r_rec = 2e5, 3.048

# path to folder
# NOTE(review): hard-coded network share; adjust when running on another machine.
parent_dir = Path(r'\\mpfi.org\public\sb-lab\Nino_2P_for_Salil\for_Nico')

# selection rule: all ROI.csv files
# sorted() makes the processing order (and everything derived from it, such as
# the short experiment labels) independent of the file system's listing order
p_csvs = sorted(parent_dir.glob('Gr5aLexA_opChrimsonR_*/**/ROI.csv'))

# maximum number of stims
n_max = 4

# generate quality control plots or not
plot = True

# collect all data in this file
# (fixed: the original derived p_data from itself, which raises a NameError
# on a fresh kernel; the pickle belongs inside p_out)
p_out = Path('./stim')
p_data = p_out / 'opt_fiber.pickle'

# preprocessing

In [None]:
# Loop over all ROI.csv files and turn every recording into a long-format data
# frame with one row per (ROI, stimulus onset, frame). The frames of all
# recordings are then concatenated into df_all and written to p_data.
dfs = []
for p_csv in p_csvs:

    # workdir
    p_dir = p_csv.parent

    # check for h5 file
    l_hdf = [*p_dir.glob('*.h5')]
    if len(l_hdf) != 1:
        print(f'INFO: directory must contain exactly one h5 file, skipping {p_dir}')
        continue
    p_hdf = l_hdf[0]

    # load data from csv file
    print('INFO: now parsing {}'.format(p_csv))
    df_rec = pd.read_csv(p_csv)
    f_rec_max = len(df_rec)

    # load data from h5 file (the trace is copied into memory, so the file can
    # be closed before any further processing)
    print('INFO: now parsing {}'.format(p_hdf))
    with h5py.File(p_hdf, 'r') as f_hdf:
        stim = f_hdf['stim'][:]

    # stimulus onsets and length of the stimulus trace, both in recording frames
    # NOTE(review): the shift below presumably aligns the END of the stimulus
    # trace with the END of the recording -- confirm against utl.get_onsets.
    f_stim_on, f_stim_max = utl.get_onsets(stim, r_stim, r_rec, n_max=n_max)
    f_stim_on = f_stim_on - f_stim_max + f_rec_max

    # get additional info from file path; the code expects
    #   <exp>/<name containing the fly number>/trial<N>_<freq>[_...]/ROI.csv
    n_fly = int(''.join(filter(str.isdigit, p_dir.parts[-2])))
    exp = p_dir.parts[-3]
    s = p_dir.parts[-1].split('_')
    n_trial = int(s[0].replace('trial', ''))
    freq = float(s[1])

    # zero-based frame index taken from the column named ' '
    f_rec = df_rec.loc[:, ' '].values - 1

    # reorder: the last column (background) becomes ROI 0
    cols = df_rec.columns[-1:].tolist() + df_rec.columns[1:-1].tolist()

    # loop over ROIs and stims: every stimulus contributes one full copy of the
    # recording whose frame axis is shifted so that frame 0 is the onset
    fs, rois, intens = [], [], []
    for i, col in enumerate(cols):
        y_col = df_rec.loc[:, col].values
        for j in f_stim_on:
            fs.extend(f_rec - j)
            intens.extend(y_col)
            rois.extend([i] * len(f_rec))

    # collect in data frame
    df = pd.DataFrame(data={
        'frame': fs,
        't': np.asarray(fs, dtype=float) / r_rec,
        'int': intens,
        'roi': rois,
        'exp': exp,
        'fly': n_fly,
        'trial': n_trial,
        'freq': freq,
    })

    # generate quality control plots
    if plot:
        fig, ax = plt.subplots(figsize=(15, 5))

        # raw traces of all ROIs
        for col in df_rec.columns[1:]:
            y = df_rec.loc[:, col].values
            x = np.arange(len(y)) / r_rec
            ax.plot(x, y, label=col)

        # stimulus trace, shifted in time by the same offset as the onsets
        y = stim
        x = np.arange(len(y)) / r_stim - f_stim_max / r_rec + f_rec_max / r_rec
        ax.plot(x, y, color='gray')

        ax.legend()

        # detected onsets
        for f_on in f_stim_on:
            ax.axvline(f_on / r_rec, color='gray')

        ax.set_title(f'{freq}')
        ax.set_xlabel('time [s]')
        ax.set_ylabel('fluorescence')

        fig.tight_layout()

        # the plot is stored next to the csv file (ROI.png)
        fig.savefig(p_csv.with_suffix('.png'))

        # close to avoid accumulating open figures in the loop
        plt.close(fig)

    dfs.append(df)

# fail with a clear message instead of pandas' "No objects to concatenate"
if not dfs:
    raise ValueError(f'no usable recording found below {parent_dir}')

# combine dataframes
df_all = pd.concat(dfs, ignore_index=True)

# add short exp names (a, b, c, ...); refuse to continue rather than silently
# leaving experiments without a label
exps = df_all.loc[:, 'exp'].unique()
exp_labels = 'abcdefghijklmnopqrstuvwxyz'
if len(exps) > len(exp_labels):
    raise ValueError(f'too many experiments ({len(exps)}) for single-letter labels')
exp_dict = dict(zip(exps, exp_labels))
df_all['exp_'] = df_all['exp'].map(exp_dict)

# write to disk (create the output folder first, to_pickle does not do that)
p_data.parent.mkdir(parents=True, exist_ok=True)
df_all.to_pickle(p_data)

# print contents of df_all
for k in df_all.groupby(['exp', 'fly', 'trial', 'freq', 'roi']).groups.keys():
    print(f'exp: {k[0]}', f'fly: {k[1]}', f'trial {k[2]}', f'freq {k[3]}', f'roi {k[4]}')

# analysis

In [None]:
# load preprocessed data
df_all = pd.read_pickle(p_data)

# make sure both result columns exist even if no ROI can be processed;
# background rows (roi == 0) keep NaN in these columns
df_all['int_norm'] = np.nan
df_all['int_norm_z'] = np.nan

# cycle through trials; background and ROIs of one trial are handled together
# so that a ROI can never be paired with the background of another trial
for k, df_trial in df_all.groupby(['exp', 'fly', 'freq', 'trial']):

    # background is ROI 0 (ROIs were reordered in preprocessing)
    is_bg = (df_trial['roi'] == 0).values
    if not is_bg.any():
        print(f'INFO: no background ROI, skipping {k}')
        continue
    y0 = df_trial.loc[is_bg, 'int'].values

    # r != 0 are ROIs
    for r, df in df_trial.loc[~is_bg].groupby('roi'):

        if len(df) != len(y0):
            print(f'INFO: ROI {r} and background differ in length, skipping {k}')
            continue

        # subtract background (float copy so the in-place operations below
        # also work for integer intensities)
        y = df['int'].values.astype(float) - y0

        # baseline window: the 3 s before stimulus onset
        x = df['t'].values
        mask = (x > -3) & (x < 0)
        # NOTE: a stricter check that skipped groups without exactly 9 baseline
        # frames ("misaligned") was already disabled in the original cell.

        # baseline-subtracted trace
        y -= y[mask].mean()
        df_all.loc[df.index, 'int_norm'] = y

        # zscore over the whole trace, then re-reference to the baseline window
        y = (y - y.mean()) / y.std()
        y -= y[mask].mean()
        df_all.loc[df.index, 'int_norm_z'] = y

## example exp, fly, roi

In [None]:
# pick one experiment, one fly and one ROI, narrowing df_all down step by step
exp, fly, roi = 'Gr5aLexA_opChrimsonR_FoxgloveGC7b', 4, 1
df_exp = df_all.groupby('exp').get_group(exp)
df_fly = df_exp.groupby('fly').get_group(fly)
df_roi = df_fly.groupby('roi').get_group(roi)

# one line per stimulation frequency; errorbar='sd' draws the standard
# deviation around the mean of the rows sharing a time point
fig, ax = plt.subplots()
sns.lineplot(data=df_roi, x='t', y='int_norm', hue='freq', errorbar='sd', ax=ax)

# restrict the view to a window around stimulus onset (t = 0) and label the axes
ax.set(
    xlim=(-5, 20),
    xlabel='time [s]',
    ylabel='df/f',
    title='{}, fly {}, ROI {}'.format(exp, fly, roi),
)

fig.tight_layout()


In [None]:
# selection of experiment, ROI and stimulation frequency
# alternative selections used previously:
# exp = 'GH146-Gal4-GC7f-ChrimsonRmCherry'
# exp, roi, freq = 'GH146-Gal4-GC7f-CsChrimsonmCherry', 1, 100
exp, roi, freq = 'data_paper', 1, 1.0

# narrow df_all down step by step
# (fixed: the ROI is now taken from the frequency-filtered frame; the original
# filtered df_exp again, so the plot silently contained all frequencies)
df_exp = df_all.groupby('exp').get_group(exp)
df_freq = df_exp.groupby('freq').get_group(freq)
df_roi = df_freq.groupby('roi').get_group(roi)

# one line per fly; errorbar='sd' draws the standard deviation around the mean
fig, ax = plt.subplots()

sns.lineplot(ax=ax, data=df_roi, x='t', y='int_norm_z', hue='fly', errorbar='sd')

ax.set_xlim(-5, 20)

ax.set_ylabel('zscored(df/f)')
ax.set_xlabel('time [s]')
ax.set_title('{}, freq {}, ROI {}'.format(exp, freq, roi))

fig.tight_layout()

# savefig does not create missing folders
p_fig = Path('./plots/paper_roi1.svg')
p_fig.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(p_fig)

## calculate area under the curve

In [None]:
# integrate the response in the first 2 s after stimulus onset
l = ['exp', 'exp_', 'fly', 'freq', 'trial', 'roi']
gr_all = df_all.groupby(l)

# np.trapz was renamed to np.trapezoid in NumPy 2.0; support both
trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz


def _area_per_stim(x, y, seg, keep):
    """Sum of the trapezoidal areas of y over x, computed separately per segment.

    x, y : 1-d arrays of equal length (time and signal)
    seg  : integer segment id per sample
    keep : boolean mask selecting the samples inside the integration window

    Only samples with keep == True contribute. Segments are never joined, so
    no area is accumulated across the jump of x between two segments.
    Returns 0.0 if no sample is selected.
    """
    total = 0.0
    for s in np.unique(seg[keep]):
        sel = keep & (seg == s)
        total += trapezoid(y[sel], x=x[sel])
    return float(total)


records = []
for k, df in gr_all:

    x = df.loc[:, 't'].values

    # Preprocessing stores one time-shifted copy of the whole recording per
    # stimulus, one after the other, so t drops wherever a new copy starts
    # (assuming the frames of a recording are stored in ascending order).
    # Number the copies so that every stimulus is integrated on its own; the
    # original integrated across the copies and thereby added spurious
    # (negative-width) trapezoids at every copy boundary.
    seg = np.concatenate([[0], np.cumsum(np.diff(x) < 0)])
    mask = (x >= 0) & (x <= 2)

    rec = dict(zip(l, k))
    for name, col in (('area', 'int'), ('area_norm', 'int_norm'), ('area_norm_z', 'int_norm_z')):
        rec[name] = _area_per_stim(x, df.loc[:, col].values, seg, mask)
    records.append(rec)

# build the frame once (instead of growing it row by row) and set proper dtypes
df_a = pd.DataFrame(records, columns=l + ['area', 'area_norm', 'area_norm_z'])
df_a = df_a.astype({
    'fly': int, 'trial': int, 'roi': int, 'freq': float,
    'area': float, 'area_norm': float, 'area_norm_z': float,
})

# rows without normalised traces (e.g. the background ROI) have NaN areas
df_a = df_a.dropna()

In [None]:
# fit line to frequencies for given ROI

In [None]:
# regression of area vs. frequency for one ROI, one panel per (experiment, fly)
roi = 1

df_roi = df_a.groupby('roi').get_group(roi)

# the regression needs numeric columns; df_a may hold them as object dtype
df_roi = df_roi.astype({'freq': float, 'area_norm_z': float})

g = sns.lmplot(data=df_roi, height=2,
        x='freq', y='area_norm_z', col='fly', row='exp_', palette='tab10')
g.set_axis_labels('freq [Hz]', 'z(A)')

# the title shows the mapping from experiment name to its short label (row names)
g.figure.suptitle(str(exp_dict))
g.tight_layout()

# savefig does not create missing folders
p_fig = Path('./plots/area_individual.svg')
p_fig.parent.mkdir(parents=True, exist_ok=True)
g.figure.savefig(p_fig)

In [None]:
# least-squares line area_norm_z = slope * freq + intercept for every
# (experiment, ROI), pooled over flies and trials
l = ['exp', 'exp_', 'roi']
gr = df_a.groupby(l)

records = []
for k, df in gr:

    x = df.loc[:, 'freq'].to_numpy(dtype=float)
    y = df.loc[:, 'area_norm_z'].to_numpy(dtype=float)

    # a line is only defined for at least two distinct frequencies
    slope = intercept = r2 = np.nan
    if len(x) >= 2 and np.ptp(x) > 0:
        # ordinary least squares with numpy (the original used
        # LinearRegression, which is not imported in this notebook)
        slope, intercept = np.polyfit(x, y, deg=1)

        # coefficient of determination; undefined for a constant y
        ss_res = np.sum((y - (slope * x + intercept)) ** 2)
        ss_tot = np.sum((y - y.mean()) ** 2)
        if ss_tot > 0:
            r2 = 1 - ss_res / ss_tot

    records.append({**dict(zip(l, k)), 'slope': slope, 'intercept': intercept, 'r2': r2})

# build the result frame once instead of growing it row by row
df_f = pd.DataFrame(records, columns=l + ['slope', 'intercept', 'r2'])

df_f