In [None]:
from importlib import reload
import pandas as pd
import numpy as np
# Plotting tools
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
# Stats analysis tools
from scipy.stats import shapiro
from scipy.stats import gaussian_kde
from scipy.signal import find_peaks
# byc modules
from byc import plotting
import byc.standard_analysis as sa
import byc.fitting_tools as ft
import byc.files as fm
import byc.database as db
import byc.trace_tools as tt
# Apply the byc plotting style. set_styles is handed the pyplot and matplotlib
# modules (presumably to adjust their global settings — confirm in byc.plotting).
plotting.set_styles(plt, matplotlib)

In [None]:
# Pick up on-disk edits to the byc helper modules without restarting the kernel,
# then re-bind the database handle from the freshly reloaded `db` module.
reloads = [sa, ft, plotting, fm, db, tt]
for module in reloads:
    reload(module)

byc_database = db.byc_database

In [None]:
# Handle to the byc database object exposed by the byc.database module
byc_database = db.byc_database
# Data frame describing the stored traces; it carries at least an `expt_name`
# column (read here) and a `chase_index` column (read when traces are loaded).
db_df = byc_database.trace_database_df
# Distinct experiment names, in order of first appearance in db_df
expt_names = db_df.expt_name.unique()
# Display the names so the labels written for them can be checked by eye
expt_names

In [None]:
# Human-readable label for each experiment, used as tick labels in the plots.
# NOTE(review): labels are paired with expt_names purely by position, so this list
# must stay in the same order as db_df.expt_name.unique() — confirm after the
# database changes.
expt_summaries = ['UBL-YFP-NS int. in young BY4741',
                  'RKK-YFP-Su9 int. in young BY4741',
                  'RKK-YFP-SP2 int. in young BY4741',
                  'UBL-YFP-NS cen4 in young rpn4d',
                  'UBL-YFP-NS cen4 in old rpn4d',
                  'UBL-YFP-NS cen4 in old pdr5d',
                  'UBL-YFP-NS cen4 in young pdr5d',
                  'UBL-YFP-NS cen4 in young ubr2d',
                  'UBL-YFP NS cen4 in young and old pdr5d']

# Experiment name -> label. zip stops at the shorter of the two inputs, so any
# experiment names beyond the ninth get no entry.
summ_dict = dict(zip(expt_names, expt_summaries))

In [None]:
# Load the per-cell trace data frames for the first eight experiments and record
# the chase index stored for each experiment in the trace database.
dfs_lists = [byc_database.get_cell_trace_dfs(name) for name in expt_names[0:8]]
chase_indices = []
for i, dfs_list in enumerate(dfs_lists):
    name = expt_names[i]
    db_df_expt = db_df[db_df.expt_name == name]
    # The chase index is read from the experiment's lowest-index database row
    first_row_label = db_df_expt.index.min()
    chase_index = db_df_expt.loc[first_row_label, 'chase_index']
    chase_indices.append(chase_index)

    # Tag every cell's trace with the experiment it came from and its label
    for df in dfs_list:
        df.loc[:, 'expt_name'] = name
        df.loc[:, 'summary'] = expt_summaries[i]

# Experiment name -> chase index (only covers the experiments loaded above)
chase_ind_dict = dict(zip(expt_names, chase_indices))

In [None]:
# Inspect the experiment name -> chase index mapping
chase_ind_dict

In [None]:
# Reload the fitting helpers after editing them on disk. The module is imported
# under the alias `ft` (import byc.fitting_tools as ft); the bare name
# `fitting_tools` is not bound by the notebook's imports and raised NameError.
# `reload` itself is already imported in the first cell.
reload(ft)

In [None]:
# Fit every experiment with a single and a double exponential, collapse the fits
# to one median row per cell, and collect one tagged data frame per experiment.
fit_dfs = []
# Walk names, traces and chase indices in step. zip stops at the shortest input,
# so only experiments whose traces were loaded into dfs_lists are fitted.
for expt_name, dfs_list, chase_start in zip(expt_names, dfs_lists, chase_indices):
    # chase_start is read out of a data frame cell; hand the fitter a plain int
    single_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size=36, fit_func=ft.single_exp)
    single_df = single_df.groupby(by='cell_index').median().reset_index()
    single_df.loc[:, 'fit_type'] = 'single_exp'

    double_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size=36, fit_func=ft.double_exp)
    double_df = double_df.groupby(by='cell_index').median().reset_index()
    double_df.loc[:, 'fit_type'] = 'double_exp'

    # Long format: one row per (cell, fit type), labelled with the experiment
    single_double_df = pd.concat([single_df, double_df], ignore_index=True, sort=False)
    single_double_df.loc[:, 'expt_name'] = expt_name

    fit_dfs.append(single_double_df)

# All experiments stacked into a single data frame
all_fit_dfs = pd.concat(fit_dfs, ignore_index=True, sort=False)

In [None]:
# Histogram of the fit output column `e` across all fits, on a log-scaled x axis
ax = plt.axes()
ax.hist(all_fit_dfs.e)
ax.set_xscale('log')

In [None]:
# Per-experiment median of the single-exponential fits, ordered by ascending `b`
sorted_by_b = (
    all_fit_dfs[all_fit_dfs.fit_type == 'single_exp']
    .groupby(by='expt_name')
    .median()
    .sort_values(by='b')
    .reset_index()
)

In [None]:
# Inspect the per-experiment medians, ordered by `b`
sorted_by_b

In [None]:
# Add a column holding log10 of each fit's Shapiro p-value
all_fit_dfs.loc[:, 'log_shapiro_p'] = np.log10(all_fit_dfs.shapiro_p)

In [None]:
# Largest log10(Shapiro p) across all fits
all_fit_dfs.log_shapiro_p.max()

In [None]:
# Largest Shapiro p-value across all fits
all_fit_dfs.shapiro_p.max()

In [None]:
# Inspect the first entry of `colors`.
# NOTE(review): in the visible notebook `colors` is first assigned in the
# "Plot rates" cell further down, so on a fresh kernel this cell raises NameError.
colors[0]

In [None]:
# Inspect the second entry of `colors`.
# NOTE(review): `colors` is only assigned in later cells of the visible notebook,
# so this depends on out-of-order execution.
colors[1]

In [None]:
# Scratch check: five evenly spaced values from 0 to 1 inclusive
np.linspace(0, 1, 5)

### Plot rates

In [None]:
# Tick labels: experiment summary followed by its median single-exponential rate.
# sorted_by_b has one row per experiment, so names and rates are walked in step;
# this avoids float() on a one-element Series, which recent pandas deprecates.
xlabels = []
for name, rate in zip(sorted_by_b.expt_name, sorted_by_b.b):
    summary = summ_dict[name]
    single_decay_rate = str(np.round(float(rate), 2))

    xlabel = f'{summary} ({single_decay_rate})'
    xlabels.append(xlabel)

# The axis label, the row ordering and the output file name all refer to the
# single-exponential rate, so only single_exp rows are plotted. all_fit_dfs also
# holds double_exp fits, which were previously drawn in the same strips.
single_exp_fits = all_fit_dfs[all_fit_dfs.fit_type == 'single_exp']

fig = plt.figure(figsize=(7, 4), tight_layout=True)
fig.set_dpi(300)
# One colour per experiment row
colors = cm.rainbow(np.linspace(0, 1, 8), alpha=1)
ax = sns.stripplot(x='b', y='expt_name', data=single_exp_fits,
                   order=sorted_by_b.expt_name, dodge=True,
                   palette=colors, alpha=0.4, size=7, orient='h')

# Replace the raw experiment names on the y axis with the readable labels
ax.set_yticklabels(xlabels)
ax.set_ylabel('Experiment (with Median Single Decay Rate)', fontsize=12)
ax.set_xlabel('Single Decay Rate (1/hr.)', fontsize=12)
#ax.set_xscale('log')
ax.set_xlim(0, 4)
for spine in [ax.spines[name] for name in ['top', 'right']]:
    spine.set_visible(False)

fig.savefig('all_experiments_single_decay_constant_vs_median_single_decay_constant.png')

### Plot Shapiro p-values of the fit residuals

In [None]:
# Tick labels: experiment summary followed by its median single-exponential rate.
# sorted_by_b has one row per experiment, so names and rates are walked in step;
# this avoids float() on a one-element Series, which recent pandas deprecates.
xlabels = []
for name, rate in zip(sorted_by_b.expt_name, sorted_by_b.b):
    summary = summ_dict[name]
    single_decay_rate = str(np.round(float(rate), 2))

    xlabel = f'{summary} ({single_decay_rate})'
    xlabels.append(xlabel)

fig = plt.figure(figsize=(7, 4), tight_layout=True)
fig.set_dpi(300)
# Single- and double-exponential fits are drawn side by side within each experiment
# row, using a fixed two-colour palette (the unused rainbow colour array was dropped).
ax = sns.stripplot(x='shapiro_p', y='expt_name', hue='fit_type', data=all_fit_dfs,
                   order=sorted_by_b.expt_name, palette=['purple', 'green'], dodge=True,
                   alpha=0.3, size=7, orient='h')

ax.set_yticklabels(xlabels)
ax.set_ylabel('Experiment (with Median Single Decay Rate)', fontsize=12)
# Conventional 0.05 significance threshold for the Shapiro test
ax.axvline(0.05, color='black', linestyle='--', alpha=0.8)
ax.set_xlabel('log(Shapiro p of Residuals)', fontsize=12)
# Raw p-values are plotted on a log-scaled axis; the reversed limits put p = 1.2
# on the left and p = 1e-5 on the right.
ax.set_xscale('log')
ax.set_xlim(1.2, 0.00001)
for spine in [ax.spines[name] for name in ['top', 'right']]:
    spine.set_visible(False)

fig.savefig('all_experiments_resids_shapiro_p_vs_median_single_decay_constant.png')

### Plot standard error of the regression

In [None]:
# Tick labels: experiment summary followed by its median single-exponential rate.
# sorted_by_b has one row per experiment, so names and rates are walked in step;
# this avoids float() on a one-element Series, which recent pandas deprecates.
xlabels = []
for name, rate in zip(sorted_by_b.expt_name, sorted_by_b.b):
    summary = summ_dict[name]
    single_decay_rate = str(np.round(float(rate), 2))

    xlabel = f'{summary} ({single_decay_rate})'
    xlabels.append(xlabel)

fig = plt.figure(figsize=(7, 4), tight_layout=True)
fig.set_dpi(300)
# Single- and double-exponential fits are drawn side by side within each experiment
# row, using a fixed two-colour palette (the unused rainbow colour array was dropped).
ax = sns.stripplot(x='est_std_err', y='expt_name', hue='fit_type', data=all_fit_dfs,
                   order=sorted_by_b.expt_name, palette=['purple', 'green'], dodge=True,
                   alpha=0.3, size=7, orient='h')

ax.set_yticklabels(xlabels)
ax.set_ylabel('Experiment (with Median Single Decay Rate)', fontsize=12)
# Label previously ended in an unbalanced "))"
ax.set_xlabel('Standard Error of Regression (norm. YFP)', fontsize=12)
ax.set_xlim(0, 0.3)
for spine in [ax.spines[name] for name in ['top', 'right']]:
    spine.set_visible(False)

fig.savefig('all_experiments_est_std_err_vs_median_single_decay_constant.png')

### Error vs. Shapiro p

In [None]:
# Scatter each fit's standard error against the Shapiro p-value of its residuals
ax = plt.axes()
ax.scatter(all_fit_dfs.shapiro_p, all_fit_dfs.est_std_err,
            alpha=0.1)

# Reversed limits: p runs from 1.2 on the left down to 1e-5 on the right
ax.set_xlim(1.2, 0.00001)
ax.set_xscale('log')
#ax.set_xticks(np.linspace(0.1, -5, 5))

# Restrict the y axis to 0-0.2 with five evenly spaced ticks
ax.set_ylim(0, 0.2)
ax.set_yticks(np.linspace(0, 0.2, 5))

In [None]:
# Fit one cell's trace from every candidate start frame and show the fit error per
# start frame on top of that same cell's trace.
# NOTE(review): the window width is whatever ft.scan_start_frames uses by default
# (an earlier comment said 30 frames) — confirm.
# The scan and the overlaid trace now use the same cell; previously cell 1 was
# scanned while cell 0's trace was drawn.
cell_df = dfs_list[1]
scan_df = ft.scan_start_frames(cell_df)
ax = plt.axes()
ax.scatter(scan_df.start_frame, scan_df.est_std_err)
# Second y axis for the normalised YFP trace; Slice is shifted by one to line up
# with the start_frame values on the shared x axis.
ax2 = ax.twinx()
ax2.plot(cell_df.Slice-1, cell_df.yfp_norm)
# argmin() returns a position, so look the start frame up with .iloc
best_position = scan_df.est_std_err.argmin()
ax2.axvline(scan_df.start_frame.iloc[best_position])
print(best_position)

In [None]:
def add_columns_suffix(df, suffix):
    """Return a copy of ``df`` whose fit-output columns carry ``_<suffix>``.

    Every column from ``'y_pred_norm'`` through the last column is renamed to
    ``f'{col}_{suffix}'``. Columns to the left of ``'y_pred_norm'`` keep their
    names, so key columns such as ``cell_index`` can still be used to merge.

    Renaming is done per column name rather than by position, so a column that
    happens to sit between ``'y_input_norm'`` and ``'y_pred_norm'`` is left
    untouched instead of shifting every later name by one.

    Parameters
    ----------
    df : pandas.DataFrame
        Fit results containing a ``'y_pred_norm'`` column.
    suffix : str
        Text appended (after an underscore) to each fit-output column name.

    Returns
    -------
    pandas.DataFrame
        A new data frame; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``df`` has no ``'y_pred_norm'`` column.
    """
    # Label-based slice: 'y_pred_norm' and everything to its right
    fit_columns = df.loc[:, 'y_pred_norm':].columns
    new_name_dict = {col: f'{col}_{suffix}' for col in fit_columns}
    new_df = df.rename(columns=new_name_dict).copy()

    return new_df

#### Merging data frames and having separate columns for each disparate fit output

In [None]:
# Fit the current experiment's cells with both models over the widest window and
# reduce each set of fits to one median row per cell.
# chase_start is cast to a plain int, matching how the all-experiments fit loop
# calls ft.get_all_fits_df.
single_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size='max', fit_func=ft.single_exp)
single_df = single_df.groupby(by='cell_index').median().reset_index()
double_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size='max', fit_func=ft.double_exp)
double_df = double_df.groupby(by='cell_index').median().reset_index()

# Give each model's fit outputs their own column names so they can sit side by side
single_df = add_columns_suffix(single_df, suffix='single')
double_df = add_columns_suffix(double_df, suffix='double')

# Wide format: one row per cell. merge already returns a new frame, so no extra
# copy is needed.
single_double_df = single_df.merge(double_df, how='outer', on='cell_index')

In [None]:
# Per-cell standard error of the single-exponential fit (x) against the
# double-exponential fit (y)
plt.scatter(single_double_df.est_std_err_single, single_double_df.est_std_err_double)

#### Concatenating data frames and adding a column that records the type of fit

In [None]:
# Fit the current experiment's cells with both models over the widest window,
# reduce to one median row per cell, and tag each frame with its fit type.
# chase_start is cast to a plain int, matching how the all-experiments fit loop
# calls ft.get_all_fits_df.
single_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size='max', fit_func=ft.single_exp)
single_df = single_df.groupby(by='cell_index').median().reset_index()
single_df.loc[:, 'fit_type'] = 'single_exp'

double_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size='max', fit_func=ft.double_exp)
double_df = double_df.groupby(by='cell_index').median().reset_index()
double_df.loc[:, 'fit_type'] = 'double_exp'

In [None]:
# Long format: stack the single- and double-exponential rows under a fresh index
single_double_df = pd.concat([single_df, double_df], ignore_index=True, sort=False)

In [None]:
# List the columns available in the stacked fit table
single_double_df.columns

In [None]:
# Shapiro p of the residuals for each fit type, one point per cell
fig = plt.figure(figsize=(3, 3))
fig.set_dpi(100)

ax = sns.swarmplot(x='fit_type', y='shapiro_p', hue='cell_index', data=single_double_df)
# The per-cell legend is not wanted here. Remove it explicitly instead of
# overwriting it with an empty-label legend via ax.legend('').
legend = ax.get_legend()
if legend is not None:
    legend.remove()

In [None]:
# Reload byc.fitting_tools (alias ft) to pick up edits made on disk
reload(ft)

In [None]:
# Reload byc.plotting to pick up edits made on disk
reload(plotting)

In [None]:
# Experiment names of interest for the per-cell plots below. They are kept as
# strings because the bare names are not valid Python and made this cell raise
# SyntaxError.
candidate_expt_names = ['20191213_byc_by4741_young_rkk-sp2_pJC033',
                        '20191106_byc_by4741_young_NS']
candidate_expt_names

In [None]:
# Number of cell trace data frames currently held in dfs_list
len(dfs_list)

### Plot traces and residuals vs. time and probability density of residuals

In [None]:
# Fit the first ten cells of one experiment with a single exponential and plot the
# fits together with their residuals.
expt_name = '20191106_byc_by4741_young_NS'
dfs_list = byc_database.get_cell_trace_dfs(expt_name)[0:10]
chase_start = chase_ind_dict[expt_name]

# chase_start is cast to a plain int, matching how the all-experiments fit loop
# calls ft.get_all_fits_df.
single_fits_df = ft.get_all_fits_df(dfs_list, int(chase_start), window_size=36, fit_func=ft.single_exp,
                                    col_name='yfp_norm')

# Tick positions handed to the plotting helper for the trace and residual panels
xticks_traces = np.linspace(0, 6, 7)
yticks_traces = [0, .2, .4, .6, .8, 1.0, 1.2]

yticks_resids = np.linspace(-0.3, 0.3, 5)

plotting.plot_fits_and_residuals(single_fits_df, dfs_list, expt_name, scatteralpha=0.8, linealpha=0.8,
                                 xticks_traces=xticks_traces, yticks_traces=yticks_traces,
                                 yticks_resids=yticks_resids)

### Table of fit parameters and quality measures

In [None]:
# Render a table of per-cell fit parameters and quality measures, one row per
# cell ordered by `sort_by`, and save it as an image.
all_fits_df = single_fits_df
sort_by = 'est_std_err'

# NOTE(review): the file name says double_exp, but the table is built from
# single_fits_df — confirm which is intended.
filename = f'{expt_name}_double_exp_rates_and_errors'
fileformat = '.png'

# Fit columns to show, and the header printed above each of them
col_keys = ['b', 'shapiro_p', 'est_std_err', 'r_sq']
col_titles = ['k (1/hr.)',
              'Shapiro p\nof Residuals',
              'Estimate Std.\n Err.',
              'r_sq']

# Shapiro p-values below alpha are flagged with a leading '*'
alpha = 0.05
# Table layout
colwidths = [0.1] * len(col_titles)
width, height = 1.8, 1.75
loc = 'center'

# One median row per cell, ordered by the chosen quality measure
all_fits_df_gb = all_fits_df.groupby(by='cell_index').median().copy()
sorted_df = all_fits_df_gb.sort_values(by=sort_by)

# One colour per loaded cell. Cell indices are used as positions into `colors`,
# which presumably relies on them running from 0 to len(dfs_list) - 1 — verify.
colors = cm.rainbow(np.linspace(0, 1, len(dfs_list)), alpha=0.65)
colors_sorted = [colors[i] for i in sorted_df.index]

fig = plt.figure(figsize=(5, 5), tight_layout=True)
fig.set_dpi(300)
ax = fig.add_subplot()

cell_indices = sorted_df.index
row_labels = [f'Cell {i}' for i in cell_indices]

# Build the table body: values rounded to three decimals, with significant
# Shapiro p-values turned into starred strings
cell_text = []
for cell_index in cell_indices:
    rounded_vals = [np.round(sorted_df.loc[cell_index, key], 3) for key in col_keys]
    cell_text.append([
        f'*{val}' if key == 'shapiro_p' and val < alpha else val
        for key, val in zip(col_keys, rounded_vals)
    ])

table = ax.table(cellText=cell_text,
                 colLabels=col_titles,
                 rowLabels=row_labels,
                 rowColours=colors_sorted,
                 colWidths=colwidths,
                 loc=loc)
table.set_fontsize(12)
table.scale(width, height)

# Saving is skipped when filename is empty
if filename:
    fig.savefig(f'{filename}{fileformat}')