In [2]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from ipywidgets import interactive
import ipywidgets as widgets
from ipywidgets import Layout, Button, Box, VBox,Label
import plotly as ply
from dance_sim_tools import trace_objects,utility
from dance_sim_tools.utility import histogram_draw_to_parameters as htp
from dance_sim_tools.ipywidget_helpers import slider
import pandas as pd
import dance_sim_tools.trace_objects as trace_objects
import warnings
warnings.filterwarnings('ignore')
import scipy.stats
import matplotlib.gridspec

Here we repeat the regression bootstrapping analysis from the first section of model_traces_824.ipynb, to see how (or whether) the new definition changes the tightness of the cs and other properties of the distribution of cs.

In [10]:
#Pull up all of the r_1 and r_0 values for each of the three conditions 

#we collect all the fitted ks and cs for each of the 3 conditions

def zero_insert(a):
    """Return a copy of ``a`` with a single 0 prepended.

    Used to pad histogram counts so they have the same length as the
    array of bin edges when drawing a step outline.
    """
    leading_zero = np.zeros(1, dtype=int)
    return np.hstack([leading_zero, a])

def _overlay_condition_hists(samples, bins, labels, colors, alpha):
    """Draw a translucent histogram plus a step outline per row of ``samples`` on the current axes.

    ``bins`` may be an array of edges or an integer; when it is an integer the
    edges computed for the first row are reused for the remaining rows.
    Returns ``(counts_per_row, edges)``.
    """
    counts_per_row = []
    for row, label, color in zip(samples, labels, colors):
        counts, bins, _ = plt.hist(row, bins=bins, label=label, alpha=alpha, color=color)
        plt.step(bins, zero_insert(counts), color=color)
        counts_per_row.append(counts)
    return counts_per_row, bins


def _mark_samples_on_hist(ax, values, counts, bins, color, alpha):
    """Plot one dot per value at the height of the histogram bin that contains it."""
    # np.digitize returns i with bins[i-1] <= v < bins[i], so the matching
    # entry of ``counts`` is i-1 (indexing with i picks the neighbouring bin
    # and overruns the array for values in the last bin).
    bin_idx = np.digitize(values, bins) - 1
    # The histogram's last bin is closed on the right; digitize is not.
    bin_idx[values == bins[-1]] = len(counts) - 1
    # Values outside the bin range are not counted by the histogram either.
    inside = (bin_idx >= 0) & (bin_idx < len(counts))
    ax.plot(values[inside], counts[bin_idx[inside]], 'o', color=color, alpha=alpha)


def g(m,logN,n_bins):
    """Bootstrap the linear fit r_1 = k*r_0 + c for each condition and plot the results.

    For each of the three conditions the r_0 and r_1 values are read from
    'r_0andr_1adjusted/r_0_<condition>.txt' and '.../r_1_<condition>.txt',
    passed through np.radians, and N random subsamples of m paired values are
    fit with scipy.stats.linregress (input r_0, output r_1).

    Three panels are drawn: the distribution of slopes k, the distribution of
    intercepts c (passed through np.degrees), and the distribution of the
    standard error reported by linregress. For the second condition ('2F60'),
    samples whose k lies in (1.5, 2.5) are marked in teal and those with k in
    (0.8, 1.2) in purple, on both the k and the c histograms.

    Parameters
    ----------
    m : number
        Subsample size (cast to int). Must be at least 2 to fit a line.
    logN : number
        Natural log of the number of bootstrap iterations; N = int(exp(logN)).
    n_bins : number
        Cast to int. Number of bin *edges* for the k and c histograms and
        number of bins for the standard-error histogram.

    Raises
    ------
    ValueError
        If m < 2.
    """
    n_bins = int(n_bins)
    m = int(m)     #Set a subsample size m out of all the flies in the condition
    if m < 2:
        # A single point cannot determine a line: the fits come back as NaN.
        raise ValueError('m must be at least 2 to fit a line to each subsample')
    N = int(np.exp(logN))
    ks = np.full((3,N),np.nan)
    cs = np.full((3,N),np.nan)
    fit_errs = np.full((3,N),np.nan)

    for condition in [1,2,3]:

        # NOTE(review): pd.read_csv is called with its default header handling,
        # so the first line of each file is consumed as a header - confirm the
        # files really have one.
        r_0_filename = 'r_0andr_1adjusted/r_0_'+str(condition)+'.txt'
        empirical_r0s = np.radians(pd.read_csv(r_0_filename).values.T[0])

        r_1_filename = 'r_0andr_1adjusted/r_1_'+str(condition)+'.txt'
        empirical_r1s = np.radians(pd.read_csv(r_1_filename).values.T[0])

        #For N iterations, draw m pairs of r_1,r_0, and compute a least squares fit for k,c using only the subsample
        for i in range(N):
            #randomly select m indices (=fly indices) without replacement;
            #the same indices are used for r_0 and r_1 so the pairs stay matched
            draw_idx = np.random.permutation(len(empirical_r1s))[:m]
            r_0_draw = empirical_r0s[draw_idx]
            r_1_draw = empirical_r1s[draw_idx]

            #Idea is that r_1 = k*r_0 + c
            #So fit a 2-constant linear regression w/ input r_0 and output r_1
            k, c, r_value, p_value, std_err = scipy.stats.linregress(r_0_draw,r_1_draw)
            ks[condition-1,i] = k
            cs[condition-1,i] = c
            fit_errs[condition-1,i] = std_err

    #Plot the distribution of ks (plot 1), cs (plot 2) and fit standard
    #errors (plot 4) from all N iterations for each condition
    labels = ['1F','2F60','2F90']
    colors = ['blue','orange','red']
    alpha = 0.1

    cs = np.degrees(cs)

    plt.figure(figsize=(12,12))

    kmin,kmax = 0,3
    ax_k = plt.subplot(221)
    kbins = np.linspace(kmin,kmax,n_bins)
    k_counts,_ = _overlay_condition_hists(ks,kbins,labels,colors,alpha)
    plt.xticks(np.arange(kmin,kmax,0.5))
    plt.xlim([kmin,kmax])
    plt.title('Ks')
    plt.legend()

    cmin,cmax = -50,180
    ax_c = plt.subplot(222)
    cbins = np.linspace(cmin,cmax,n_bins)
    c_counts,_ = _overlay_condition_hists(cs,cbins,labels,colors,alpha)
    plt.title('Cs')
    plt.legend()

    #investigate what's happening with the second peak of k values in the
    #2F60 case (teal), and do the first peak (purple) for comparison:
    #mark the samples of each peak on the c histogram and on the k histogram
    bump_ranges = [((1.5,2.5),'teal'),((0.8,1.2),'purple')]
    for (k_lo,k_hi),bump_color in bump_ranges:
        in_bump = (ks[1,:]>k_lo)&(ks[1,:]<k_hi)
        _mark_samples_on_hist(ax_c,cs[1,in_bump],c_counts[1],cbins,bump_color,alpha)
        _mark_samples_on_hist(ax_k,ks[1,in_bump],k_counts[1],kbins,bump_color,alpha)

    #Plot a histogram of the standard errors returned by the fits
    plt.subplot(224)
    _overlay_condition_hists(fit_errs,n_bins,labels,colors,alpha)
    plt.title('Fit Std Errors')
    plt.legend()
    plt.xlim([0,2])
    
    
            

 
# Interactive controls for g. Each slider's description must match the name of
# the g parameter it drives, because the descriptions become the keys of
# param_dict below.
# NOTE(review): the positional arguments of the project helper `slider` look
# like (description, min, max, step, initial value) - confirm against
# dance_sim_tools.ipywidget_helpers.
sm = slider('m', 1, 23, 1, 8)
slog_N = slider('logN', 3, 10, 1, 7)
sn_bins = slider('n_bins', 5, 100, 5, 50)

sliders = [sm, slog_N, sn_bins]

# Wrap every slider in its own Box and stack the boxes in a bordered column.
items = [Box([control]) for control in sliders]

ui = Box(
    items,
    layout=Layout(
        display='flex',
        flex_flow='column',
        border='solid 2px',
        align_items='stretch',
        width='50%',
    ),
)

# Map parameter name -> slider widget, as expected by interactive_output.
slider_names = [control.description for control in sliders]
param_dict = {name: control for name, control in zip(slider_names, sliders)}

# Re-run g whenever a slider changes and show the controls above the plots.
out = widgets.interactive_output(g, param_dict)

display(ui, out)


Box(children=(Box(children=(FloatSlider(value=8.0, continuous_update=False, description='m', max=23.0, min=1.0…

Output()

Below: the same procedure as above, except that instead of fitting a 2-parameter model we just look at the bootstrapped distribution of the subsample means of r1-r0 (together with the distribution of the subsample standard deviations).

In [11]:
#Pull up all of the r_1 and r_0 values for each of the three conditions 

#we collect the subsample mean of r_1-r_0 (stored in cs) and its std for each of the 3 conditions

def zero_insert(a):
    """Prepend a single 0 to ``a`` and return the result as a new array.

    Identical to the zero_insert defined earlier in this notebook; it pads
    histogram counts to the length of the bin-edge array for step plots.
    """
    return np.hstack(([0], a))

def g(m,logN,n_bins):
    """Bootstrap the subsample mean and std of r_1 - r_0 per condition and plot both.

    For each of the three conditions, r_0 and r_1 are read from
    'r_0andr_1/r_0_<condition>.txt' and 'r_0andr_1/r_1_<condition>.txt'.
    N = int(exp(logN)) times, m paired values are drawn without replacement
    and the mean and standard deviation of r_1 - r_0 over the draw are stored.

    Two panels are drawn: the distribution of the means ('r1-r0') and the
    distribution of the standard deviations ('r1-r0 stds'). Both histograms
    use n_bins equally spaced bin edges.

    NOTE(review): the values are passed through np.degrees without a prior
    np.radians, so this presumably expects the files to hold radians -
    confirm against the data.
    """
    n_bins = int(n_bins)
    m = int(m)     # subsample size drawn from the flies of one condition
    N = int(np.exp(logN))

    condition_ids = [1,2,3]
    labels = ['1F','2F60','2F90']
    colors = ['blue','orange','red']
    alpha = 0.1

    cs = np.full((len(condition_ids),N),np.nan)
    fit_stds = np.full((len(condition_ids),N),np.nan)

    for row,condition in enumerate(condition_ids):

        # First column of each file; read_csv uses its default header handling.
        r_0_filename = 'r_0andr_1/r_0_{}.txt'.format(condition)
        empirical_r0s = pd.read_csv(r_0_filename).values[:,0]

        r_1_filename = 'r_0andr_1/r_1_{}.txt'.format(condition)
        empirical_r1s = pd.read_csv(r_1_filename).values[:,0]

        for i in range(N):
            # Shuffle the fly indices and keep the first m, so that r_1 and
            # r_0 are always taken from the same flies.
            picked = np.random.permutation(len(empirical_r1s))[:m]
            r_1_draw = empirical_r1s[picked]
            r_0_draw = empirical_r0s[picked]
            differences = r_1_draw-r_0_draw

            cs[row,i] = np.mean(differences)
            fit_stds[row,i] = np.std(np.degrees(differences))

    cs = np.degrees(cs)

    plt.figure(figsize=(12,12))

    # Panel 1: bootstrapped means of r_1 - r_0.
    ax_mean = plt.subplot(321)
    cmin,cmax = 0,180
    cbins = np.linspace(cmin,cmax,n_bins)
    for row,(label,color) in enumerate(zip(labels,colors)):
        counts,_,_ = ax_mean.hist(cs[row,:],bins=cbins,label=label,alpha=alpha,color=color)
        ax_mean.step(cbins,zero_insert(counts),color=color)
    ax_mean.legend()
    ax_mean.set_title('r1-r0')

    # Panel 2: bootstrapped standard deviations of r_1 - r_0.
    ax_std = plt.subplot(322)
    std_bins = np.linspace(0,50,n_bins)
    for row,(label,color) in enumerate(zip(labels,colors)):
        counts,std_bins,_ = ax_std.hist(fit_stds[row,:],bins=std_bins,label=label,alpha=alpha,color=color)
        ax_std.step(std_bins,zero_insert(counts),color=color)
    ax_std.set_xlim(0,50)
    ax_std.set_title('r1-r0 stds')
  




 
# Interactive controls for the g defined in this cell. The slider
# descriptions become the keys of param_dict, so they must match g's
# parameter names.
# NOTE(review): the positional arguments of the project helper `slider` look
# like (description, min, max, step, initial value) - confirm against
# dance_sim_tools.ipywidget_helpers.
sm = slider('m', 1, 23, 1, 8)
slog_N = slider('logN', 3, 10, 1, 8)
sn_bins = slider('n_bins', 5, 100, 5, 50)

sliders = [sm, slog_N, sn_bins]

# One Box per slider, stacked in a bordered column half the page wide.
items = [Box([control]) for control in sliders]

ui = Box(
    items,
    layout=Layout(
        display='flex',
        flex_flow='column',
        border='solid 2px',
        align_items='stretch',
        width='50%',
    ),
)

# Map parameter name -> slider widget, as expected by interactive_output.
slider_names = [control.description for control in sliders]
param_dict = {name: control for name, control in zip(slider_names, sliders)}

# Re-run g on every slider change and show the controls above the plots.
out = widgets.interactive_output(g, param_dict)

display(ui, out)


Box(children=(Box(children=(FloatSlider(value=8.0, continuous_update=False, description='m', max=23.0, min=1.0…

Output()