### Generates df with auditory responsiveness and selectivity. 

#### Run this notebook after creating the /PlaybackPkl files inside the site folder using PlotSpikeSortedCategoriesGUI.ipynb

#### You should only have to modify the rootPath in cell 2 for this notebook to work.


In [1]:
# Dependencies 
import os
import glob
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle as pk
from scipy.stats import t



### Set Paths
Note that it is assumed that you have data directory accessible from the rootPath

In [2]:
rootPath = '/Users/frederictheunissen/Code/songephys/'
dataPath = 'data/birds/'

birds = [
    os.path.basename(bird)
    for bird in sorted(glob.glob(os.path.join(rootPath+dataPath, "*")), key=os.path.getmtime, reverse=True)
    if (os.path.isdir(bird))]

### Functions

In [3]:
# These 3 z_score_stim functions could be combinned into 1.

def calc_zscore_stim(stimName):
    global dfRelTime
    
    rows = []
    # This loop finds all rows that match but there should only be one.
    for index, row in dfRelTime.iterrows():
        if row['file'] == stimName:
            rows.append(row)
    if (len(rows) != 1):
        print('Stimulus not found or too many')
        return 0.0, 1.0, 0
    
    # Choose one and only
    row = rows[0]
    
    # Calculates response diff for each stim
    spikeDiff = np.zeros(row['nTrials'])
    for it in range(row['nTrials']):
        spikeDiff[it] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
           
    # Calculate z-score and pvalue
    if (row['nTrials'] > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1
            sdiffSD = np.std(spikeDiff, ddof=1)

        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
    else:
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, row['nTrials']

def calc_zscore_category(catName):
    global dfRelTime
    
    rows = []
    # This loop finds all rows that match but there should only be one.
    
    nTotal = 0
    for index, row in dfRelTime.iterrows():
        if row['call_type'] == catName:
            rows.append(row)
            nTotal += row['nTrials']
    if (nTotal == 0):
        return 0.0, 1.0, nTotal
    
    spikeDiff = np.zeros(nTotal)
    itot = 0
    for row in rows:
        for it in range(row['nTrials']):
            spikeDiff[itot] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
            itot += 1
            
    # Calculate z-score and pvalue
    if (nTotal > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1
            sdiffSD = np.std(spikeDiff, ddof=1)

        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
    else:
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, nTotal


def calc_zscore_all():  
    global dfRelTime

    # Loop through all stims and trials.
    nTotal = 0
    for index, row in dfRelTime.iterrows():
        nTotal += row['nTrials']
            
    spikeDiff = np.zeros(nTotal)
    itot = 0
    for index, row in dfRelTime.iterrows():
        for it in range(row['nTrials']):        
            # Spike difference first 500 ms
            spikeDiff[itot] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
            itot += 1

    # Calculate z-score and pvalue
    if (nTotal > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1   # Add a spike to generate SD
            sdiffSD = np.std(spikeDiff, ddof=1)
            
        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(nTotal), nTotal))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(nTotal), nTotal))*2.0
    else: 
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, nTotal

    
def load_playbackPkl(playPklPath, playbackPkl):
    global unitInfo, dfRelTime
                
    # Load unitInfo and data frames 
    pklFile = playPklPath + playbackPkl
    try:
        fileIn = open(pklFile, 'rb')
        try:
            unitInfo = pk.load(fileIn)
            dfAbsTime = pk.load(fileIn)
            dfRelTime = pk.load(fileIn)
            fileIn.close()
        except:
            print('Empty file: ', pklFile)
    except OSError as err:
        print("OS error: {0}".format(err))

    
    return
               



### Loop through data and make summary data frame w results

In [4]:
dfSummary = pd.DataFrame(columns=['bird', 'site', 'unit', 'snr',
                                  'zAud', 'pAud', 'nTot',
                                  'selInd', 'calls', 
                                  'zcall', 'pcall', 'ncall'])



for bird in birds:    
    # Find sites
    sites = [ 
        os.path.basename(site)
        for site in glob.glob(os.path.join(rootPath+dataPath+bird+'/sites/', "*"))
    ]
    
    # Loop through sites
    for site in sites:
        playPklPath = rootPath + dataPath + bird + '/sites/' + site + '/PlaybackPkl/'
    
        # Set up list of Playback files available for this site
        playPkls = [ 
            os.path.basename(efile)
            for efile in glob.glob(playPklPath + '*.pkl')
        ]
        for playPkl in playPkls:
            load_playbackPkl(playPklPath, playPkl)
            
            # Get a measure of auditory strength
            zTot, pTot, nTot = calc_zscore_all()
                        
            # Loop through call categories to get measure of call-type selectivity
            calls = dfRelTime['call_type'].unique()
            calls = calls[calls != None]    # Removing None (other stims)
            ncalls = len(calls)
            
            if (ncalls == 0) :        # This happens if there is data for the other stims..
                continue
            zcall = np.zeros(ncalls)
            pcall = np.zeros(ncalls)
            ncall = np.zeros(ncalls)
            
            for icall, call in enumerate(calls):
                zcall[icall], pcall[icall], ncall[icall] = calc_zscore_category(call)
             
            # This is the selectivity index from Vinje and Gallant
            selInd = (1-(zcall.mean()**2/(zcall**2).mean()))/(1-1/ncalls)
            
            dfSummary = dfSummary.append( {'bird': bird, 'site': site, 'unit': playPkl, 'snr': unitInfo['SpikeSNR'],
                               'zAud': zTot, 'pAud': pTot, 'nTot': nTot,
                               'selInd': selInd, 'calls': calls, 
                               'zcall': zcall, 'pcall': pcall, 'ncall': ncall}, ignore_index = True )
            
        
    
    



### Save results

In [5]:
outPath = rootPath+dataPath+'summarySel.pkl'
fileOut = open(outPath,"wb")
pk.dump(dfSummary, fileOut)
fileOut.close()

In [6]:
dfSummary[dfSummary['zAud']>1.0]

Unnamed: 0,bird,site,unit,snr,zAud,pAud,nTot,selInd,calls,zcall,pcall,ncall
87,ZF7F,ZF7F_6t_190724_104215,goodPlayback-e19-c54.pkl,7.25724,1.138229,0.0,1251,0.035466,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.6499195288740391, 1.276203377219764, 1.3122...","[0.00679146790465035, 0.011814722089803498, 0....","[61.0, 109.0, 104.0, 63.0, 108.0, 36.0, 90.0, ..."
174,ZF6M,ZF6M_7_5t_190723_100723,goodPlayback-e12-c33.pkl,4.426448,1.141257,0.0,542,0.023082,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.5139405556684058, 1.8205206762478825, 1.166...","[0.37162083948509483, 0.05113745493059829, 0.0...","[34.0, 50.0, 55.0, 32.0, 50.0, 20.0, 40.0, 21...."
236,ZF6M,ZF6M_8t_190725_105432,goodPlayback-e12-c1.pkl,4.376059,1.056955,0.0,383,0.137115,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.5067117222627988, 1.1399677724816777, 0.974...","[0.16680947249069078, 0.4584201631468403, 0.50...","[14.0, 29.0, 29.0, 15.0, 30.0, 10.0, 28.0, 8.0..."
415,ZF5M,ZF5M_9_5t_190701_125653,goodPlayback-e10-c115.pkl,10.042025,1.272586,0.0,600,0.080397,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[2.0499046191734522, 1.7615775681448245, 1.486...","[0.002401143671098449, 0.005010168212713051, 0...","[41.0, 68.0, 70.0, 34.0, 68.0, 19.0, 64.0, 18...."
457,ZF5M,ZF5M_10_5t_190703_133641,goodPlayback-e10-c29.pkl,7.111036,1.018474,0.0,309,0.102889,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.2460382596106578, 1.231049905961469, 0.6968...","[0.11977836164741484, 0.12275051115857472, 0.4...","[22.0, 30.0, 35.0, 18.0, 32.0, 12.0, 31.0, 12...."
547,ZF4F,ZF4F_2t_190611_160843,goodPlayback-e17-c3.pkl,5.238627,1.300097,0.0,369,0.143562,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.3772692864146796, 1.6984738641739847, 0.789...","[0.19080291795775683, 0.138264899576932, 0.189...","[24.0, 29.0, 39.0, 17.0, 25.0, 11.0, 28.0, 13...."
557,ZF4F,ZF4F_2t_190611_160843,goodPlayback-e10-c3.pkl,4.866822,1.082413,0.0,370,0.074091,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.0332869184380906, 0.8605028085774643, 0.586...","[0.2814134687289416, 0.3477416693705764, 0.306...","[23.0, 29.0, 38.0, 17.0, 25.0, 11.0, 28.0, 13...."
558,ZF4F,ZF4F_2t_190612_101201,goodPlayback-e10-c1.pkl,4.771658,1.020162,0.0,715,0.084243,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.6518261594069656, 0.9321284495344814, 1.231...","[0.14454821183965993, 0.13573000596856621, 0.0...","[33.0, 50.0, 54.0, 37.0, 59.0, 18.0, 50.0, 19...."
571,ZF4F,ZF4F_2t_190612_101201,goodPlayback-e18-c4.pkl,4.882776,1.152756,0.0,719,0.127898,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.680540359765969, 1.3550639642125526, 0.7798...","[0.14063483459756632, 0.05353020326436697, 0.1...","[33.0, 50.0, 54.0, 37.0, 59.0, 18.0, 49.0, 19...."
583,ZF4F,ZF4F_2t_190612_134337,goodPlayback-e10-c1.pkl,4.784917,1.33799,0.0,597,0.156337,"[Ag, DC, Ne, So, Te, Th, Wh, Di, Be, LT]","[1.3089910981261965, 1.1698598331921444, 1.339...","[0.018451473635978433, 0.07940270443583541, 0....","[36.0, 56.0, 62.0, 39.0, 66.0, 18.0, 51.0, 22...."


In [1]:
dfRelTime

NameError: name 'dfRelTime' is not defined