### Generates df with auditory responsiveness and selectivity. 

#### Run this notebook after creating the /PlaybackPkl files inside the bird folder. For Julie's data these were made by translating the h5 files  using readUnith5_file.ipynb

#### You should only have to modify the rootPath in cell 2 for this notebook to work.


In [1]:
# Dependencies 
import os
import glob
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle as pk
from scipy.stats import t



### Set Paths
Note that it is assumed that you have data directory accessible from the rootPath

In [2]:
rootPath = '/Users/frederictheunissen/Google Drive File Stream/My Drive/julie/'
pklPath = 'pkl'


# The 6 birds from Julie data set
birds = ['BlaBro09xxF', 'GreBlu9508M', 'WhiBlu5396M', 'LblBlu2028M', 'WhiWhi4522M', 'YelBlu6903F']

### Functions

In [3]:
# These 3 z_score_stim functions could be combinned into 1.

def calc_zscore_stim(stimName):
    global dfRelTime
    
    rows = []
    # This loop finds all rows that match but there should only be one.
    for index, row in dfRelTime.iterrows():
        if row['file'] == stimName:
            rows.append(row)
    if (len(rows) != 1):
        print('Stimulus not found or too many')
        return 0.0, 1.0, 0
    
    # Choose one and only
    row = rows[0]
    
    # Calculates response diff for each stim
    spikeDiff = np.zeros(row['nTrials'])
    for it in range(row['nTrials']):
        spikeDiff[it] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
           
    # Calculate z-score and pvalue
    if (row['nTrials'] > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1
            sdiffSD = np.std(spikeDiff, ddof=1)

        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
    else:
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, row['nTrials']

def calc_zscore_category(catName):
    global dfRelTime
    
    rows = []
    # This loop finds all rows that match but there should only be one.
    
    nTotal = 0
    for index, row in dfRelTime.iterrows():
        if row['call_type'] == catName:
            rows.append(row)
            nTotal += row['nTrials']
    if (nTotal == 0):
        return 0.0, 1.0, nTotal
    
    spikeDiff = np.zeros(nTotal)
    itot = 0
    for row in rows:
        for it in range(row['nTrials']):
            spikeDiff[itot] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
            itot += 1
            
    # Calculate z-score and pvalue
    if (nTotal > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1
            sdiffSD = np.std(spikeDiff, ddof=1)

        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(row['nTrials']), row['nTrials']))*2.0
    else:
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, nTotal


def calc_zscore_all():  
    global dfRelTime

    # Loop through all stims and trials.
    nTotal = 0
    for index, row in dfRelTime.iterrows():
        nTotal += row['nTrials']
            
    spikeDiff = np.zeros(nTotal)
    itot = 0
    for index, row in dfRelTime.iterrows():
        for it in range(row['nTrials']):        
            # Spike difference first 500 ms
            spikeDiff[itot] = np.sum((row['spikeTimes'][it] >= 0) & (row['spikeTimes'][it] < 0.5)) - np.sum((row['spikeTimes'][it] >= -0.5) & (row['spikeTimes'][it] < 0)) 
            itot += 1

    # Calculate z-score and pvalue
    if (nTotal > 1) :
        sdiffSD = np.std(spikeDiff, ddof=1)
        if sdiffSD == 0:
            spikeDiff[0] += 1   # Add a spike to generate SD
            sdiffSD = np.std(spikeDiff, ddof=1)
            
        zscore = np.mean(spikeDiff)/sdiffSD
        if (zscore < 0.0):
            pvalue = (t.cdf(zscore*np.sqrt(nTotal), nTotal))*2.0
        else:
            pvalue = (1.0 - t.cdf(zscore*np.sqrt(nTotal), nTotal))*2.0
    else: 
        zscore = 0
        pvalue = 1.0
        
    return zscore, pvalue, nTotal

    
def load_playbackPkl(playbackPkl):
    global unitInfo, dfRelTime
                
    # Load unitInfo and data frames 
    pklFile = playbackPkl
    try:
        fileIn = open(pklFile, 'rb')
        try:
            unitInfo = pk.load(fileIn)
            dfAbsTime = pk.load(fileIn)
            dfRelTime = pk.load(fileIn)
            fileIn.close()
        except:
            print('Empty file: ', pklFile)
    except OSError as err:
        print("OS error: {0}".format(err))

    
    return
               



### Loop through data and make summary data frame w results

In [9]:
dfSummary = pd.DataFrame(columns=['bird', 'site', 'unit', 'snr',
                                  'zAud', 'pAud', 'nTot',
                                  'selInd', 'calls', 
                                  'zcall', 'pcall', 'ncall'])

callTypes = ['Ag', 'DC', 'Ne', 'So', 'Te', 'Th', 'Wh', 'Di', 'Be', 'LT']

for bird in birds:    
    # Find pklfiles
    pklfiles = glob.glob(os.path.join(rootPath,pklPath,bird, "*.pkl"))
       
    # Loop through sites
    for playPkl in pklfiles:
        load_playbackPkl(playPkl)
            
        # Get a measure of auditory strength
        zTot, pTot, nTot = calc_zscore_all()
                        
        # Loop through call categories to get measure of call-type selectivity
        calls = dfRelTime['call_type'].unique()
        calls = list( set(calls) & set(callTypes))
        ncalls = len(calls)
            
        if (ncalls == 0) :        # This happens if there is only data for the other stims..
            continue
        zcall = np.zeros(ncalls)
        pcall = np.zeros(ncalls)
        ncall = np.zeros(ncalls)
            
        for icall, call in enumerate(calls):
            zcall[icall], pcall[icall], ncall[icall] = calc_zscore_category(call)
             
        # This is the selectivity index from Vinje and Gallant
        selInd = (1-(zcall.mean()**2/(zcall**2).mean()))/(1-1/ncalls)
            
        site = unitInfo['Site']
        dfSummary = dfSummary.append( {'bird': bird, 'site': site, 'unit': playPkl, 'snr': unitInfo['SpikeSNR'],
                               'zAud': zTot, 'pAud': pTot, 'nTot': nTot,
                               'selInd': selInd, 'calls': calls, 
                               'zcall': zcall, 'pcall': pcall, 'ncall': ncall}, ignore_index = True )
            
        
    
    

### Save results

In [10]:
outPath = rootPath+pklPath+'/summarySelJulie.pkl'
fileOut = open(outPath,"wb")
pk.dump(dfSummary, fileOut)
fileOut.close()

In [11]:
dfSummary[dfSummary['zAud']>1.0]

Unnamed: 0,bird,site,unit,snr,zAud,pAud,nTot,selInd,calls,zcall,pcall,ncall
9,BlaBro09xxF,L1500R1500,/Users/frederictheunissen/Google Drive File St...,3.848209,1.486018,0.0,1300,0.108176,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.468233549673852, 0.9605127629068184, 1.7362...","[1.4602469355917691e-05, 0.012517833974261316,...","[270.0, 80.0, 170.0, 60.0, 180.0, 60.0, 120.0,..."
17,BlaBro09xxF,L2000R1600,/Users/frederictheunissen/Google Drive File St...,5.890425,1.000882,0.0,1360,0.074086,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[1.2245763854927572, 0.6945874473600855, 1.488...","[0.00309677321359092, 0.05275454533662738, 0.0...","[278.0, 83.0, 179.0, 61.0, 197.0, 62.0, 126.0,..."
18,BlaBro09xxF,L2000R1600,/Users/frederictheunissen/Google Drive File St...,2.868108,1.124173,0.0,1360,0.257089,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.09724789347678, 0.04851180714847927, 0.9329...","[5.8386846782054747e-05, 0.88112821216792, 0.0...","[278.0, 83.0, 179.0, 61.0, 197.0, 62.0, 126.0,..."
35,BlaBro09xxF,L2000R1600,/Users/frederictheunissen/Google Drive File St...,2.907910,1.510274,0.0,1360,0.180920,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.892092848017875, 0.517127844603008, 1.83200...","[3.5812068945872966e-06, 0.13303087535557, 8.0...","[278.0, 83.0, 179.0, 61.0, 197.0, 62.0, 126.0,..."
41,BlaBro09xxF,L2000R1600,/Users/frederictheunissen/Google Drive File St...,3.989365,1.526631,0.0,1360,0.071527,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.385094834263013, 1.2963143734511435, 2.2881...","[1.9651034655066013e-05, 0.002146961168588035,...","[278.0, 83.0, 179.0, 61.0, 197.0, 62.0, 126.0,..."
...,...,...,...,...,...,...,...,...,...,...,...,...
1177,YelBlu6903F,L500R400,/Users/frederictheunissen/Google Drive File St...,4.340285,1.383195,0.0,1290,0.067633,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.254212324148441, 1.6211664742455039, 1.4567...","[3.1854642793538446e-05, 0.0004463928694862229...","[270.0, 70.0, 170.0, 50.0, 180.0, 80.0, 120.0,..."
1178,YelBlu6903F,L1000R900,/Users/frederictheunissen/Google Drive File St...,5.562580,1.092768,0.0,1310,0.011799,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[1.0838620000277912, 1.3005836740817538, 1.116...","[0.006465842805533306, 0.0012278333626731808, ...","[273.0, 69.0, 172.0, 51.0, 187.0, 82.0, 120.0,..."
1192,YelBlu6903F,L1000R900,/Users/frederictheunissen/Google Drive File St...,3.977654,1.527423,0.0,1310,0.062063,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[2.7818231459281977, 1.674301336821991, 1.4138...","[5.078317592888126e-06, 0.00017197582491634833...","[273.0, 69.0, 172.0, 51.0, 187.0, 82.0, 120.0,..."
1197,YelBlu6903F,L1000R900,/Users/frederictheunissen/Google Drive File St...,2.791742,1.280894,0.0,1310,0.107364,"[So, Th, DC, Di, Te, LT, Be, Ag, Ne]","[1.6073841508762454, 0.6329409064405153, 1.629...","[0.00047569815043302377, 0.05968008153863602, ...","[273.0, 69.0, 172.0, 51.0, 187.0, 82.0, 120.0,..."
