## Calculate the duration of clean recordings and of the gaps between them

In [1]:
# Dependencies 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle as pk
from scipy.special import erf
import time

from sklearn.neighbors import KernelDensity

# Theunissen Lab Code
import suss.io
from analysis.playbacks.categories import create_stimulus_dataframe, inject_spikes, relative_spike_times

In [2]:
# Set paths
# Both strings end with '/' because later cells build file names by plain
# string concatenation (rootPathH + dataPathH + ...).
# NOTE(review): rootPathH is an absolute path on one machine; it must be edited
# before the notebook can run anywhere else.
rootPathH = '/Users/frederictheunissen/Code/songephys/'
dataPathH = 'data/birds/'



In [3]:
# Read data files
# NOTE: unpickling can execute arbitrary code; only load summary files you trust.
inPathH = rootPathH+dataPathH+'summarySel.pkl'

# The context manager closes the file even when unpickling raises.
with open(inPathH, "rb") as fileInH:
    dfSummaryH = pk.load(fileInH)


In [4]:
# Number of auditory units

def _percent(count, total):
    """Return `count` as a percentage of `total`, or NaN when `total` is zero."""
    return count*100.0/total if total else float('nan')


# Boolean masks, computed once and shared by both summaries below.
isAudH = dfSummaryH['pAud'] < 0.01               # counted as auditory
isInhH = isAudH & (dfSummaryH['zAud'] < 0.0)     # auditory and counted as inhibited
isSingleUnitH = dfSummaryH['snr'] >= 5.0         # single-unit criterion

nCellsH = dfSummaryH.shape[0]
nAudH = isAudH.sum()
nInhH = isInhH.sum()

print("Awake behaving data set:")
print(nAudH,'/',nCellsH, '(', _percent(nAudH, nCellsH), ') auditory units')
print(nInhH,'/',nAudH, '(', _percent(nInhH, nAudH), ') are inhibited by sound')


# Repeat with single Units by SNR >= 5
# Note that the first percentage is relative to ALL cells (nCellsH), not to
# the number of single units.
nAudSUH = (isAudH & isSingleUnitH).sum()
nInhSUH = (isInhH & isSingleUnitH).sum()

print("Awake behaving data set Single Units:")
print(nAudSUH,'/',nCellsH, '(', _percent(nAudSUH, nCellsH), ') auditory single units')
print(nInhSUH,'/',nAudSUH, '(', _percent(nInhSUH, nAudSUH), ') are inhibited by sound')




Awake behaving data set:
439 / 732 ( 59.97267759562842 ) auditory units
62 / 439 ( 14.123006833712983 ) are inhibited by sound
Awake behaving data set Single Units:
292 / 732 ( 39.89071038251366 ) auditory single units
46 / 292 ( 15.753424657534246 ) are inhibited by sound


In [5]:
# Read the results of Part 1 Runs that includes all neurons

# Read the PC and Confusion matrices data base
# Use the following data if NW
#inPath = rootPathH+dataPathH+'HerminaDataBase1UnitPCNW.pkl'

inPath = rootPathH+dataPathH+'HerminaDataBase1UnitPC.pkl'

# The file holds three consecutive pickles that must be read back in this order.
# The context manager closes the file even if one of the loads raises.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(inPath,"rb") as fileIn:
    unitNamesFull = pk.load(fileIn)
    confMatTot = pk.load(fileIn)
    pcc = pk.load(fileIn)

# These are the neurons that we will be keeping  Use 13 if NW 12 otherwise.
indGood = np.argwhere(pcc > 12).flatten()
nGood = indGood.shape[0]

# Restrict the scores and the names to the kept units
pccGood = pcc[indGood]
unitNamesGood = [unitNamesFull[i] for i in indGood]

# Positions within the kept units, ordered from the highest to the lowest pcc
indSort = np.flip(np.argsort(pccGood))



In [6]:
def load_playbackPkl(playPklPath, playbackPkl):
    """Load the first pickled object of a playback file into the global ``unitInfo``.

    Parameters
    ----------
    playPklPath : str
        Directory of the playback pickle file, including the trailing separator
        (it is concatenated directly with ``playbackPkl``).
    playbackPkl : str
        File name of the playback pickle.

    Returns
    -------
    The loaded object (also stored in the global ``unitInfo``), or ``None`` if
    the file could not be opened or unpickled. On failure a message is printed
    and the global ``unitInfo`` is left untouched, so it may still hold the
    value of a previously loaded unit: callers that loop over units should
    check the return value.

    Notes
    -----
    Only the first pickle in the file is read; the time data frames that used
    to be loaded after it (``dfAbsTime``, ``dfRelTime``) are not needed here.
    Unpickling can execute arbitrary code; only load files you trust.
    """
    global unitInfo

    pklFile = playPklPath + playbackPkl
    try:
        # The context manager closes the file even if unpickling fails.
        with open(pklFile, 'rb') as fileIn:
            loaded = pk.load(fileIn)
    except OSError as err:
        print("OS error: {0}".format(err))
        return None
    except EOFError:
        print('Empty file: ', pklFile)
        return None
    except Exception as err:
        # Still best effort, but KeyboardInterrupt/SystemExit now propagate.
        print('Could not unpickle: ', pklFile, '(', err, ')')
        return None

    unitInfo = loaded
    return unitInfo

In [30]:
def good_rate(bird, site, electrode, cluster, rateThresh, bandWidth):
    """Measure how long a unit was recorded cleanly and how long the gaps were.

    The spike times of the unit are smoothed with a Gaussian kernel density
    estimate and sampled on a unit-spaced time grid (1 s if the spike times
    are in seconds, as the spikes/s threshold suggests). Grid points whose
    estimated rate exceeds ``rateThresh`` count as "recording", the others as
    "gap". Only grid points between the earliest ``start_time`` and the latest
    ``stop_time`` of the playback events are considered.

    Parameters
    ----------
    bird, site, electrode : str
        Concatenated into the paths of the curated spike file and of the
        ``vocal_periods.npy`` events file.
    cluster
        Key of the unit in the labeled nodes of the spike file. ``None`` or
        the string ``'None'`` means that the cluster is not defined.
    rateThresh : float
        Rate threshold in spikes/s.
    bandWidth : float
        Bandwidth of the Gaussian kernel, in the unit of the spike times.

    Returns
    -------
    trecs : list of int
        Length, in grid steps, of every run above threshold.
    tgaps : list of int
        Length, in grid steps, of every run below threshold that lies between
        two recording runs. The stretches before the first and after the last
        recording run are not counted.
    Both lists are empty when the cluster is not defined.

    Side effects
    ------------
    Sets the globals ``X_anal`` (time grid, shape (npts, 1)), ``X_good``
    (boolean mask of grid points above threshold) and ``df`` (playback data
    frame with injected spikes).
    """
    global X_anal, X_good, df

    if cluster is None or cluster == 'None':
        print('Cluster is not defined')
        # Same shape as the normal return value so that callers can unpack it.
        return [], []

    # Load the raw spike data
    sitePath = rootPathH + dataPathH + bird + '/sites/' + site
    spikesPath = sitePath + '/manually_curated/curated-e' + electrode + '.pkl'
    spike_file = suss.io.read_pickle(spikesPath)

    # Pick the requested cluster; spike times as a column vector for sklearn
    labeled_nodes_dict = dict(spike_file.labeled_nodes)
    spikes = labeled_nodes_dict[cluster]
    spikeTimes = spikes.flatten().times.reshape(-1,1)

    # Load the events file
    eventsPath = sitePath + '/vocal_periods.npy'
    events = np.load(eventsPath, allow_pickle=True)[()]

    df = create_stimulus_dataframe(events["playback"])
    df = inject_spikes(df, spikeTimes.flatten(), t_buffer=10)

    # Generate a continuous spike rate with a wide filter
    kde = KernelDensity(kernel='gaussian', bandwidth=bandWidth).fit(spikeTimes)

    # Integer-valued grid limits, 2 time units beyond the first and last spike
    # (assumes spikeTimes is sorted -- TODO confirm). Scalars are extracted
    # explicitly: int() on a 1-element array is deprecated since NumPy 1.25.
    tGridStart = float(np.fix(spikeTimes[0, 0] - 2))
    tGridStop = float(np.fix(spikeTimes[-1, 0] + 2))
    npts = int(tGridStop - tGridStart) + 1   # one grid point per time unit

    X_anal = np.linspace(tGridStart, tGridStop, npts).reshape(-1, 1)

    # Rescale the density so that it sums to the number of spikes: with a
    # unit-spaced grid every sample is then a rate in spikes per time unit.
    spikeDens = np.exp(kde.score_samples(X_anal))
    spikeDens = spikeDens*len(spikeTimes)/sum(spikeDens)

    # Put a threshold for rate above rateThresh in spikes/s
    X_good = spikeDens > rateThresh

    # Find minimum and maximum times in events used in analysis.
    # `series != None` is True for every row in pandas (None included), so it
    # never filtered anything; notna() really drops rows without a call type.
    hasCallType = df['call_type'].notna()
    tBeg = df.loc[hasCallType, 'start_time'].min()
    tEnd = df.loc[hasCallType, 'stop_time'].max()

    # Calculate recording times and gap times (in grid steps)
    tAnal = X_anal[:, 0]   # 1-D view of the grid for scalar comparisons
    tgaps = []
    trecs = []

    tbeggap = 0      # grid index where the current gap started
    tbegrec = 0      # grid index where the current recording run started
    prev = False     # was the previous grid point above threshold?
    first = True     # no recording run seen yet
    for i, val in enumerate(X_good):
        if tAnal[i] < tBeg:
            # Before the first event: keep pushing both start markers forward
            tbeggap = i+1
            tbegrec = i+1
            continue
        # Plain Python booleans: `~` on a bool is deprecated since Python 3.12
        val = bool(val)
        if val and not prev:      # Step on
            prev = True
            dt = i - tbeggap
            tbegrec = i
            if not first:
                # The stretch before the first run is not a gap between runs
                tgaps.append(dt)
            first = False
        elif prev and not val:    # Step off
            prev = False
            dt = i - tbegrec
            trecs.append(dt)
            tbeggap = i
        if tAnal[i] >= tEnd:
            break

    # Close a recording run that is still open at the end of the analysis
    # window. The last gap does not count because we might have lost the unit.
    i += 1
    if prev:
        trecs.append(i-tbegrec)

    return trecs, tgaps


In [12]:
# Per-unit information, one entry per unit, ordered by decreasing pcc
birds, sites, pklFiles = [], [], []
electrodes, clusters = [], []
rateThs, kdeBws = [], []


# Getting the information for the 100 discriminating units used in the decoder
for ind in indSort:
    # The unit name reads <site>?good-e<electrode>-c<cluster>.<ext>, where ?
    # is a single separator character and the site starts with the
    # 4-character bird name.
    unitName = unitNamesGood[ind]
    nameParts = unitName.split('good')

    bird = unitName[:4]
    site = nameParts[0][:-1]
    pklFile = 'good' + nameParts[1]

    # Drop the one-letter prefix of the electrode and cluster fields
    fileFields = pklFile.split('-')
    electrode = fileFields[1][1:]
    cluster = fileFields[2][1:].split('.')[0]

    playPklPath = rootPathH + dataPathH + bird + '/sites/' + site + '/PlaybackPkl/'

    # load the unitinfo data (stored in the global unitInfo)
    load_playbackPkl(playPklPath, pklFile)

    # Store it
    for store, value in (
        (birds, bird),
        (sites, site),
        (pklFiles, pklFile),
        (electrodes, electrode),
        (clusters, cluster),
    ):
        store.append(value)
    rateThs.append(unitInfo['RateThreshold'])
    kdeBws.append(unitInfo['KDE_BW'])



In [14]:
# Save this data
outPathH = rootPathH+dataPathH+'goodUnitsThInfo.pkl'

unitInfoH = { 'birds': birds, 'sites' : sites,
              'pklFiles': pklFiles, 'electrodes' : electrodes,
              'clusters' : clusters, 'rateThs': rateThs, 'kdeBws': kdeBws}

# Write inside a context manager so that the pickle is flushed and the file is
# closed before anything reads it back. The dictionary is built first so a
# failure above does not leave a truncated file behind.
with open(outPathH,"wb") as fileoutH:
    pk.dump(unitInfoH, fileoutH)

In [9]:
# Read this data
# NOTE: unpickling can execute arbitrary code; only load files you trust.
inPathH = rootPathH+dataPathH+'goodUnitsThInfo.pkl'

# The context manager closes the file even when unpickling raises.
with open(inPathH,"rb") as fileinH:
    unitInfoH = pk.load(fileinH)

# Unpack the per-unit lists used by the cells below
birds = unitInfoH['birds']
sites = unitInfoH['sites']
electrodes = unitInfoH['electrodes']
clusters= unitInfoH['clusters']
rateThs = unitInfoH['rateThs']
kdeBws = unitInfoH['kdeBws']


In [41]:
# Run through all units

# One entry per unit, in the order of `birds`
unitRecs = []       # lengths of the recording runs
unitGaps = []       # lengths of the gaps between recording runs
unitRecTot = []     # summed recording length
unitGapTot = []     # summed gap length
unitGapN = []       # number of gaps
unitGapMean = []    # mean / max / min gap length (0.0 when there is no gap)
unitGapMax = []
unitGapMin = []
for i, bird in enumerate(birds):
    trecs, tgaps = good_rate(
        bird,
        sites[i],
        electrodes[i],
        int(clusters[i]),
        rateThs[i],
        kdeBws[i],
    )

    unitRecs.append(trecs)
    unitGaps.append(tgaps)
    unitRecTot.append(np.sum(trecs))
    unitGapTot.append(np.sum(tgaps))
    unitGapN.append(len(tgaps))

    # Units without any gap get 0.0 for all three gap statistics
    if len(tgaps) == 0:
        gapStats = (0.0, 0.0, 0.0)
    else:
        gapStats = (np.mean(tgaps), np.max(tgaps), np.min(tgaps))
    unitGapMean.append(gapStats[0])
    unitGapMax.append(gapStats[1])
    unitGapMin.append(gapStats[2])
        

    
    

In [42]:
# Print some results

def _hms(seconds):
    """Format a non-negative duration in seconds as HH:MM:SS.

    Fractional seconds are truncated. Unlike time.strftime/time.gmtime, the
    hour field does not wrap around at 24 h.
    """
    total = int(seconds)
    return '%02d:%02d:%02d' % (total // 3600, (total % 3600) // 60, total % 60)


# Build every array once instead of once per printed line
recTot = np.array(unitRecTot)
gapTot = np.array(unitGapTot)
totTime = gapTot + recTot                        # recording runs plus gaps
nGaps = np.array(unitGapN)
allGaps = np.concatenate(unitGaps).flatten()     # gaps pooled over all units
pctClean = recTot/totTime*100

summaryStats = (('Mean', np.mean), ('Std', np.std), ('Min', np.min), ('Max', np.max))

print('Total Recording Time:')
for label, stat in summaryStats:
    print('\t' + label + ':', _hms(stat(totTime)))

print('Number of gaps:')
for label, stat in summaryStats:
    print('\t' + label + ':', stat(nGaps))

print('Gap Length:')
if allGaps.size:
    for label, stat in summaryStats:
        print('\t' + label + ':', _hms(stat(allGaps)))
else:
    # Min/max of an empty array would raise
    print('\tNo gaps')

print('Percent time of clean recording:')
for label, stat in summaryStats:
    if label != 'Std':
        print('\t' + label + ':', stat(pctClean))


Total Recording Time:
	Mean: 02:45:58
	Std: 01:11:04
	Min: 00:16:21
	Max: 06:30:11
Number of gaps:
	Mean: 10.04
	Std: 8.176698600290951
	Min: 0
	Max: 39
Gap Length:
	Mean: 00:06:44
	Std: 00:10:16
	Min: 00:00:01
	Max: 02:26:43
Percent time of clean recording:
	Mean: 63.821507739715564
	Min: 16.603648738065978
	Max: 100.0
