In [4]:
import datetime
import sys,os, json, shutil, filecmp
from pathlib import Path
import numpy as np
import pandas as pd

## Parameters

In [10]:
# Truthy -> the summary-loading cell resets existing entries for this session
# instead of only warning that they exist.
overwrite = 0
# Number of tetrodes; the notebook loops over folders tt_1 .. tt_<nTTs>.
nTTs = 16

## Path to Directory

In [5]:
# Identify the session to summarise and locate its clustering output
# (unix-style path to the local data folder).
animal = 'Li'
date = '062818'
task = 'T3g'
cluster_experimenter = 'AG'

# <data root>/<animal>/Clustered
clusterDir = Path('/mnt/c/Users/alexg8/Documents/Data/') / animal / 'Clustered'
assert clusterDir.exists(), 'Cluster Directory not Found'

# Session folders are named <animal>_<task>_<date>_KSClusters
sessionName = '_'.join([animal, task, date, 'KSClusters'])
assert (clusterDir / sessionName).exists(), 'Session Not Found'


## Get the animal's JSON cluster summary

In [84]:
# Load this animal's clustering summary (nested dict: animal -> date -> task)
# from disk when it exists; otherwise start from an empty summary.
Cl_Summary_fn = animal+'_ClusteringSummary.json'
if (clusterDir/Cl_Summary_fn).exists():
    with open(str(clusterDir/Cl_Summary_fn), 'r') as f:
        cluster_summary = json.load(f)
else:
    cluster_summary = {}

# Create the animal/date levels only if they are missing. `overwrite` must
# reset just this task's entry: resetting the whole date would silently drop
# any other task already recorded for the same date.
cluster_summary.setdefault(animal, {}).setdefault(date, {})
if task not in cluster_summary[animal][date] or overwrite:
    cluster_summary[animal][date][task] = {}
else:
    print('Warning clustering data exists for {}, and overwrite = false'.format(sessionName))




{'T3g': {'nCells': 10,
  'nMua': 16,
  'cell_IDs': {'1': [],
   '2': [],
   '3': [1, 6],
   '4': [],
   '5': [],
   '6': [],
   '7': [3],
   '8': [4, 10],
   '9': [2],
   '10': [0, 6, 8],
   '11': [],
   '12': [],
   '13': [],
   '14': [9],
   '15': [],
   '16': []},
  'mua_IDs': {'1': [7],
   '2': [2, 10],
   '3': [0, 3],
   '4': [2],
   '5': [],
   '6': [3],
   '7': [],
   '8': [3],
   '9': [11],
   '10': [4, 5, 7, 9],
   '11': [],
   '12': [],
   '13': [],
   '14': [1, 8, 10],
   '15': [],
   '16': []}}}

## Get Files and Update Json File

In [87]:
# Rebuild this session's summary entry. For every tetrode, read its
# cluster_group.tsv, record which rows are labelled 'good' (cells) or 'mua',
# and store the file's modification time. The whole summary is then written
# back to the JSON file.
session = cluster_summary[animal][date][task]
session['nCells'] = 0
session['nMua'] = 0
session['cell_IDs'] = {}
session['mua_IDs'] = {}
session['dateClustered'] = {}

time_fmt = "%B %d %Y, %I:%M%p"
for tt in range(1, nTTs+1):
    # JSON object keys are always strings; using str keys here keeps the
    # in-memory summary identical to what json.load() returns later.
    tt_key = str(tt)
    fn = clusterDir/sessionName/('tt_'+str(tt))/'cluster_group.tsv'
    assert fn.exists(), 'could not find record for tt {}; in {}'.format(tt,sessionName)
    d = pd.read_csv(fn, delimiter='\t')

    # NOTE(review): these are row positions within the tsv, not values read
    # from an id column -- confirm that is what downstream code expects.
    cells = np.where(d['group']=='good')[0].tolist()
    mua = np.where(d['group']=='mua')[0].tolist()

    session['cell_IDs'][tt_key] = cells
    session['mua_IDs'][tt_key] = mua
    session['nCells'] += len(cells)
    session['nMua'] += len(mua)
    session['dateClustered'][tt_key] = datetime.datetime.fromtimestamp(
        int(fn.stat().st_mtime)).strftime(time_fmt)

session['dateSummary'] = datetime.datetime.today().strftime(time_fmt)
with open(str(clusterDir/Cl_Summary_fn), 'w') as f:
    json.dump(cluster_summary, f, indent=4)

In [204]:
# Report the cell / MUA counts that were just computed for this session.
print("Results for {}:\n nCells = {} \n nMuas = {}".format(
    sessionName,
    *(cluster_summary[animal][date][task][key] for key in ('nCells', 'nMua'))))


Results for Li_T3g_062818_KSClusters:
 nCells = 10 
 nMuas = 16


In [29]:
# Totals across every recorded date for the current task. Dates that have no
# entry for `task` are skipped (indexing them raised KeyError) and are not
# counted as sessions; entries that are still empty contribute zero.
task_entries = [entry[task] for entry in cluster_summary[animal].values()
                if task in entry]
n = sum(entry.get('nCells', 0) for entry in task_entries)
m = sum(entry.get('nMua', 0) for entry in task_entries)
print(" nSessions = {} \n nCells = {} \n nMua = {}".format(len(task_entries),n,m))


 nSessions = 10 
 nCells = 86 
 nMua = 145


In [237]:
# Scratch check: a session name splits into [animal, task, date, 'KSClusters'].
sessionName.split('_')

['Li', 'T3g', '062818', 'KSClusters']

In [206]:
# Dates currently recorded in the summary for this animal.
cluster_summary[animal].keys()

dict_keys(['060118', '060418', '061518', '061318', '061818', '062018', '062518', '062618', '062718', '062818'])

## Copy results to oak (note: oak must first be mounted in the Ubuntu shell)
### sudo mkdir /mnt/o
### sudo mount -t drvfs O: /mnt/o
### to unmount:
### sudo umount /mnt/o

In [231]:
def _copy_if_changed(src, dst):
    """Copy ``src`` over ``dst`` unless ``dst`` exists and compares equal.

    The destination folder is created when missing. Returns True when a copy
    was made and False when ``dst`` was already up to date.
    """
    # filecmp.cmp raises if either file is missing, so test existence first.
    if dst.exists() and filecmp.cmp(str(src), str(dst), shallow=True):
        return False
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(str(src), str(dst))
    return True


oakPath = Path('/mnt/o/giocomo/alexg/Clustered/'+animal)
clusterDir = Path('/mnt/c/Users/alexg8/Documents/Data/'+
                   animal+'/Clustered/')
# copy summary
if _copy_if_changed(clusterDir/Cl_Summary_fn, oakPath/Cl_Summary_fn):
    print('updated file summary file')
else:
    print('summary file is the same, skipping copy.')

# copy individual tetrode clusters
# Loop names are distinct from the notebook-level `date` / `sessionName`
# parameters so this cell does not silently change them for later cells.
for session_date in cluster_summary[animal].keys():
    if task not in cluster_summary[animal][session_date]:
        # Nothing was summarised for this task on that date.
        continue
    session = animal+'_'+task+'_'+session_date+'_KSClusters'
    notUpDatedList = []
    for tt in range(1, nTTs+1):
        fn = clusterDir/session/('tt_'+str(tt))/'cluster_group.tsv'
        sp = oakPath/session/('tt_'+str(tt))/'cluster_group.tsv'
        if not _copy_if_changed(fn, sp):
            notUpDatedList.append(tt)
    if len(notUpDatedList)==nTTs:
        print("{}: All tetrodes have already been clustered. ".format(session))
    elif len(notUpDatedList)==0:
        print("{}: Updated all tetrode clusters".format(session))
    else:
        print("{}: Indetical cluster files, no updates for TTs {}".format(session, notUpDatedList))
    

summary file is the same, skipping copy.
Li_T3g_060118_KSClusters: All tetrodes had already been clustered. 
Li_T3g_060418_KSClusters: All tetrodes had already been clustered. 
Li_T3g_061518_KSClusters: All tetrodes had already been clustered. 
Li_T3g_061318_KSClusters: All tetrodes had already been clustered. 
Li_T3g_061818_KSClusters: All tetrodes had already been clustered. 
Li_T3g_062018_KSClusters: All tetrodes had already been clustered. 
Li_T3g_062518_KSClusters: All tetrodes had already been clustered. 
Li_T3g_062618_KSClusters: All tetrodes had already been clustered. 
Li_T3g_062718_KSClusters: All tetrodes had already been clustered. 
Li_T3g_062818_KSClusters: All tetrodes had already been clustered. 


In [6]:
# Folder on the oak mount that holds this animal's clustering results.
oakPath = Path('/mnt/o/giocomo/alexg/Clustered/') / animal

In [37]:
# Walk the oak folder: print the name parts of every non-json entry and load
# any json file found into cl_summary.
for i in oakPath.glob('*'):
    if not i.match('*.json'):
        print(i.name.split('_'))
        continue
    with i.open() as f:
        cl_summary = json.load(f)
    

['Li', 'OF', '060118', 'KSClusters']
['Li', 'OF', '060418', 'KSClusters']
['Li', 'OF', '060518', 'KSClusters']
['Li', 'OF', '060718', 'KSClusters']
['Li', 'OF', '060818', 'KSClusters']
['Li', 'OF', '061118', 'KSClusters']
['Li', 'OF', '061218', 'KSClusters']
['Li', 'OF', '061318', 'KSClusters']
['Li', 'OF', '061418', 'KSClusters']
['Li', 'OF', '061518', 'KSClusters']
['Li', 'OF', '061818', 'KSClusters']
['Li', 'OF', '062018', 'KSClusters']
['Li', 'OF', '062518', 'KSClusters']
['Li', 'OF', '062618', 'KSClusters']
['Li', 'OF', '062718', 'KSClusters']
['Li', 'OF', '062818', 'KSClusters']
['Li', 'OF', '062918', 'KSClusters']
['Li', 'OF', '070218', 'KSClusters']
['Li', 'OF', '070618', 'KSClusters']
['Li', 'OF', '071018', 'KSClusters']
['Li', 'OF', '080118', 'KSClusters']
['Li', 'OF', '080218', 'KSClusters']
['Li', 'T3g', '060118', 'KSClusters']
['Li', 'T3g', '060418', 'KSClusters']
['Li', 'T3g', '061318', 'KSClusters']
['Li', 'T3g', '061518', 'KSClusters']
['Li', 'T3g', '061818', 'KSCluster

In [42]:
# Build (or refresh) ClusterTableSummary.csv on oak: one row per
# *_KSClusters session folder, filled in from the clustering summary json.
SESSION_SUFFIX = '_KSClusters'
colnames = ['SessionDate','Task','Animal','Clustered','SummaryDate','nCells','nMua','BestTT']
table_fn = oakPath/'ClusterTableSummary.csv'

# --- load the clustering summary (exactly one json file is expected) ---
json_files = sorted(oakPath.glob('*.json'))
if not json_files:
    raise FileNotFoundError('No clustering summary json found in {}'.format(oakPath))
if len(json_files) > 1:
    print('Found Two Json Files. Only one file allowed.')
with json_files[0].open() as f:
    cl_summary = json.load(f)

# --- start from the existing table, if there is one ---
if table_fn.exists():
    # Dates such as '060118' must stay strings or the leading zero is lost.
    d = pd.read_csv(str(table_fn), index_col=0, dtype={'SessionDate': str})
    rows = d.to_dict(orient='index')
else:
    rows = {}

# --- add / refresh one row per session folder ---
for i in sorted(oakPath.glob('*' + SESSION_SUFFIX)):
    # str.strip() removes a *set of characters* from both ends, not a suffix,
    # so slice the suffix off instead.
    sn = i.name[:-len(SESSION_SUFFIX)]
    tmp = sn.split('_')
    if len(tmp) != 3:
        print("Skipping {}: expected <animal>_<task>_<date>{}".format(i, SESSION_SUFFIX))
        continue
    session_animal, session_task, session_date = tmp

    row = rows.setdefault(sn, dict.fromkeys(colnames, 0))
    row['SessionDate'] = session_date
    row['Task'] = session_task
    row['Animal'] = session_animal

    # Look the session up by its own animal/date/task, not by the notebook
    # parameters; a missing or still-empty entry means "not clustered yet".
    info = cl_summary.get(session_animal, {}).get(session_date, {}).get(session_task)
    if not info:
        continue

    n_per_tt = {tt: len(ids) for tt, ids in info.get('cell_IDs', {}).items()}
    row['Clustered'] = 1
    row['nCells'] = info.get('nCells', 0)
    row['nMua'] = info.get('nMua', 0)
    # Tetrode with the most cells (first one on ties); 0 when there are none.
    row['BestTT'] = int(max(n_per_tt, key=n_per_tt.get)) if any(n_per_tt.values()) else 0
    # Older summary entries may not carry a 'dateSummary' field.
    row['SummaryDate'] = info.get('dateSummary', 0)

d = pd.DataFrame(list(rows.values()), index=list(rows.keys()), columns=colnames)
d.to_csv(str(table_fn))

In [129]:
# Scratch: build a one-row frame of zeros with the table's columns.
colnames = ['SessionDate','Task','Clustered','SummaryDate','nCells','nMua','BestTT']
# Scalar values (no lists) were tried first; the list-valued form below is the one kept.
#emptyEntry = pd.DataFrame({key: 0 for key in colnames})
pd.DataFrame({key: [0] for key in colnames},index=['21214'])

Unnamed: 0,SessionDate,Task,Clustered,SummaryDate,nCells,nMua,BestTT
21214,0,0,0,0,0,0,0


In [132]:
# Collect the session name, date and task of every *_KSClusters folder on oak.
Sessions =[]
Dates = []
Tasks = []
for i in oakPath.glob('*_KSClusters'):
    tmp = i.name.split('_')
    Dates.append(tmp[2])
    Tasks.append(tmp[1])
    # str.strip('_KSClusters') strips any of those *characters* from both ends
    # (it would mangle e.g. a name starting with 'test_'); slice off the
    # suffix instead.
    Sessions.append(i.name[:-len('_KSClusters')])

In [134]:
# Scratch: fill the Task column from the collected list and display the table.
# Requires len(Tasks) to equal the number of rows in d.
d['Task']=Tasks
d

Unnamed: 0,SessionDate,Task,Clustered,SummaryDate,nCells,nMua,BestTT
Li_OF_060118,1,OF,0,0,0,0,0
Li_OF_060418,0,OF,0,0,0,0,0
Li_OF_060518,0,OF,0,0,0,0,0
Li_OF_060718,0,OF,0,0,0,0,0
Li_OF_060818,0,OF,0,0,0,0,0
Li_OF_061118,0,OF,0,0,0,0,0
Li_OF_061218,0,OF,0,0,0,0,0
Li_OF_061318,0,OF,0,0,0,0,0
Li_OF_061418,0,OF,0,0,0,0,0
Li_OF_061518,0,OF,0,0,0,0,0


In [106]:
# Scratch: zero-filled table with one row per session folder, plus the list
# of session names (folder name without the '_KSClusters' suffix).
colnames = ['SessionDate','Task','Clustered','SummaryDate','nCells','nMua','BestTT']
nFiles = len(list(oakPath.glob('*_KSClusters')))
d = pd.DataFrame(0,index = np.arange(nFiles), columns=colnames)
sn =[]
for i in oakPath.glob('*_KSClusters'):
    tmp = i.name.split('_')
    # str.strip() removes a character set, not a suffix; slice it off instead.
    sn.append(i.name[:-len('_KSClusters')])
    

In [113]:
# Scratch: check that a table indexed by session name survives a CSV
# round trip (written to temp.csv in the working directory).
d = pd.DataFrame(0,index = sn, columns=colnames)
d.at[sn[0],'SessionDate']=1
d.to_csv('temp.csv')
d2=pd.read_csv('temp.csv',index_col=0)

False

In [93]:
# Scratch: zero-filled table with one integer-indexed row per session folder.
colnames = ['SessionDate','Task','Clustered','SummaryDate','nCells','nMua','BestTT']
nFiles = len(list(oakPath.glob('*_KSClusters')))
d = pd.DataFrame(0,index = np.arange(nFiles), columns=colnames)

array(['Li_OF_060118', 'Li_OF_060418', 'Li_OF_060518', 'Li_OF_060718',
       'Li_OF_060818', 'Li_OF_061118', 'Li_OF_061218', 'Li_OF_061318',
       'Li_OF_061418', 'Li_OF_061518', 'Li_OF_061818', 'Li_OF_062018',
       'Li_OF_062518', 'Li_OF_062618', 'Li_OF_062718', 'Li_OF_062818',
       'Li_OF_062918', 'Li_OF_070218', 'Li_OF_070618', 'Li_OF_071018',
       'Li_OF_080118', 'Li_OF_080218', 'Li_T3g_060118', 'Li_T3g_060418',
       'Li_T3g_061318', 'Li_T3g_061518', 'Li_T3g_061818', 'Li_T3g_062018',
       'Li_T3g_062518', 'Li_T3g_062618', 'Li_T3g_062718', 'Li_T3g_062818',
       'Li_T3g_070218', 'Li_T3g_070518', 'Li_T3g_070618', 'Li_T3g_070918',
       'Li_T3g_071018', 'Li_T3g_071118', 'Li_T3g_072518', 'Li_T3g_072618',
       'Li_T3g_073018', 'Li_T3g_080118', 'Li_T3g_080218', 'Li_T3g_080318'],
      dtype=object)

In [47]:
# Number of cells recorded for each tetrode of the current session,
# in the order the tetrodes appear in the summary.
a = cl_summary[animal][date][task]['cell_IDs']
d2 = list(map(len, a.values()))


[0, 0, 2, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 1, 0, 0]


In [12]:
def argmax(iterable):
    """Return ``(index, value)`` for the largest element of ``iterable``.

    Elements are compared directly, so iterating a dict compares its keys.
    On ties the first maximal element wins. Raises ValueError when
    ``iterable`` is empty.
    """
    pairs = enumerate(iterable)
    try:
        best_idx, best_val = next(pairs)
    except StopIteration:
        raise ValueError('max() arg is an empty sequence') from None
    for idx, val in pairs:
        if val > best_val:
            best_idx, best_val = idx, val
    return best_idx, best_val

In [67]:
import datetime

In [78]:
# Scratch: format the modification time of path `i` (left over from an
# earlier glob loop) the same way the summary timestamps are formatted.
datetime.datetime.fromtimestamp(int(i.stat().st_mtime)).strftime("%B %d %Y, %I:%M%p")

'April 25 2019, 04:16PM'

In [3]:
# Display the summary loaded from oak.
cl_summary

/mnt/c/Users/alexg8/Documents/TreeMazeAnalyses/Clustering


In [8]:
# Load the clustering summary json from oak. Only one json file is expected:
# the first (in sorted order) is loaded, and a second one triggers the warning.
cnt=0
for i in sorted(oakPath.glob('*.json')):
    # `cnt > 1` only fired on a *third* file, so a second json silently
    # replaced the first; test for "already loaded one" instead.
    if cnt >= 1:
        print('Found Two Json Files. Only one file allowed.')
        break
    with i.open() as f:
        cl_summary = json.load(f)
    cnt+=1

In [13]:
# Scratch: print argmax over each session's cell_IDs.
# NOTE(review): argmax iterates the dict's *keys*, so this reports the largest
# key string ('9'), not the tetrode with the most cells; the loop also rebinds
# the notebook-level `date`.
for date in cl_summary[animal].keys():
    info = cl_summary[animal][date][task]
    print( argmax(info['cell_IDs']))

(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')
(8, '9')


In [27]:
# Scratch: compare the key-based argmax with dict_argmax, which ranks
# tetrodes by number of cells (+1 converts the 0-based position to a TT number).
argmax(info['cell_IDs'])
dict_argmax(info['cell_IDs'])+1

10

10

In [25]:
def dict_argmax(d):
    """Return the 0-based position, in key order, of the longest value in ``d``.

    On ties the first longest value wins (``np.argmax`` semantics).
    """
    lengths = [len(items) for items in d.values()]
    return np.argmax(lengths)

In [35]:
# Scratch: dict.items() yields (key, value) pairs.
a = {'a':1,'b':2,'c':3}

for k,v in a.items():
    print(k,v)

a 1
b 2
c 3


In [33]:
# Scratch: dict.values() yields only the values.
a.values()

dict_values([1, 2, 3])