In [1]:
import numpy as np
import h5py
import os
import datetime
import xml.etree.ElementTree as ET
from tifffile import imsave

import matplotlib.pyplot as plt
%matplotlib notebook

from jupyterthemes import jtplot
jtplot.style(theme='monokai')

In [2]:
def hdfToTiff(path,hdfDataSet):
    """Write ``hdfDataSet`` to ``path`` via ``imsave`` with its third axis moved to the front.

    The remaining axes keep their relative order, i.e. an (a, b, c) stack is
    handed to ``imsave`` as (c, a, b).
    """
    framesFirst=np.moveaxis(hdfDataSet,2,0)
    imsave(path,framesFirst)

def convertRegFrame(rawFrame,linePx,tLines):
    """Reshape a flat frame buffer into a 2-D image of ``tLines`` rows by ``linePx`` columns."""
    frameShape=(tLines,linePx)
    return np.reshape(rawFrame,frameShape)

def convertResFrame(rawFrame,linePx,tLines,sampPerPx):
    """Reshape a flat frame buffer into lines and mirror every other line.

    Parameters
    ----------
    rawFrame : array-like
        Flat buffer holding ``tLines * linePx * sampPerPx`` samples.
    linePx : int
        Pixels per line.
    tLines : int
        Number of lines in the frame.
    sampPerPx : int
        Samples acquired per pixel.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(tLines, int(linePx*sampPerPx))`` in which lines
        0, 2, 4, ... are reversed left-to-right. The result is a fresh copy;
        ``rawFrame`` is left untouched.
    """
    samplesPerLine=int(linePx*sampPerPx)

    # Copy first: reshaping alone returns a view, and flipping a view would
    # write through to the caller's buffer (re-running a cell would then
    # un-flip data that had already been flipped).
    cFr=np.array(rawFrame).reshape((tLines,samplesPerLine))

    # Mirror every even-indexed line. The row slice is bounded by the number
    # of lines, not by the number of samples per line, so all even lines are
    # covered whatever the frame's aspect ratio.
    cFr[0::2,:]=cFr[0::2,::-1].copy()

    return cFr

In [3]:
# ******************************
# Batch Decode Raw
# ******************************

animalName="cdSom3b"
savePath='/Users/cad/Documents/cdSom3/'
basePath="/Users/cad/Documents/cdSom3/raw/"

parentDir=sorted(os.listdir(basePath))

# Group the acquisition folders by the scan-type tag in their name. Each folder
# gets a dataset ID: a type prefix plus the last three characters of its name.
singles=sorted([x for x in parentDir if 'Single' in x])
sIDs=["single_" + x[-3:] for x in singles]

tSeries=sorted([x for x in parentDir if 'TSeries' in x])
tIDs=["tSeries_" + x[-3:] for x in tSeries]

zSeries=sorted([x for x in parentDir if 'ZSeries' in x])
zIDs=["zSeries_" + x[-3:] for x in zSeries]


cTime = datetime.datetime.now()
convertDate=cTime.strftime("%m%d%Y")

allSubPaths=singles+tSeries+zSeries
allDataIDs=sIDs+tIDs+zIDs

# Pick the first (alphabetically) xml file of every folder.
xmlz=[]
for subPath in allSubPaths:
    tXML=sorted([x for x in os.listdir(basePath+subPath) if 'xml' in x])
    if tXML:
        xmlz.append(tXML[0])
    else:
        # Keep an empty placeholder so xmlz stays index-aligned with
        # allSubPaths, but say so instead of silently swallowing the problem.
        print("no xml metadata found in {}".format(subPath))
        xmlz.append([])

# at the end of this we have:
# a) a list of all convert directories (allSubPaths)
# b) a list of all the directory's xml meta files (xmlz)
# c) a conversion date to log for the hdf (convertDate)

# Now loop through the directories, and ...
#
#1) Parse the metadata.

In [4]:
# For every acquisition folder: parse its xml metadata, read the per-frame
# clocks and list its raw data files. Only the values of the last folder
# survive the loop, because everything is stored in plain (non-indexed) names.
for cL in range(0,len(allSubPaths)):
    dataPath=basePath + allSubPaths[cL] + "/"
    xmlPath=dataPath + xmlz[cL]
    dataSet=allDataIDs[cL]

    tree = ET.parse(xmlPath)
    xRoot=tree.getroot()

    # The date is in the root as part of a date/time string.
    # Take everything up to the first whitespace rather than a fixed 10
    # characters, so dates that are not zero-padded (e.g. "1/5/2018 ...")
    # do not drag part of the time into the date.
    labDate=xRoot.attrib['date'].split()[0]
    scanDate=int("".join(labDate.split('/')))

    # The frame time (interval) is the sample rate.
    # NOTE(review): all values below are addressed by hard-coded child
    # position; verify the indices against the xml layout actually produced
    # by the acquisition software version in use.
    frameInt=float(xRoot[1][5].attrib['value'])
    lineCount=int(xRoot[1][12].attrib['value'])
    pixelsPerLine=int(xRoot[1][20].attrib['value'])
    scanType=xRoot[1][0].attrib['value']
    pockelVal=float(xRoot[1][11][0].attrib['value'])
    pmtVal_red=float(xRoot[1][21][1].attrib['value'])
    pmtVal_green=float(xRoot[1][21][2].attrib['value'])
    pos_X=float(xRoot[1][22][0][0].attrib['value'])
    pos_Y=float(xRoot[1][22][1][0].attrib['value'])
    pos_Z=float(xRoot[1][22][2][0].attrib['value'])

    multSamp=int(xRoot[1][26].attrib['value'])
    pixelsPerFrame=int(pixelsPerLine*lineCount*multSamp)

    # get bruker clocks (relative and absolute)
    # Each list is filled from the attribute of the same name (the two were
    # previously crossed over).
    gg=xRoot[2].findall('Frame')
    bTime_rel=[float(fr.attrib['relativeTime']) for fr in gg]
    bTime_abs=[float(fr.attrib['absoluteTime']) for fr in gg]

    # This block looks to see how many pixels comprise all the raw files.
    # Then it makes a 1D array the size of all the pixels and maps the binary pixels into that.
    # This is the most memory efficient way (I believe)
    # to do it without combining arrays of pointers.

    rawDir=os.listdir(dataPath)
    rawFiles=sorted([x for x in rawDir if 'CYCLE' in x])

In [5]:
# Show the raw data files found in the last folder visited by the loop above.
rawFiles

['CYCLE_000001_RAWDATA_000023',
 'CYCLE_000001_RAWDATA_000024',
 'CYCLE_000001_RAWDATA_000025',
 'CYCLE_000001_RAWDATA_000026']

In [6]:
# Number of per-frame clock entries read from the xml 'Frame' elements.
len(bTime_abs)

2000

In [7]:
# Load every raw file of the current folder into one flat uint16 buffer (bAr)
# and allocate the lines x pixels x frames array (framAr) that decoded frames
# are later written into.
framesInChunks=[]
pxSizes=[]
rdc=np.array([])
if len(rawFiles)>0:
    # Size each file from its byte count instead of reading the whole file
    # into memory just to count samples.
    bytesPerPx=np.dtype('uint16').itemsize
    for rawName in rawFiles:
        tsZ=os.path.getsize(dataPath+rawName)//bytesPerPx
        framesInChunks.append(int(tsZ/pixelsPerFrame))
        pxSizes.append(tsZ)

    totalPx=int(np.sum(pxSizes))
    totalFrames=int(totalPx/pixelsPerFrame)
    actualFrameCount=len(bTime_abs)
    bAr=np.zeros(totalPx,dtype='uint16')

    # Copy the files back to back into the preallocated buffer, one file in
    # memory at a time.
    lastStrt=0
    for rawName,curSz in zip(rawFiles,pxSizes):
        bAr[lastStrt:lastStrt+curSz]=np.fromfile(dataPath+rawName, dtype='uint16')
        lastStrt=lastStrt+curSz

    framAr=np.zeros((lineCount,pixelsPerLine,totalFrames),dtype='uint16')

    



In [8]:
# Sanity check: buffer shape, samples per frame, and how many frames that implies.
for _stat in (bAr.shape, pixelsPerFrame, bAr.size/pixelsPerFrame):
    print(_stat)

(3149919488,)
786432
4005.3297526041665


In [9]:
# Quick look at every third sample from the start of the raw buffer.
print(2*3*512*512)
plt.figure(987)
plt.plot(bAr[:int((2*786432)/772):3])

1572864


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x115f5f668>]

In [60]:
# Grab the first 512*2*3 samples of the raw buffer.
linePixels=3*2*512
tF=bAr[:linePixels]

In [67]:
# Mean of the first 512*3 samples of the raw buffer.
np.mean(bAr[0:512*3])

8518.2408854166661

In [68]:
# Mean of the next 512*3 samples, for comparison with the previous cell.
np.mean(bAr[512*3:512*3+512*3])

8775.283203125

In [24]:
# Decode one test frame by hand: take every second sample starting at index 1,
# un-mirror the lines, average the 3 samples of each pixel and show the result.
totalFramePx=256*(3*1024)
print(totalFramePx)
fPx=bAr[1:2*totalFramePx+1:2]
fg=convertResFrame(fPx,512,512,3)
print(fg.shape)
nRows,nSamples=fg.shape
bb=np.reshape(fg,(nRows,int(nSamples/3),3))
mFrm=np.mean(bb,axis=2).astype('uint16')
print(np.amin(mFrm))
plt.figure(6)
plt.imshow(mFrm,vmax=2*7450,vmin=7450)

786432
(512, 1536)
7478


<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x31b6a9d68>

In [11]:
# Append this run's frames and frame clock to the animal/date hdf file.
# The context manager closes the file even when a write fails, so a failed run
# no longer leaves a dangling open handle behind for the next attempt.
# Assigning to a dataset name that already exists in the file raises.
with h5py.File(savePath+"{}_{}.hdf".format(animalName,scanDate), "a") as f:
    f[dataSet]=framAr[:,:,0:actualFrameCount]
    f[dataSet+"_absTime"]=bTime_abs

# Drop the in-memory frame stack now that it is on disk.
framAr=[]
print("finished {} of {}".format(cL+1,len(allSubPaths)))



NameError: name 'framAr' is not defined

In [None]:
# There is some offset in their raw data. Should be 13 bit, and it's close.
# Shift the buffer so that its minimum sits at zero.
# (bitMax-(bitMax-rawMin) is simply rawMin, so the offset is the raw minimum.)
bitMax=2**13
rawMin=int(np.amin(bAr))
offs=rawMin
# Subtract in place: building bAr-offs as a new array would briefly hold a
# second full-size copy of the raw buffer.
bAr-=offs

# This blocks makes a 3-D array of LinesxPixelsxFrames.
# We then reshape individual frames from the 1D binary array
# and compute the mean if we have multisampling of pixels.
# We then delete the OG binary array.
# This is the most memory efficient way I know to do it.
# I loop the mean across individual frame reshapes,
# because reshaping the larger array is very slow because of the memory use.
# It may seem like a true vectorized/matrix approach is desired, but not in this case.

lnPx=lineCount*pixelsPerLine*multSamp
for cIM in range(0,totalFrames):
    frameStart=cIM*lnPx
    aa=convertResFrame(bAr[frameStart:frameStart+lnPx],pixelsPerLine,lineCount,multSamp)
    # Group the samples belonging to each pixel on a trailing axis and average them.
    bb=np.reshape(aa,(aa.shape[0],int(aa.shape[1]/multSamp),multSamp))
    framAr[:,:,cIM]=np.mean(bb,axis=2).astype('uint16')
# Release the per-frame temporaries and the raw buffer.
aa=[]
bb=[]
bAr=[]

# now we create an hdf file to store this data in.
# we also write into the hdf, using a dataset name that is the run #
# we close the file to writing and delete the OG frame array we mapped into it. 
# now your memory footprint should be negligible.



Basic Inspection .... 

In [4]:
# now, let's reopen the hdf and look at frames.
animalName='cdSom3'
scanDate='12282017'
savePath='/Users/cad/Documents/cdSom3/'
try:
    f = h5py.File(savePath+"{}_{}.hdf".format(animalName,scanDate), "a")
except OSError:
    # Best effort: if the file cannot be opened, report it and carry on with
    # whatever handle `f` already refers to in this kernel. Only open failures
    # are caught; anything else is allowed to surface.
    print('hdf open')

# Frame stacks and their clocks are stored side by side; a clock dataset has
# the name of its stack plus the "_absTime" suffix.
datasetNames = list(f.keys())
dLabs=[n for n in datasetNames if not n.endswith("_absTime")]
tLabs=[n for n in datasetNames if n.endswith("_absTime")]

# ***** Select the non-zeroed indexed run you want to look at
# ***** Also if you want a tif.
dFoc=5
renderTif=0

curData=dLabs[dFoc-1]
curTime=tLabs[dFoc-1]

# If you want a tiff instead.
if renderTif:
    hdfToTiff(savePath+"{}_{}.tif".format(animalName,curData),f[curData])

In [5]:
# now make an image browser function (ipywidgets) and we can scrub.
def browse_images(lowCut=0,highCut=15000):
    """Interactively step through the frames of ``f[curData]``.

    ``lowCut`` and ``highCut`` are passed to ``imshow`` as ``vmin`` / ``vmax``.
    Reads the notebook globals ``f`` and ``curData``.

    NOTE(review): ``interact`` is not imported anywhere in the visible
    notebook -- presumably ``from ipywidgets import interact``; confirm and add
    it to the imports cell so this works on a fresh kernel.
    """
    lastFrame=f[curData].shape[2]-1

    def view_image(ind):
        # Draw frame `ind` into figure 50 without axis ticks.
        plt.figure(50)
        plt.imshow(
            f[curData][:,:,ind],
            cmap=plt.cm.gray,
            aspect='equal',
            interpolation='bicubic',
            vmin=lowCut,
            vmax=highCut,
        )
        plt.xticks([])
        plt.yticks([])
        plt.show()

    interact(view_image, ind=(0,lastFrame))

In [6]:
# Show a single frame (index pIm) of the selected run in figure 51.
pIm=1
dPlt=f[curData][:,:,pIm]
plt.figure(51)
a=plt.imshow(dPlt,aspect='equal',interpolation='bicubic',cmap=plt.cm.gray)

<IPython.core.display.Javascript object>

In [7]:
# %matplotlib inline
# Open figure 50 and start the frame browser with a 1000-9000 display range.
plt.figure(50)
browse_images(lowCut=1000,highCut=9000)

<IPython.core.display.Javascript object>

A Jupyter Widget

In [8]:
# Compute the per-pixel standard deviation of the selected stack across frames (axis 2).
# Takes a while: the hdf dataset is handed to np.std as a whole.
# NOTE(review): presumably this pulls the entire stack into memory -- confirm for large runs.
stdDevIm=np.std(f[curData],axis=2)

In [9]:
# Display the standard-deviation image in figure 13 with a fixed 500-7000 range.
plt.figure(13)
stdPlot=plt.imshow(stdDevIm,vmin=500,vmax=7000,aspect='equal',cmap=plt.cm.jet)

<IPython.core.display.Javascript object>

In [10]:
# Extract the mean trace of a rectangular ROI and overlay a filtered copy.
yDim=[411,418]
xDim=[174,180]
roi=f[curData][yDim[0]:yDim[1],xDim[0]:xDim[1],:]
# Flatten the ROI pixels onto one axis, then average them frame by frame.
roi=np.reshape(roi,(roi.shape[0]*roi.shape[1],roi.shape[2]))
roiData=np.mean(roi,axis=0)
tt=f[curTime][0:roiData.size]

# Derive the rate from the interval between the first two frames rather than
# from tt[1] alone; the two agree when the clock starts at 0 and this stays
# correct when it does not.
frameRate=1/(tt[1]-tt[0])

# NOTE(review): butter_lowpass_filtfilt is not defined or imported anywhere in
# the visible notebook; the arguments are presumably (data, cutoff, sample
# rate, order) -- confirm against its definition.
fData=butter_lowpass_filtfilt(roiData, 1, frameRate,2)


plt.figure(21)
plt.plot(tt,roiData,'b-')
plt.plot(tt,fData,'g-')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1078fbbe0>]

In [11]:
# Close the hdf file handle opened for inspection.
f.close()