# In [27]:
import os.path
import sys
from os import listdir

def tableName(run='004'):
    """Return the list of table names that belong to a run.

    The first entry is the fixed test-ID table; the other five are the
    per-run tables, each named ``<run>_<suffix>``.

    Args:
        run: run identifier string used as the table-name prefix.

    Returns:
        A list of six table names, in the order: test ID, flow in,
        flow out, plc, voltage, log.
    """
    suffixes = ('flowindata', 'flowoutdata', 'plcdata', 'voltagedata', 'logdata')
    names = ['rct007_testID']
    names.extend(run + '_' + suffix for suffix in suffixes)
    return names

def parseLog(cursor, run):
    """Return the start and stop timestamps recorded in a run's log table.

    Reads ``<run>_logdata`` through ``readNoDateTable`` and splits its
    ``date`` column by the ``state`` column: rows whose state is ``'1'`` are
    starts, rows whose state is ``'0'`` are stops.  When the two counts
    differ, both are cut to the shorter length and a message is printed.

    Args:
        cursor: database cursor, passed straight to ``readNoDateTable``.
        run: run identifier string (table-name prefix).

    Returns:
        Tuple ``(stateStart, stateEnd)`` of equally long, 0-indexed
        sequences of dates (presumably pandas Series -- depends on what
        ``readNoDateTable`` returns).
    """
    # read log file
    log = readNoDateTable(cursor, run+'_logdata')

    # get the start and stop states by looking at state variable
    stateStart = log[log.state=='1'].date.reset_index(drop=True)
    stateEnd = log[log.state=='0'].date.reset_index(drop=True)

    # check to ensure that we have the same number of states
    lenStart = len(stateStart)
    lenEnd = len(stateEnd)
    if lenStart != lenEnd:
        stop = min(lenStart, lenEnd)
        # positional truncation to the common length
        stateStart = stateStart.iloc[0:stop]
        stateEnd = stateEnd.iloc[0:stop]
        # single-argument parenthesised print works on Python 2 and 3 alike
        print('States have been truncated start:%d end:%d' % (lenStart, lenEnd))

    return stateStart, stateEnd

def getEventIdOffset(dbase, path):
    """Return the last event id stored in the pickled log of ``dbase``.

    Loads ``<dbase>_log.pkl`` from ``path`` and returns the value in the
    last row of the second-to-last column.  ``logFileDataFrame`` appends
    ``eventId`` and then ``run`` as the final two columns, so for logs
    written by this file that cell is the most recent event id.

    Args:
        dbase: database name used as the pickle file prefix.
        path: directory holding the pickle; may or may not end with a
            path separator, and may be empty for the current directory.

    Returns:
        The value at ``iloc[-1, -2]`` of the stored log.
    """
    # os.path.join copes with a trailing separator and with an empty path;
    # the former path+'/'+... turned an empty path into the filesystem root.
    log = pd.read_pickle(os.path.join(path, dbase+'_log.pkl'))
    return log.iloc[-1,-2]

def pickleTable(df, path, dbase, run, eventID):
    """Write every sensor column of ``df`` to its own pickle file.

    The ``date`` column is left out; each remaining column is saved as a
    single-column DataFrame to
    ``<path><dbase>_<run>_<column>_<eventID>.pkl``.  ``path`` is used as a
    plain string prefix, so it has to end with a path separator.

    Args:
        df: DataFrame with a ``date`` column plus one column per sensor.
            It is not modified.
        path: output directory prefix (including trailing separator).
        dbase: database name, first part of the file name.
        run: run identifier string, second part of the file name.
        eventID: event number, last part of the file name.
    """
    # get rid of date -- on a copy, so the caller's frame keeps its columns
    sensors = df.drop('date', axis=1)

    # all the files to read and save
    for col in sensors.columns:
        filename = path+dbase+'_'+run+'_'+col+'_'+str(eventID)+'.pkl'
        if len(sensors[col]) != 0:
            print('Pickling %s \n' % filename)
            pd.DataFrame(sensors[col]).to_pickle(filename)
        else:
            # was `eventId` (undefined name) -> NameError on empty columns
            print('Event: %d Run: %s Sensor: %s is null' % (eventID, run, col))

def pickleEvent(dbase, run, start, end, event, path):
    """Pickle the sensor data of one event from every data table of a run.

    Opens a cursor with ``connectDbase(dbase)``, reads each data table of
    ``run`` between ``start`` and ``end`` via ``readDateTable`` and hands
    every non-empty table to ``pickleTable``.  Empty tables are reported
    and skipped.

    Args:
        dbase: database name; ``'rct008'`` restricts the tables to the plc
            and voltage tables.
        run: run identifier string (table-name prefix).
        start: start of the time window, passed to ``readDateTable``.
        end: end of the time window, passed to ``readDateTable``.
        event: event id, used in the output file names.
        path: output directory prefix, passed to ``pickleTable``.
    """
    cursor = connectDbase(dbase)

    # get table names, remove non data tables: the test-ID table (index 0)
    # and the log table (last) are always dropped; for rct008 the two flow
    # tables (indexes 1 and 2) are dropped as well
    firstData = 3 if dbase == 'rct008' else 1
    tbNameList = tableName(run)[firstData:-1]

    for tbName in tbNameList:
        # read table
        dfTable = readDateTable(cursor, tbName, start, end)

        # if it is not empty make a pickle file for each sensor
        if len(dfTable) != 0:
            pickleTable(dfTable, path, dbase, run, event)
        else:
            # if it's empty print this and keep going
            # (single-argument parenthesised print works on Python 2 and 3)
            print('Table: %s is empty.\n' % tbName)

    
def pickleSensorFiles(cursor, dbase, run, path, forceWrite = False):
    """Pickle the sensor data of every event of ``run`` not yet on disk.

    Loads ``<path><dbase>_log.pkl``, keeps the rows of ``run`` and looks in
    ``path`` for pickle files of this database/run to find the highest
    event number already stored (0 if none).  Every event id above that
    number -- or every event id when ``forceWrite`` is true -- is passed to
    ``pickleEvent`` together with its start (state ``'1'``) and stop
    (state ``'0'``) date.

    Args:
        cursor: not used by this function.
        dbase: database name.
        run: run identifier string.
        path: directory prefix of the log pickle and of the sensor pickles.
        forceWrite: when true, re-pickle events that are already stored.
    """
    runLog = pd.read_pickle(path+dbase+'_log.pkl')
    runLog = runLog[runLog['run']==run]

    # event numbers are the last '_'-separated token of the file name,
    # before the extension
    prefix = dbase+'_'+run
    storedNumbers = [
        int(name.split('_')[-1].split('.')[0])
        for name in os.listdir(path)
        if 'pkl' in name and prefix in name and not '_log.pkl' in name
    ]
    storedEvent = max(storedNumbers) if storedNumbers else 0

    for eventId in runLog.eventId.unique():
        if not (forceWrite or (eventId>storedEvent)):
            continue
        eventRows = runLog[runLog.eventId==eventId]
        s = str(eventRows[eventRows.state =='1'].date.values[0])
        e = str(eventRows[eventRows.state =='0'].date.values[0])
        pickleEvent(dbase, run, s, e, eventId, path)

            
def logFileDataFrame(cursor, dbase, run, path):
    """Build the cleaned event log of one run.

    Reads ``<run>_logdata`` through ``readNoDateTable`` and then:

    * keeps only rows whose ``state`` is ``'1'`` or ``'0'``;
    * keeps only rows whose ``comment`` starts with ``'Data collection'``;
    * collapses runs of repeated states to their first row;
    * drops a trailing state ``1`` row that has no matching state ``0``;
    * exits if any timestamp is earlier than the one before it;
    * adds an ``eventId`` column (one id per 1/0 pair, shifted by the
      offset) and a ``run`` column.

    For runs numbered above 4 the offset is one more than the value
    returned by ``getEventIdOffset``; otherwise it is 0.

    Args:
        cursor: database cursor, passed to ``readNoDateTable``.
        dbase: database name.
        run: run identifier string; must be convertible with ``int``.
        path: directory of the pickled log, passed to ``getEventIdOffset``.

    Returns:
        The cleaned log with the extra ``eventId`` and ``run`` columns.

    Raises:
        SystemExit: when the dates of the cleaned log go backwards.
    """
    firstRun = 4
    offset = 0

    if int(run) > firstRun:
        offset = getEventIdOffset(dbase, path) + 1

    # read log file
    log = readNoDateTable(cursor, run+'_logdata')

    if run == '004' and dbase == 'rct007':
        # single-argument parenthesised print works on Python 2 and 3 alike
        print('make corrections')
        # hard-coded fixes for this one run: overwrite column 1 of two rows
        # NOTE(review): column 1 is presumably `state`; the new values are
        # neither '0' nor '1', so the filter below would drop these rows
        log.iloc[231,1] = 3
        log.iloc[288,1] = 2

    # error check and format log file
    # keep only states 1 and 0
    log = log[(log['state'] == '1') | (log['state'] == '0')].reset_index(drop=True)

    # keep only the rows whose comment starts with 'Data collection'
    idx  = [c[0:15] == 'Data collection' for c in log.comment.values]
    log = log[idx].reset_index(drop=True)

    # remove repeated states
    # here we keep the first state remove the rest that repeat
    states = log.state.apply(int)
    dState = states.diff()
    # diff() leaves NaN in the first slot; force it so row 0 is always kept
    dState[0] = 1
    log = log[(dState == 1) | (dState == -1)].reset_index(drop=True)

    # if last state is not STOP, then get rid of it
    lastState = int(log.iloc[-1,:]['state'])

    if lastState == 1:
        log = log.iloc[0:-1,:].reset_index(drop=True)

    # check time stamps.
    # Compare each step to zero first, then reduce with any(): the former
    # `.dt.seconds.any() < 0` compared a bool to 0 and never triggered, and
    # `.dt.seconds` is non-negative even for negative time differences.
    if (log.date.diff().dt.total_seconds() < 0).any():
         sys.exit("Non-Increasing Sequential Time in events")

    # add event ids and run
    # each id is repeated twice: once for the start row, once for the stop row
    eventId = np.arange(len(log[log.state == '1'])).repeat(2)
    log['eventId'] = eventId + offset
    log['run'] = run

    return log
    
def pickleLogFile(cursor, dbase, runList, path, forceWrite = False):
    """Assemble the logs of several runs and pickle them as one file.

    Calls ``logFileDataFrame`` for each run in ``runList``, concatenates
    the results row-wise with a fresh index and writes them to
    ``<path><dbase>_log.pkl``.  An existing file is only overwritten when
    ``forceWrite`` is true; the per-run logs are built either way.

    Args:
        cursor: database cursor, passed to ``logFileDataFrame``.
        dbase: database name.
        runList: iterable of run identifier strings.
        path: directory prefix of the output file.
        forceWrite: overwrite an existing log pickle.
    """
    target = path + dbase + '_log.pkl'

    # one cleaned log per run, stacked into a single frame
    perRunLogs = [logFileDataFrame(cursor, dbase, runNum, path) for runNum in runList]
    dfLog = pd.concat(perRunLogs, axis=0, ignore_index=True)

    # leave an existing file alone unless asked to overwrite it
    if os.path.isfile(target) and not forceWrite:
        return
    dfLog.to_pickle(target)
        
def pickleOneSensorFiles(cursor, dbase, run, eventId, path):
    """Pickle the sensor data of a single event.

    Loads ``<path><dbase>_log.pkl``, takes the date of the event's state
    ``'1'`` row as start and of its state ``'0'`` row as end, and passes
    them to ``pickleEvent``.

    Args:
        cursor: not used by this function.
        dbase: database name.
        run: run identifier string, passed to ``pickleEvent``.
        eventId: id of the event to pickle.
        path: directory prefix of the log pickle and of the output files.
    """
    fullLog = pd.read_pickle(path+dbase+'_log.pkl')

    # look up the start and stop timestamps of the requested event
    eventRows = fullLog[fullLog.eventId==eventId]
    s = str(eventRows[eventRows.state =='1'].date.values[0])
    e = str(eventRows[eventRows.state =='0'].date.values[0])
    pickleEvent(dbase, run, s, e, eventId, path)

# In [None]:
def pickleCrossDbaseSensorFiles(cursor, dbase, dbase_alt, run, run_alt, path, forceWrite = False):
    """Pickle, for every new event of ``run``, data from another database.

    Loads ``<path><dbase>_log.pkl``, keeps the rows of ``run`` and scans
    ``path`` for pickle files whose name contains both ``<dbase>_<run>``
    and ``<dbase_alt>_<run_alt>`` to find the highest event number already
    stored (0 if none).  Every event id above that number -- or every
    event id when ``forceWrite`` is true -- is passed to
    ``pickleCrossEvent`` with its start (state ``'1'``) and stop
    (state ``'0'``) date.

    Args:
        cursor: not used by this function.
        dbase: database whose log defines the events.
        dbase_alt: database the sensor data is read from.
        run: run identifier string in ``dbase``.
        run_alt: run identifier string in ``dbase_alt``.
        path: directory prefix of the log pickle and of the output files.
        forceWrite: when true, re-pickle events that are already stored.
    """
    # single-argument parenthesised print works on Python 2 and 3 alike
    print(path+dbase+'_log.pkl')
    log = pd.read_pickle(path+dbase+'_log.pkl')
    log = log[log['run']==run]

    # find the last event id stored to pickle
    dirList = os.listdir(path)
    pklFileList = [s for s in dirList if 'pkl' in s and dbase+'_'+run in s  and dbase_alt+'_'+run_alt in s and not '_log.pkl' in s]

    # the event number is the last '_'-separated token, before the extension
    temp = [x.split('_')[-1] for x in pklFileList]

    storedEvent = 0
    if temp:
        storedEvent = max([int(x.split('.')[0]) for x in temp])

    for eventId in log.eventId.unique():
        if forceWrite or (eventId>storedEvent):
            s = str(log[(log.eventId==eventId) & (log.state =='1')].date.values[0])
            e = str(log[(log.eventId==eventId) & (log.state =='0')].date.values[0])
            pickleCrossEvent(dbase, dbase_alt, run, run_alt, s, e, eventId, path)
            
def pickleCrossEvent(dbase, dbase_alt, run, run_alt, start, end, event, path):
    """Pickle the ``rtd`` columns of another database's plc table for one event.

    Opens a cursor with ``connectDbase(dbase_alt)``, reads
    ``<run_alt>_plcdata`` between ``start`` and ``end`` via
    ``readDateTable`` and saves every column whose name contains ``'rtd'``
    as a single-column DataFrame to
    ``<path><dbase>_<run>_<dbase_alt>_<run_alt>_<column>_<event>.pkl``.
    An empty table is reported and skipped.

    Args:
        dbase: database whose log defines the event (file name only).
        dbase_alt: database the data is read from.
        run: run identifier string in ``dbase`` (file name only).
        run_alt: run identifier string in ``dbase_alt``.
        start: start of the time window, passed to ``readDateTable``.
        end: end of the time window, passed to ``readDateTable``.
        event: event id, last part of the file name.
        path: output directory prefix (including trailing separator).
    """
    cursor = connectDbase(dbase_alt)

    # only the plc table of the alternate run is read
    tbNameList = [run_alt+'_plcdata']

    for tbName in tbNameList:
        # read table
        dfTable = readDateTable(cursor, tbName, start, end)

        # if it is not empty make a pickle file for each sensor
        if len(dfTable) != 0:
            # get rid of date
            dfTable.drop('date', axis=1, inplace=True)
            colList = [col for col in dfTable.columns if 'rtd' in col]
            for col in colList:
                filename = path+dbase+'_'+run+'_'+dbase_alt+'_'+run_alt+'_'+col+'_'+str(event)+'.pkl'
                if len(dfTable[col]) != 0:
                    # single-argument parenthesised print works on Python 2 and 3
                    print('Pickling %s \n' % filename)
                    pd.DataFrame(dfTable[col]).to_pickle(filename)
                else:
                    print('Event: %d Run: %s Sensor: %s is null' % (event, run, col))
        else:
            # if it's empty print this and keep going
            print('Table: %s is empty.\n' % tbName)