In [9]:
import pandas as pd
import numpy as np
import re
import sys
import os, errno

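# Execute the import_dbase notebook (referenced by absolute path) so that the names it defines
# are available here - presumably the database helpers used below (e.g. connectDbase); confirm.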
%run '/home/ihs3/Documents/Notebooks/import_dbase.ipynb'

#formatting for panda dataframe - display all text within fields
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 1000)

In [10]:
def timeQuery (dataBase, run, startTime, endTime):
    ''' NAME
        timeQuery

    FILE
        import_getters.ipynb

    DESCRIPTION
        Opens a connection to the named database through connectDbase and, for the given run,
        queries five tables through readDateTable, passing startTime and endTime to each call:
        - <run>_plcdata
        - <run>_logdata
        - <run>_flowindata
        - <run>_flowoutdata
        - <run>_voltagedata

        Each query result is wrapped in a pandas DataFrame.

        If connectDbase raises, the function terminates through sys.exit (SystemExit) with the
        message 'Connection to MySQL database cannot be established'.

    INPUT
        dataBase:  string name of database
        run:       string name of run (used as the table-name prefix)
        startTime: time in string format (YYYY-MM-DD 00:00:00.000000)
        endTime:   time in string format (YYYY-MM-DD 00:00:00.000000)

    RETURN
        tuple of 5 pandas dataframes, in this order:
        (plcDf, logDf, flowInDf, flowOutDf, voltageDf)

    EXAMPLE
        dataBase  = 'rct007'
        run       = '004'
        startTime = '2016-06-28 00:46:39.381692'
        endTime   = '2016-06-28 01:16:41.334500'

        [plcDf, logDf, flowInDf, flowOutDf, voltageDf] = timeQuery(dataBase, run, startTime, endTime)'''

    # Only ordinary errors are translated into the exit message; a bare "except:" would also
    # swallow KeyboardInterrupt / SystemExit raised while connecting.
    try:
        cursor = connectDbase(dataBase)
    except Exception:
        sys.exit('Connection to MySQL database cannot be established')

    # Table suffixes, listed in the order in which the dataframes are returned
    tableSuffixes = ('_plcdata', '_logdata', '_flowindata', '_flowoutdata', '_voltagedata')

    # Fetch every table first (same query order as the return order), then wrap the results
    fetched = [readDateTable(cursor, run + suffix, startTime, endTime) for suffix in tableSuffixes]
    plcDf, logDf, flowInDf, flowOutDf, voltageDf = [pd.DataFrame(rows) for rows in fetched]

    return plcDf, logDf, flowInDf, flowOutDf, voltageDf

In [11]:
def getEvents(dataBase, path='/home/ihs3/Documents/Notebooks/Data/'):

    ''' NAME
            getEvents

        FILE
            import_getters.ipynb

        DESCRIPTION
            Reads the event log pickle <path>/<dataBase>/<dataBase>_log.pkl and pairs every row
            whose 'state' is '1' (START) with the row at the same position among the rows whose
            'state' is '0' (STOP). The result is a multi-tiered indexed dataframe containing:
                -eventId     (index)
                -dataBase    (index)
                -run         (index)
                -Start_Time  (column, the 'date' of the START row)
                -Stop_Time   (column, the 'date' of the STOP row)
                -Duty_Cycle  (column, extracted from the START comment)
                -Gas         (column, extracted from the START comment)
                -Pressure    (column, extracted from the START comment)

            Duty_Cycle, Gas and Pressure are extracted as strings; rows whose comment does not
            match the expected pattern get missing values in those columns.

            If the pickle file cannot be read the function exits through sys.exit (SystemExit).
            If any paired START/STOP rows carry different event ids it exits through sys.exit.
            Rows that have no partner (one side missing) are not treated as a mismatch.

        INPUT
            dataBase: string name of database
            path:     optional data directory (defaults to '/home/ihs3/Documents/Notebooks/Data/');
                      a trailing slash is accepted but not required

        RETURN
            pandas dataframe

        EXAMPLE
            dataBase ='rct007'
            path ='/home/ihs3/Documents/Notebooks/Data/'
            eventsDf = getEvents(dataBase,path)
            eventsDf = getEvents('rct007')
            eventsDf = getEvents('rct007','/home/ihs3/Documents/Notebooks/Data/')'''

    # Exit with a message if the pickle file for events cannot be loaded
    try:
        # os.path.join yields the same name as plain concatenation when path ends in '/',
        # and still works when the trailing slash is missing
        filename = os.path.join(path, dataBase, dataBase + '_log.pkl')

        # NOTE(review): unpickling can execute code stored in the file - only load trusted files
        df = pd.read_pickle(filename)

    except Exception:
        sys.exit('Pickle file cannot be found (getEvents)')

    # Split into ON (state '1') and OFF (state '0') rows. reset_index renumbers both frames
    # from 0 so that the n-th ON row lines up with the n-th OFF row, and moves the old index
    # into an 'index' column that is dropped below. (.loc replaces the removed .ix indexer.)
    on = df.loc[df['state'] == '1'].reset_index()
    off = df.loc[df['state'] == '0'].reset_index()

    # Give the date / id columns unique names and drop what is redundant.
    # The ON side keeps 'comment' and 'run': 'comment' feeds the regular expression below.
    on = on.rename(columns={'date': 'Start_Time'}).drop(['state', 'index'], axis=1)
    off = (off.rename(columns={'date': 'Stop_Time', 'eventId': 'StopID'})
              .drop(['comment', 'state', 'index', 'run'], axis=1))

    # Place START and STOP information side by side
    eventsDf = pd.concat([on, off], axis=1)

    # Error check -- StopID must equal eventId on EVERY paired row, not only on the first one.
    # Rows where either id is missing (unpaired START or STOP) are skipped.
    paired = eventsDf['StopID'].notnull() & eventsDf['eventId'].notnull()
    if (eventsDf.loc[paired, 'StopID'] != eventsDf.loc[paired, 'eventId']).any():
        sys.exit('START event and STOP event do not match (getEvents)')

    # Extract key values from the comment field into their own columns.
    # NOTE(review): 'Duty Cyce' is kept exactly as-is; presumably it mirrors the spelling used
    # in the logged comments - confirm before "correcting" it.
    regex = r'Duty Cyce: (?P<Duty_Cycle>\S+), Gas: (?P<Gas>\w+)   Pressure: (?P<Pressure>\S+) Torr'
    extracted = eventsDf['comment'].str.extract(regex)
    eventsDf = pd.concat([eventsDf, extracted], axis=1)

    # Create multi-tiered index --> eventId, dataBase, run
    tuples = list(zip(eventsDf['eventId'], [dataBase] * len(eventsDf.index), eventsDf['run']))
    index = pd.MultiIndex.from_tuples(tuples, names=['eventId', 'dataBase', 'run'])
    eventsDf = eventsDf.set_index(index)

    # Remove columns that are now redundant (they live in the index) or no longer needed
    return eventsDf.drop(['comment', 'eventId', 'run', 'StopID'], axis=1)

In [4]:
def getGasEvents(dataBase, gasType, path='/home/ihs3/Documents/Notebooks/Data/'):

    ''' NAME
            getGasEvents

        FILE
            import_getters.ipynb

        DESCRIPTION
            Loads all events for the database through getEvents and keeps only the rows whose
            'Gas' column equals gasType. The returned dataframe has the same layout as the one
            produced by getEvents:
                -eventId     (index)
                -dataBase    (index)
                -run         (index)
                -Start_Time  (column)
                -Stop_Time   (column)
                -Duty_Cycle  (column)
                -Gas         (column)
                -Pressure    (column)

            If no event matches gasType (the filtered dataframe is empty) the function exits
            through sys.exit (SystemExit).

        INPUT
            dataBase: string name of database
            gasType:  string name of gas
            path:     optional (defaults to '/home/ihs3/Documents/Notebooks/Data/')

        RETURN
            pandas dataframe

        EXAMPLE
            dataBase ='rct007'
            gasType ='Argon'
            path ='/home/ihs3/Documents/Notebooks/Data/'
            gasEventsDf = getGasEvents(dataBase,gasType)
            gasEventsDf = getGasEvents(dataBase,gasType,path)
            gasEventsDf = getGasEvents('rct007','Argon')
            gasEventsDf = getGasEvents('rct007','Argon','/home/ihs3/Documents/Notebooks/Data/')'''

    # Every event recorded for this database
    allEvents = getEvents(dataBase, path)

    # Keep the rows logged with the requested gas
    isRequestedGas = allEvents['Gas'] == gasType
    gasEventsDf = allEvents[isRequestedGas]

    if not gasEventsDf.empty:
        return gasEventsDf

    # Nothing matched: the gas type does not occur in this database
    sys.exit( 'Null DataFrame - GasType Specified cannot be found (getGasEvents)')

In [5]:
def getPressureEvents(dataBase,pressureLevel, path='/home/ihs3/Documents/Notebooks/Data/'):
    ''' NAME
            getPressureEvents

        FILE
            import_getters.ipynb

        DESCRIPTION
            Loads all events for the database through getEvents and keeps only the rows whose
            'Pressure' column equals pressureLevel. The returned dataframe has the same layout
            as the one produced by getEvents:
                -eventId     (index)
                -dataBase    (index)
                -run         (index)
                -Start_Time  (column)
                -Stop_Time   (column)
                -Duty_Cycle  (column)
                -Gas         (column)
                -Pressure    (column)

            If no event matches pressureLevel (the filtered dataframe is empty) the function
            exits through sys.exit (SystemExit).

        INPUT
            dataBase:      string name of database
            pressureLevel: string name of pressure level (i.e. '2')
            path:          optional (defaults to '/home/ihs3/Documents/Notebooks/Data/')

        RETURN
            pandas dataframe

        EXAMPLE
            dataBase = 'rct007'
            pressureLevel = '2'
            path = '/home/ihs3/Documents/Notebooks/Data/'
            pressureDf = getPressureEvents(dataBase,pressureLevel)
            pressureDf = getPressureEvents(dataBase,pressureLevel,path)
            pressureDf = getPressureEvents('rct007','2')
            pressureDf = getPressureEvents('rct007','2','/home/ihs3/Documents/Notebooks/Data/')'''

    # Every event recorded for this database
    allEvents = getEvents(dataBase,path)

    # Keep the rows logged at the requested pressure level
    atRequestedPressure = allEvents['Pressure'] == pressureLevel
    pressureEventsDf = allEvents[atRequestedPressure]

    if not pressureEventsDf.empty:
        return pressureEventsDf

    # Nothing matched: the pressure level does not occur in this database
    sys.exit('Null DataFrame - pressureLevel specified cannot be found (getPressureEvents)')

In [6]:
def getDutyCycleEvents(dataBase, dutyCycle, path='/home/ihs3/Documents/Notebooks/Data/'):
    ''' NAME
            getDutyCycleEvents - get events from pickle files

        FILE
            import_getters.ipynb

        DESCRIPTION
            Loads all events for the database through getEvents and keeps only the rows whose
            'Duty_Cycle' column equals dutyCycle. The returned dataframe has the same layout as
            the one produced by getEvents:
                -eventId     (index)
                -dataBase    (index)
                -run         (index)
                -Start_Time  (column)
                -Stop_Time   (column)
                -Duty_Cycle  (column)
                -Gas         (column)
                -Pressure    (column)

            If no event matches dutyCycle (the filtered dataframe is empty) the function exits
            through sys.exit (SystemExit).

        INPUT
            dataBase:  string name of database
            dutyCycle: string name of dutyCycle event (i.e. '0.6')
            path:      optional (defaults to '/home/ihs3/Documents/Notebooks/Data/')

        RETURN
            dataframe

        EXAMPLE
            dataBase='rct007'
            dutyCycle='0.6'
            path='/home/ihs3/Documents/Notebooks/Data/'
            dutyCycleEventsDf = getDutyCycleEvents(dataBase, dutyCycle)
            dutyCycleEventsDf = getDutyCycleEvents(dataBase, dutyCycle, path)
            dutyCycleEventsDf = getDutyCycleEvents('rct007', '0.6')
            dutyCycleEventsDf = getDutyCycleEvents('rct007', '0.6', '/home/ihs3/Documents/Notebooks/Data/')'''

    # Every event recorded for this database
    allEvents = getEvents(dataBase, path)

    # Keep the rows logged with the requested duty cycle
    atRequestedDutyCycle = allEvents['Duty_Cycle'] == dutyCycle
    dutyCycleEventsDf = allEvents[atRequestedDutyCycle]

    if not dutyCycleEventsDf.empty:
        return dutyCycleEventsDf

    # Nothing matched: the duty cycle does not occur in this database
    sys.exit('Null DataFrame - dutyCycle event specified cannot be found (getDutyCycleEvents)')

In [7]:
def getSensorNames(dataBase, run):
    ''' NAME
            getSensorNames

        FILE
            pickle_getter_functions.ipynb

        DESCRIPTION
            Connects to the database through connectDbase, asks getColName for the column
            headers of the run's four sensor tables (<run>_plcdata, <run>_flowindata,
            <run>_flowoutdata, <run>_voltagedata), removes the 'date' header from each and
            returns the remaining names, in that table order, as a one-column dataframe whose
            default integer index numbers the sensors.

            If the connection fails, or the column headers cannot be fetched, the function exits
            through sys.exit (SystemExit). A table without a 'date' header raises ValueError.

        INPUT
            dataBase: string name of database
            run: string name of the run

        RETURN
            pandas DataFrame with a single column named 'sensor'

        EXAMPLE
            dataBase='rct007'
            run='004'
            sensorNamesDf = getSensorNames(dataBase, run)
            sensorNamesDf = getSensorNames('rct007', '004')'''

    # Connect to the database; only ordinary errors are turned into the exit message
    try:
        cursor = connectDbase(dataBase)

    except Exception:
        sys.exit('cannot connect to dataBase (getSensorNames)')

    tableSuffixes = ('_plcdata', '_flowindata', '_flowoutdata', '_voltagedata')

    # getColName's result is used through .values() (a dictionary of headers, per the original
    # notes). list() makes a real list out of it so that .remove() and concatenation also work
    # where .values() returns a view rather than a list (Python 3).
    try:
        columnLists = [list(getColName(cursor, run + suffix).values()) for suffix in tableSuffixes]

    except Exception:
        sys.exit('database tables cannot be found (getSensorNames)')

    # Drop the 'date' header from every table, then combine the rest in table order
    sensorNames = []
    for columns in columnLists:
        columns.remove('date')
    for columns in columnLists:
        sensorNames.extend(columns)

    # Dataframe gives a numbered index; naming the column at construction also works when
    # the list of sensors is empty
    sensorNamesDf = pd.DataFrame(sensorNames, columns=['sensor'])

    return sensorNamesDf

In [8]:
def getSensorData(sensor, eventTuple, path='/home/ihs3/Documents/Notebooks/Data/'):
    ''' NAME
            getSensorData

        FILE
            pickle_getter_functions.ipynb

        DESCRIPTION
            Reads the pickle file holding the raw data of one sensor for one event:
                <path>/<reactor>/<reactor>_<run>_<sensor>_<event>.pkl
            where event, reactor and run are the three entries of eventTuple.
            It returns the index and the values of the stored dataframe.

            If the file does not exist no error is raised: a 3 x 1 placeholder filled with NaN
            is used instead, indexed by the three entries of eventTuple.

        INPUT
            sensor:       string that is the name of the sensor (refer to getSensorNames)
            eventTuple:   tuple that refer to an event (refer to getEvents)
                          The tuple contains an integer event number, string database,
                          and string run number
            path:         optional data directory (defaults to '/home/ihs3/Documents/Notebooks/Data/');
                          a trailing slash is accepted but not required

        RETURN
            index of the stored dataframe (presumably datetimes - depends on the pickle content)
            numpy array of raw values of the sensor

        EXAMPLE
            dBase = 'rct007'
            eventDf = getEvents(dBase)
            eventTuple = eventDf.index[5]
            sensor = 'rct_rtd1'
            [timeList, sensorData] = getSensorData(sensor, eventTuple)'''

    # The file name is assembled from strings, so convert the tuple entries first
    event = str(eventTuple[0])    # integer event number
    reactor = str(eventTuple[1])  # database string
    run = str(eventTuple[2])      # run string

    # Same name as plain concatenation when path ends in '/', but also valid without it
    filename = os.path.join(path, reactor, '_'.join([reactor, run, sensor, event]) + '.pkl')

    if os.path.exists(filename):
        # NOTE(review): unpickling can execute code stored in the file - only load trusted files
        rawData = pd.read_pickle(filename)
    else:
        # Missing file: NaN placeholder (np.nan instead of the np.NAN alias, which newer
        # NumPy releases no longer provide)
        placeholder = np.full((3, 1), np.nan)
        rawData = pd.DataFrame(placeholder, index=list(eventTuple), columns=[sensor])

    return rawData.index, rawData.values

In [None]:
# Scale raw sensor readings for the sensors that need a conversion
def engDataCondit(rawData, sensorName):
    ''' NAME
            engDataCondit

        DESCRIPTION
            Applies the conversion that belongs to the sensor (name compared case-insensitively)
            to the raw values found in rawData[1]:
                v1, v4, v8      -> values * 0.5
                v5              -> values * 23.
                flowin, flowout -> (values - first value) * 0.0005376344
                anything else   -> values returned unchanged

        INPUT
            rawData:    indexable whose item [1] holds the raw values, e.g. the
                        (index, values) pair returned by getSensorData
            sensorName: string name of the sensor

        RETURN
            converted values (the very same object as rawData[1] when no conversion applies)'''

    values = rawData[1]
    key = sensorName.lower()

    if key in ('v1', 'v4', 'v8'):
        return values * 0.5

    if key == 'v5':
        return values * 23.

    if key in ('flowin', 'flowout'):
        # Readings are taken relative to the first sample before scaling
        return (values - values[0]) * 0.0005376344

    return values

In [None]:
def getEngData(sensor, eventTuple, path='/home/ihs3/Documents/Notebooks/Data/'):
    ''' NAME
            getEngData

        FILE
            pickle_getter_functions.ipynb

        DESCRIPTION
            Reads the raw data of one sensor for one event through getSensorData and converts
            the values through engDataCondit (which scales some sensors and leaves the others
            unchanged).

            The path argument is forwarded to getSensorData, so data stored outside the default
            directory can be read as well.

        INPUT
            sensor:       string that is the name of the sensor (refer to getSensorNames)
            eventTuple:   tuple that refer to an event (refer to getEvents)
                          The tuple contains an integer event number, database string,
                          and run number string
            path:         optional (defaults to '/home/ihs3/Documents/Notebooks/Data/')

        RETURN
            index returned by getSensorData (the time stamps of the samples)
            converted values of the sensor

        EXAMPLE
            dBase = 'rct007'
            eventDf = getEvents(dBase)
            eventTuple = eventDf.index[5]
            sensor = 'rct_rtd1'
            [timeList, engData] = getEngData(sensor,eventTuple)'''

    # read pickle file - honour the caller's path instead of a hard-coded directory
    rawData = getSensorData(sensor, eventTuple, path=path)

    # engineering units
    engData = engDataCondit(rawData, sensor)

    return rawData[0], engData


In [None]:
def getEngData2(sensor, eventTuple, path='/home/ihs3/Documents/Notebooks/Data/'):
    ''' NAME
            getEngData2

        FILE
            pickle_getter_functions.ipynb

        DESCRIPTION
            Same data as getEngData, but packaged as a single dataframe: the raw data of one
            sensor for one event is read through getSensorData, converted through engDataCondit
            and returned as one column named after the sensor, indexed by 'Datetime'.

            The path argument is forwarded to getSensorData, so data stored outside the default
            directory can be read as well.

        INPUT
            sensor:       string that is the name of the sensor (refer to getSensorNames)
            eventTuple:   tuple that refer to an event (refer to getEvents)
                          The tuple contains an integer event number, database string,
                          and run number string
            path:         optional (defaults to '/home/ihs3/Documents/Notebooks/Data/')

        RETURN
            dataframe with index 'Datetime' and one column named <sensor>

        EXAMPLE
            dBase = 'rct007'
            eventDf = getEvents(dBase)
            eventTuple = eventDf.index[5]
            sensor = 'rct_rtd1'
            df = getEngData2(sensor,eventTuple)'''

    # read pickle file - honour the caller's path instead of a hard-coded directory
    rawData = getSensorData(sensor, eventTuple, path=path)

    # engineering units
    engData = engDataCondit(rawData, sensor)

    # formatting: time stamps and values side by side, then the time stamps become the index
    timeFrame = pd.DataFrame(rawData[0])
    valueFrame = pd.DataFrame(engData)
    df = pd.concat([timeFrame, valueFrame], axis=1)
    df.columns = ['Datetime', sensor]
    df = df.set_index('Datetime')
    return df


In [None]:
def getMultiEvent(sensor, eventTupleList):

    ''' NAME
            getMultiEvent

        DESCRIPTION
            This function gets data from one sensor for multiple events using a list of eventTuples.
            The list of eventTuples can contain multiple runs and/or reactors.
            Each event is read through getEngData; the results are stacked in the order in which
            the events are listed, so every value stays on the same row as its own event tuple
            and time stamp.

        INPUT
            sensor:          string that is the name of the sensor (get this from getSensorNames)
            eventTupleList:  list of eventTuples

        RETURN
            dataframe with the columns 'event id', 'reactor', 'run', <sensor> and 'timestamp',
            one row per sample (empty when eventTupleList is empty)

        EXAMPLE
            #Get list of Argon events
            dBase ='rct007'
            gasType ='Argon'
            gasEventsDf = getGasEvents(dBase,gasType)

            #Get list of tuples for Argon events
            eventTupleList = gasEventsDf.index
            sensor = 'rct_rtd1'

            getMultiEvent(sensor, eventTupleList)'''

    # Start from an empty float array so the stacked values have the same dtype behaviour as
    # repeated np.append calls, and so an empty event list still concatenates
    valueChunks = [np.array([])]
    timestamps = []
    indexList = []

    for eventTup in eventTupleList:
        eventTimes, eventValues = getEngData(sensor, eventTup)

        # One copy of the event tuple per sample of this event
        indexList.extend([eventTup] * len(eventValues))

        # Values are appended AFTER those of earlier events - prepending them would put each
        # event's values next to another event's tuple and time stamps
        valueChunks.append(np.ravel(eventValues))
        timestamps.extend(list(eventTimes))

    # Single concatenation instead of growing arrays / dataframes inside the loop
    data = np.concatenate(valueChunks)

    df1 = pd.DataFrame(data, columns = [sensor])
    df2 = pd.DataFrame(indexList, columns = ['event id', 'reactor', 'run'])
    df = df2.join(df1)
    df['timestamp'] = timestamps

    return df

In [None]:
def plotSensor(dataBase, event, sensor):
    ''' NAME
        plotSensor

    FILE
        plotSensor.ipynb

    DESCRIPTION
        This function plots data for specified sensor(s) for a single event.
        Sensors can be passed in as a string (single sensor), tuple or list.

        The event is selected by its position in the dataframe returned by getEvents
        (eventDf.index[event]); the selected event is printed before plotting.
        For every sensor the data is read through getEngData2 and handed to pplotSense.
        If plotting a sensor fails, 'Error plotting <sensor>' is printed and the function
        carries on with the next sensor.

    INPUT
        dataBase:  string name of database
        event:     int, position of the event in the getEvents dataframe
        sensor:    str, list, or tuple of sensor names

    RETURN
        None (the plots / messages are printed)

    EXAMPLE
        #For a single sensor
        dataBase  = 'rct007'
        event     = 9
        sensor = 'amb_rtd1'
        plotSensor(dataBase,9,sensor)

        #For multiple sensors
        dataBase  = 'rct007'
        event     = 9
        sensor = 'amb_rtd1','amb_rtd2'
        plotSensor(dataBase,9,sensor)

        #For multiple sensors - referencing sensor names by number range
        sensorNamesDf = getSensorNames('rct007', '004')
        plotSensor('rct007',9,sensorNamesDf.loc[0:63,'sensor'].tolist())'''

    # Display event to be plotted
    eventDf = getEvents(dataBase)
    eventTuple = eventDf.index[event]
    print(eventDf.loc[eventTuple])

    # A single name is wrapped so that one loop serves both cases. Anything that is not a
    # string is treated as a collection of sensor names (this is what the former
    # "type(sensor) == tuple or list" test, which was always true, amounted to).
    if isinstance(sensor, str):
        sensorNames = [sensor]
    else:
        sensorNames = sensor

    for sensorName in sensorNames:

        # Grab corresponding data for the event from the getEngData2 getter function
        df = getEngData2(sensorName, eventTuple)

        # Plot sensor data; a failure is reported for the sensor that failed and does not
        # stop the remaining plots
        try:
            print(pplotSense(df[sensorName], sensorName))

        except Exception:
            print('Error plotting ' + str(sensorName))

    return