In [None]:
from numpy import array, uint32, uint64, str as npstr

from datetime import datetime

from pandas import read_csv, read_pickle, Series, to_datetime

from os.path import expanduser, join

from h5py import File as h5File

# Sensor tag -> human-readable sensor description.
SENSORS = {
    'PINP': 'Pump Intake Pressure',
    'PCAS': 'Casing Pressure',
    'PTUB': 'Tubing Pressure',
    'VDH': 'Pump Vibration',
    'TCPU': 'CPU Temperature',
    'EHD': 'Motor Voltage',  # Actual voltage of the motor
    'PLIN': 'Line Pressure',
    'TVFD1': 'Heatsink Temperature',
    'EINPA': 'Input Voltage',  # Input to the drive on surface
    'TINP': 'Intake Temperature',
    'IDH': 'Motor Current',  # EHD Current
    'NSTART': 'Number of Starts',
    'JOUT': 'Output Power',  # Output power of the drive
    'NPF': 'Power Factor',
    'YRUN': 'Run Status',  # text
    'YSD': 'Last Shutdown Reason',  # text
    'KRUNRSAC': 'Runtime Since Restart',
    'EVFD1OUT': 'Output Drive Voltage',
    'SVFD1OUT': 'Drive Frequency',
    'IMOUL': 'Motor Underload',
}

# Status text -> integer code, used by parse_string() for non-numeric
# readings (presumably the 'YSD' / Last Shutdown Reason sensor -- confirm).
#
# Notes for maintainers:
#   * Some spellings deliberately share a code: 33 ('Underload Fault',
#     'Underload FAULT', 'Underload Flt') and 31 ('IGBT TEMPERATURE',
#     'IGBT TEMP'). As a result codes 9, 44 and 85 are unused.
#   * Keys that are purely numeric text ('0', '252', '1837', ...) are never
#     reached through parse_string(), because it tries float() first.
YSD_DICT = {
    'DH Data Invalid': 1,
    'Manual Stop': 2,
    'PwrFail Sys Stop': 3,
    'Ext Flt Warning': 4,
    'No Faults/Warnin': 5,
    'Overload Fault': 6,
    'OVER CURRENT': 7,
    'No Faults or War': 8,
    'Underload Fault': 33,
    'DHwinding THi Fl': 10,
    'INPUT LINE SUPER': 11,
    'Motor Temp Hi': 12,
    'V7 Fault PL': 13,
    'V7 Dig In 2': 14,
    'Watchdog Reset': 15,
    'V7 Fault OL2': 16,
    'Motor Overload F': 17,
    'Auxiliary Input': 18,
    'Heatsink Overtem': 19,
    'Hardware OverCur': 20,
    'V7 Fault UV': 21,
    'Auto Restart Tri': 22,
    'Hardware 3 FAULT': 23,
    'V7 Dig In 3': 24,
    'Start Failed': 25,
    'Underload': 26,
    'V7 Dig In 1': 27,
    'V7 Fault OV': 28,
    'DInput 1 Lckout': 29,
    'DInput 1 Fault': 30,
    'IGBT TEMPERATURE': 31,
    'Dig In 12': 32,
    'Underload FAULT': 33,
    'RTD 5 OH': 34,
    'Drive Freq Low': 35,
    'VFD EXT FAULT': 36,
    'VFD BUS ComFLT': 37,
    'Digital Input 1': 38,
    'BUS COMM FAULT': 39,
    'DEFAULT': 40,
    'KEYPAD_STOP': 41,
    'Intake Temp Hi': 42,
    'V7 Dig In 4': 43,
    'Underload Flt': 33,
    'Underload Lckout': 45,
    'OVER_CURRENT_A': 46,
    'Phase A pos IOT': 47,
    'UNDERLOAD_SD': 48,
    'IO1DI1_SD': 49,
    'OVERLOAD_LCK': 50,
    '1837': 51,
    '1838': 52,
    'UAB1_THLD_SD': 53,
    'MOTOR_FAILED_TO_': 54,
    'GasLckMaxCycLcko': 55,
    'VFD OVER CURRENT': 56,
    'PID HiLv Stop': 57,
    '0': 58,
    'Phase B pos IOT': 59,
    'DHwinding THi Lc': 60,
    'Lo Freq Fault': 61,
    'Lo Freq Lockout': 62,
    'V7 HBB E-STOP': 63,
    'DInput 2 Fault': 64,
    'Self-Stop': 65,
    'Drive Disabled': 66,
    'POWER FAILURE': 67,
    'POWER UP': 68,
    'Aux Input 8': 69,
    'Decel Inhibit FA': 70,
    'NA 082': 71,
    'TEMP 2 FAULT': 72,
    'MOTOR UNDRLOAD': 73,
    'UAB2_THLD_SD': 74,
    'DHintake P Lo Lc': 75,
    'Breaker 2 Fault': 76,
    'Intake Press Low': 77,
    'Input Cur.Unbal': 78,
    'Dig In 4': 79,
    'Input VoltUnbal': 80,
    'Input UnderVolt': 81,
    'NA 160': 82,
    '4294967295': 83,
    'A Fault': 84,
    'IGBT TEMP': 31,
    'AFE_A Start Flt': 86,
    'OverVoltage FAUL': 87,
    'Undervoltage FAU': 88,
    'NA 083': 89,
    'Drive Overload F': 90,
    'NA 16': 91,
    'Ground FAULT': 92,
    'GDI COMM_FAILURE': 93,
    'converter watchd': 94,
    '1598': 95,
    '252': 96,
    'CSU_COMFAIL_SD': 97,
    'VFD SLOT FLT': 98,
    'Parameters Defau': 99,
    'OVERVOLT_SD': 100,
    '1769': 101,
    'Phase C pos IOT': 102,
    'Phase B neg IOT': 103,
    'OVER_CURRENT_B': 104,
    'Parameter Checks': 105,
    'I UNBAL LCK': 106,
    '1849': 107,
    'Phase A neg IOT': 108,
    'UNDERVOLT_SD': 109,
    'SYSTEM FAULT': 110,
    'OVER_CURRENT_C': 111,
    'V UNBAL_SD': 112,
    'DCBus V Hi Locko': 113,
    'DCBus V Hi Fault': 114,
    'AFE_B Start Lcko': 115,
    'UNKNOWN FAULT': 116,
    'I UNBAL SD': 117,
    'Filter Overheat': 118,
    'V7 Fault LF': 119,
    'V7 Fault OC': 120,
    'SCADA_STOP': 121,
}

# Run-status text -> integer code; parse_string() falls back to this table
# when a reading is neither numeric nor a YSD_DICT key.
# Different spellings of the same state share a code (e.g. every "running"
# variant maps to 0, every "down" variant to 4). The numeric-text keys
# ('22', '1') are never reached through parse_string(), which tries float()
# first.
YRUN_DICT = {
    '1:Running': 0,
    '0:Stopped': 1,
    '0:Not Running': 2,
    'AUTO RUN': 3,
    'NOT RUNNING': 2,
    'RUNNING': 0,
    'DOWN': 4,
    'E:FLT RESTART': 5,
    'E:MOTOR OFF': 6,
    'E:FAULTED': 7,
    'E:STOP': 8,
    'HAND RUN': 9,
    '22': 22,
    'ESP RUNNING': 0,
    'ESP DOWN': 4,
    'Down': 4,
    'Running': 0,
    '1': 0,
    'OK': 0,
    '0:Down': 4,
}

def parse_string(string):
    """
    Convert one raw sensor reading to a number.

    Text that float() accepts is returned as a float. Any other text is
    treated as a status label and looked up in YSD_DICT first, then in
    YRUN_DICT, returning the integer code stored there.

    Because float() is tried first, labels that look numeric (e.g. '0' or
    '22') are returned as floats and never reach the lookup tables.

    Raises KeyError if the text is neither numeric nor a key of either
    table. Only ValueError from float() is handled, so a non-string input
    such as None propagates float()'s TypeError.
    """
    try:
        return float(string)
    except ValueError:
        pass  # not numeric: fall through to the status-label tables

    # One lookup per table; the shutdown-reason table takes precedence.
    try:
        return YSD_DICT[string]
    except KeyError:
        return YRUN_DICT[string]

def datetime_to_secfromepoch(datetime_arr, format):
    """
    Convert timestamp strings to seconds elapsed since 1970-01-01 00:00:00.

    Each item of datetime_arr is parsed with datetime.strptime using the
    `format` pattern. The elapsed seconds are returned as a NumPy array of
    dtype uint64, so any fractional part is dropped by the integer cast.
    """
    epoch = datetime(1970, 1, 1)
    elapsed_seconds = []
    for stamp in datetime_arr:
        delta = datetime.strptime(stamp, format) - epoch
        elapsed_seconds.append(delta.total_seconds())
    return array(elapsed_seconds, dtype=uint64)

def lookup(s):
    """
    Parse a Series of date strings, converting each distinct value once.

    Large inputs tend to repeat the same dates many times, so instead of
    calling to_datetime on every row we parse only the unique values and
    then map every row through the resulting {raw value: parsed date}
    table. Returns the mapped Series.
    """
    parsed_by_raw = {}
    for raw in s.unique():
        parsed_by_raw[raw] = to_datetime(raw)
    return s.map(parsed_by_raw)

# Set input data path
file_folder = 'data'
file_name = 'esp_hist_anon.csv'
# The input CSV, the cached pickle and the HDF5 output all live in
# ~/<file_folder>, so the directory is computed once.
output_file_folder = join(expanduser('~'), file_folder)
file_path = join(output_file_folder, file_name)
pickle_path = join(output_file_folder, 'foo.pkl')

# Read csv to DataFrame
read_raw = False   # True: parse the raw CSV; False: load the cached pickle
overwrite = False  # With read_raw=True: also (re)write the cached pickle
if read_raw:
    # Builtin str is what the deprecated numpy.str alias (imported in this
    # file as npstr) referred to, so the dtypes are unchanged.
    esp_df = read_csv(file_path, header=None,
                      parse_dates=None,
                      names=['field', 'esp', 'sensor', 'values', 'date_time'],
                      dtype={'field': uint32, 'esp': uint32, 'sensor': str,
                             'values': str, 'date_time': str})

    if overwrite:
        esp_df.to_pickle(pickle_path)
else:
    # NOTE(review): unpickling can execute arbitrary code; only load a
    # cache file that this script wrote itself.
    esp_df = read_pickle(pickle_path)

# Parse the timestamp strings (each distinct string is parsed only once)
esp_df['date_time'] = lookup(esp_df['date_time'])

# Numeric text -> float, status text -> integer code
esp_df['values'] = esp_df['values'].apply(parse_string)

# Convert Date Time to Seconds from epoch
# NOTE(review): the format hard-codes a ".000000000" fractional part, so a
# timestamp with non-zero sub-second digits raises ValueError here.
new_time = Series(datetime_to_secfromepoch(esp_df['date_time'].values.astype('str'),
                                           format="%Y-%m-%dT%H:%M:%S.000000000"))

# Add new time (s). The original 'date_time' column is left in place; only
# 'time' and 'values' are written to the HDF5 file below.
esp_df = esp_df.assign(time=new_time.values)

# Write one dataset per field/esp/sensor, each holding the (time, values)
# rows of that sensor.
with h5File(join(output_file_folder, 'esp_data.h5'), 'w') as fp:
    # Group by a scalar column name rather than a one-element list: with a
    # list, pandas >= 2.0 yields 1-tuples as group keys, which are not
    # usable as HDF5 group/dataset names.
    for field, field_val in esp_df.groupby('field'):
        field_grp = fp.create_group(str(field))
        for esp, esp_val in field_val.groupby('esp'):
            esp_grp = field_grp.create_group(str(esp))
            for sensor, sensor_val in esp_val.groupby('sensor'):
                data_arr = sensor_val[['time', 'values']].values
                esp_grp.create_dataset(sensor, data=data_arr)
