We can open a hobo csv file with csv.reader:

In [1]:
import csv
with open('sample_hobo_data.csv', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

['ï»¿"Plot Title: P12_4"']
['#', 'Date Time, GMT+01:00', 'Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238)', 'Coupler Detached (LGR S/N: 10469238)', 'Coupler Attached (LGR S/N: 10469238)', 'Host Connected (LGR S/N: 10469238)', 'Stopped (LGR S/N: 10469238)', 'End Of File (LGR S/N: 10469238)']
['1', '10/10/19 11:00:00 AM', '72.199', '', '', '', '', '']
['2', '10/10/19 11:00:02 AM', '', 'Logged', '', '', '', '']
['3', '10/10/19 11:05:00 AM', '72.545', '', '', '', '', '']
['4', '10/10/19 11:10:00 AM', '72.372', '', '', '', '', '']
['5', '10/10/19 11:15:00 AM', '72.545', '', '', '', '', '']
['6', '10/10/19 11:20:00 AM', '72.545', '', '', '', '', '']
['7', '10/10/19 11:25:00 AM', '72.028', '', '', '', '', '']
['8', '10/10/19 11:30:00 AM', '70.480', '', '', '', '', '']
['9', '10/10/19 11:35:00 AM', '69.793', '', '', '', '', '']
['10', '10/10/19 11:40:00 AM', '69.449', '', '', '', '', '']
['11', '10/10/19 11:45:00 AM', '69.278', '', '', '', '', '']
['12', '10/10/19 11:50:00 AM', '69.278', '', 

['1484', '10/15/19 02:30:00 PM', '64.481', '', '', '', '', '']
['1485', '10/15/19 02:35:00 PM', '64.481', '', '', '', '', '']
['1486', '10/15/19 02:40:00 PM', '64.481', '', '', '', '', '']
['1487', '10/15/19 02:45:00 PM', '64.481', '', '', '', '', '']
['1488', '10/15/19 02:50:00 PM', '64.481', '', '', '', '', '']
['1489', '10/15/19 02:55:00 PM', '64.310', '', '', '', '', '']
['1490', '10/15/19 03:00:00 PM', '64.310', '', '', '', '', '']
['1491', '10/15/19 03:05:00 PM', '64.310', '', '', '', '', '']
['1492', '10/15/19 03:10:00 PM', '64.310', '', '', '', '', '']
['1493', '10/15/19 03:15:00 PM', '64.310', '', '', '', '', '']
['1494', '10/15/19 03:20:00 PM', '64.310', '', '', '', '', '']
['1495', '10/15/19 03:25:00 PM', '64.310', '', '', '', '', '']
['1496', '10/15/19 03:30:00 PM', '64.310', '', '', '', '', '']
['1497', '10/15/19 03:35:00 PM', '64.139', '', '', '', '', '']
['1498', '10/15/19 03:40:00 PM', '64.139', '', '', '', '', '']
['1499', '10/15/19 03:45:00 PM', '64.139', '', '', '', 

### Working with the header row

In [160]:
import csv

def get_header_row(reader):
    for row in reader:
        if row[0]=='#':
            return row

with open('sample_hobo_data.csv', newline='') as f:
    reader = csv.reader(f)
    header_row=get_header_row(reader)
            
header_row

['#',
 'Date Time, GMT+01:00',
 'Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238)',
 'Coupler Detached (LGR S/N: 10469238)',
 'Coupler Attached (LGR S/N: 10469238)',
 'Host Connected (LGR S/N: 10469238)',
 'Stopped (LGR S/N: 10469238)',
 'End Of File (LGR S/N: 10469238)']

Each header can have a number of things:
- a title (i.e. '#', 'Date Time')
- a timezone (if it's the 'Date Time header)
- unit (if it's a variable with units)
- a logger serial number
- a sensor serial number

In [155]:
import re
def get_title(header):
    result = re.search('(.*?)((,| [(])|$)', header)
    try:
        return result.group(1)
    except AttributeError:
        return None
    
for header in header_row:
    print(header, end=' - ')
    print(get_title(header))

# - #
Date Time, GMT+01:00 - Date Time
Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238) - Temp
Coupler Detached (LGR S/N: 10469238) - Coupler Detached
Coupler Attached (LGR S/N: 10469238) - Coupler Attached
Host Connected (LGR S/N: 10469238) - Host Connected
Stopped (LGR S/N: 10469238) - Stopped
End Of File (LGR S/N: 10469238) - End Of File


In [87]:
import re
def get_timezone(header):
    result = re.search('Date Time, (.*)', header)
    try:
        return result.group(1)
    except AttributeError:
        return None
    
for header in header_row:
    print(header, end=' - ')
    print(get_timezone(header))

# - None
Date Time, GMT+01:00 - GMT+01:00
Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238) - None
Coupler Detached (LGR S/N: 10469238) - None
Coupler Attached (LGR S/N: 10469238) - None
Host Connected (LGR S/N: 10469238) - None
Stopped (LGR S/N: 10469238) - None
End Of File (LGR S/N: 10469238) - None


In [157]:
import re
def get_units(header):
    result = re.search(', (.*) [(]', header)
    try:
        return result.group(1)
    except AttributeError:
        return None
    
for header in header_row:
    print(header, end=' - ')
    print(get_units(header))

# - None
Date Time, GMT+01:00 - None
Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238) - Â°F
Coupler Detached (LGR S/N: 10469238) - None
Coupler Attached (LGR S/N: 10469238) - None
Host Connected (LGR S/N: 10469238) - None
Stopped (LGR S/N: 10469238) - None
End Of File (LGR S/N: 10469238) - None


In [101]:
import re
def get_logger_serial_number(header):
    result = re.search('LGR S/N: (.*?)[\),]', header)
    try:
        return result.group(1)
    except AttributeError:
        return None
    
for header in header_row:
    print(header, end=' - ')
    print(get_logger_serial_number(header))

# - None
Date Time, GMT+01:00 - None
Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238) - 10469238
Coupler Detached (LGR S/N: 10469238) - 10469238
Coupler Attached (LGR S/N: 10469238) - 10469238
Host Connected (LGR S/N: 10469238) - 10469238
Stopped (LGR S/N: 10469238) - 10469238
End Of File (LGR S/N: 10469238) - 10469238


In [102]:
import re
def get_sensor_serial_number(header):
    result = re.search('SEN S/N: (.*?)[\),]', header)
    try:
        return result.group(1)
    except AttributeError:
        return None
    
for header in header_row:
    print(header, end=' - ')
    print(get_sensor_serial_number(header))

# - None
Date Time, GMT+01:00 - None
Temp, Â°F (LGR S/N: 10469238, SEN S/N: 10469238) - 10469238
Coupler Detached (LGR S/N: 10469238) - None
Coupler Attached (LGR S/N: 10469238) - None
Host Connected (LGR S/N: 10469238) - None
Stopped (LGR S/N: 10469238) - None
End Of File (LGR S/N: 10469238) - None


It would then be possible to create a 'header_list' with each item as a dictionary of these attributes above.

In [159]:
def get_header_list(header_row):
    result=[]
    for i,header in enumerate(header_row):
        d={}
        d['column']=i
        d['title']=get_title(header)
        d['timezone']=get_timezone(header)
        d['units']=get_units(header)
        d['logger_serial_number']=get_logger_serial_number(header)
        d['sensor_serial_number']=get_sensor_serial_number(header)
        result.append(d)
    return result

header_list=get_header_list(header_row)
header_list    

[{'column': 0,
  'title': '#',
  'timezone': None,
  'units': None,
  'logger_serial_number': None,
  'sensor_serial_number': None},
 {'column': 1,
  'title': 'Date Time',
  'timezone': 'GMT+01:00',
  'units': None,
  'logger_serial_number': None,
  'sensor_serial_number': None},
 {'column': 2,
  'title': 'Temp',
  'timezone': None,
  'units': 'Â°F',
  'logger_serial_number': '10469238',
  'sensor_serial_number': '10469238'},
 {'column': 3,
  'title': 'Coupler Detached',
  'timezone': None,
  'units': None,
  'logger_serial_number': '10469238',
  'sensor_serial_number': None},
 {'column': 4,
  'title': 'Coupler Attached',
  'timezone': None,
  'units': None,
  'logger_serial_number': '10469238',
  'sensor_serial_number': None},
 {'column': 5,
  'title': 'Host Connected',
  'timezone': None,
  'units': None,
  'logger_serial_number': '10469238',
  'sensor_serial_number': None},
 {'column': 6,
  'title': 'Stopped',
  'timezone': None,
  'units': None,
  'logger_serial_number': '10469238'

### Working with the time series data

In [166]:
import csv

def get_data_rows(reader):
    #flag=false
    result=[]
    for row in reader:
        try:
            int(row[0])
            result.append(row)
        except ValueError:
            pass
    return result
        
with open('sample_hobo_data.csv', newline='') as f:
    reader = csv.reader(f)
    header_row=get_header_row(reader)
    data_rows=get_data_rows(reader)
            
data_rows

[['1', '10/10/19 11:00:00 AM', '72.199', '', '', '', '', ''],
 ['2', '10/10/19 11:00:02 AM', '', 'Logged', '', '', '', ''],
 ['3', '10/10/19 11:05:00 AM', '72.545', '', '', '', '', ''],
 ['4', '10/10/19 11:10:00 AM', '72.372', '', '', '', '', ''],
 ['5', '10/10/19 11:15:00 AM', '72.545', '', '', '', '', ''],
 ['6', '10/10/19 11:20:00 AM', '72.545', '', '', '', '', ''],
 ['7', '10/10/19 11:25:00 AM', '72.028', '', '', '', '', ''],
 ['8', '10/10/19 11:30:00 AM', '70.480', '', '', '', '', ''],
 ['9', '10/10/19 11:35:00 AM', '69.793', '', '', '', '', ''],
 ['10', '10/10/19 11:40:00 AM', '69.449', '', '', '', '', ''],
 ['11', '10/10/19 11:45:00 AM', '69.278', '', '', '', '', ''],
 ['12', '10/10/19 11:50:00 AM', '69.278', '', '', '', '', ''],
 ['13', '10/10/19 11:55:00 AM', '69.278', '', '', '', '', ''],
 ['14', '10/10/19 12:00:00 PM', '69.449', '', '', '', '', ''],
 ['15', '10/10/19 12:05:00 PM', '69.622', '', '', '', '', ''],
 ['16', '10/10/19 12:10:00 PM', '69.964', '', '', '', '', ''],
 

In [180]:
def get_data_columns(data_rows):
    return list(zip(*data_rows))
    
data_columns=get_data_columns(data_rows)
data_columns 

[('1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25',
  '26',
  '27',
  '28',
  '29',
  '30',
  '31',
  '32',
  '33',
  '34',
  '35',
  '36',
  '37',
  '38',
  '39',
  '40',
  '41',
  '42',
  '43',
  '44',
  '45',
  '46',
  '47',
  '48',
  '49',
  '50',
  '51',
  '52',
  '53',
  '54',
  '55',
  '56',
  '57',
  '58',
  '59',
  '60',
  '61',
  '62',
  '63',
  '64',
  '65',
  '66',
  '67',
  '68',
  '69',
  '70',
  '71',
  '72',
  '73',
  '74',
  '75',
  '76',
  '77',
  '78',
  '79',
  '80',
  '81',
  '82',
  '83',
  '84',
  '85',
  '86',
  '87',
  '88',
  '89',
  '90',
  '91',
  '92',
  '93',
  '94',
  '95',
  '96',
  '97',
  '98',
  '99',
  '100',
  '101',
  '102',
  '103',
  '104',
  '105',
  '106',
  '107',
  '108',
  '109',
  '110',
  '111',
  '112',
  '113',
  '114',
  '115',
  '116',
  '117',
  '118',
  '119',
  '120',
  '121',
  '122',
  '123',
 

### Working with timezones

In [220]:
def get_hobo_tz(header_list):
    for header_dict in header_list:
        tz=header_dict['timezone']
        if tz: return tz

hobo_tz=get_hobo_tz(header_list)
hobo_tz

'GMT+01:00'

In [201]:
def get_timezone_timedelta(hobo_tz):
    if hobo_tz.startswith('GMT'):
        pass
    else:
        print('Warning: timezone code not understood - GMT assumed')
                
    sign=hobo_tz[-6:-5]
    if sign=='+':
        mod=1
    else:
        mod=-1
    time_str=hobo_tz[-5:]
    dt=datetime.datetime.strptime('01:00', '%I:%M')
    hour=dt.hour*mod
    minutes=dt.minute*mod        

    return datetime.timedelta(hours=hour,minutes=minutes)
        
td=get_timezone_timedelta(hobo_tz)
td

datetime.timedelta(seconds=3600)

In [203]:
def get_timezone_object(hobo_tz):
    td=get_timezone_timedelta(hobo_tz)
    return datetime.timezone(td,hobo_tz)
    
tz=get_timezone_object(hobo_tz)
tz

datetime.timezone(datetime.timedelta(seconds=3600), 'GMT+01:00')

### Working with the timestamps

The timestamps are:

In [181]:
timestamps=data_columns[1]
timestamps[:3]

('10/10/19 11:00:00 AM', '10/10/19 11:00:02 AM', '10/10/19 11:05:00 AM')

These can be converted to datetime.datetime objects

In [218]:
import datetime
def convert_timestamp_to_datetime(timestamp,tz):
    dt=datetime.datetime.strptime(timestamp, '%m/%d/%y %I:%M:%S %p')
    return datetime.datetime(dt.year,dt.month,dt.day,dt.hour,dt.minute,dt.second,0,tz)

datetimes=[convert_timestamp_to_datetime(x,tz) for x in timestamps]
datetimes[:3]

[datetime.datetime(2019, 10, 10, 11, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=3600), 'GMT+01:00')),
 datetime.datetime(2019, 10, 10, 11, 0, 2, tzinfo=datetime.timezone(datetime.timedelta(seconds=3600), 'GMT+01:00')),
 datetime.datetime(2019, 10, 10, 11, 5, tzinfo=datetime.timezone(datetime.timedelta(seconds=3600), 'GMT+01:00'))]