Commit
draw group boundaries correctly with string data
cpetrich committed Dec 26, 2018
1 parent 7ce8a34 commit a1a67a8
Showing 1 changed file with 54 additions and 49 deletions.
103 changes: 54 additions & 49 deletions src/rawdatx/process_XML.py
@@ -175,16 +175,16 @@ def ET_tostring(root, encoding, pretty_print):

def find_datetime_idx(dt, db, check_until=False):
#if dt in db: return db[dt] # until 12 June 2015

if not check_until: # since 13 June 2015
# shortcut only if we are looking for FROM
if dt in db: return db[dt]
elif global_until_mode == XML_flag_until_exclusive:
# shortcut only if we are looking for UNTIL and mode is
# exclusive
if dt in db: return db[dt]


dbk = list(db.keys())
dbk.sort()
dbk=np.array(dbk)
@@ -201,12 +201,12 @@ def find_datetime_idx(dt, db, check_until=False):
'Legal values for "%s" are: "%s", "%s", "%s" (default).') %
(XML_attr_until, XML_attr_except_until, XML_attr_until_mode, XML_attr_until_mode,
global_until_mode,
XML_flag_until_inclusive,XML_flag_until_exclusive,XML_flag_until_disallowed))
if True:
# version of 13 June 2015
or_earlier = check_until
if or_earlier:
if until_is_inclusive:
# UNTIL: we return the first index AFTER the requested end
# --> until will be INCLUSIVE
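# illustrative sketch (not part of this commit): with dbk sorted, the
# FROM/UNTIL lookups above amount to np.searchsorted calls, where 'side'
# selects the inclusive/exclusive behaviour:
#   idx_from            = np.searchsorted(dbk, dt, side='left')   # first key >= dt
#   idx_until_inclusive = np.searchsorted(dbk, dt, side='right')  # first key >  dt
#   idx_until_exclusive = np.searchsorted(dbk, dt, side='left')   # first key >= dt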
@@ -373,8 +373,8 @@ def _nan_helper(y):
def interpolate_over_NaN(data):
if len(data[data==data]) == 0:
return data.copy()
data_out=data.copy()
nans, x= _nan_helper(data_out)
data_out[nans]= np.interp(x(nans), x(~nans), data_out[~nans])
return data_out
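# usage sketch (hypothetical values): gaps are filled by linear interpolation
#   interpolate_over_NaN(np.array([1.0, np.nan, np.nan, 4.0]))
#   --> array([1., 2., 3., 4.])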

@@ -390,36 +390,36 @@ def _detrend(y):
return (y-np.polyval(p, x)).reshape(s)

def remove_spikes(data,threshold=None,window=12):
"""remove everything that deviates more than 500kPa from the median of a detrended 1-hour (12 points) window"""
"""remove everything that deviates more than 500kPa from the median of a detrended 1-hour (12 points) window"""
# note that trends of 100kPa/hour are absolutely realistic
# so anything that departs by more than 500kPa from the trend should
# be pretty massive
# (there are 2000 kPa spikes coming from one of the sensors)
# TODO: maybe change to detection of singular spikes
# since we do have sudden jumps from tension -> -100kPa
# that give high STD but are perfectly correct

# ignore the beginning, so data length is multiples of the window size
skip = len(data)-window*(len(data)//window)

a=data[skip:]

if True:
# remove any Inf
a[a==np.inf]=np.nan
a[a==-np.inf]=np.nan
# remember where we had NaN/Inf
NaN_idx = a!=a
# interpolate over NaN:
a=interpolate_over_NaN(a)

b = a.reshape((len(a)//window,window))

# this will fail if there are NaN (or Inf) in the array
c = _detrend(b) # simulating scipy.signal.detrend(b,axis=1)

median = np.median(c,axis=1)

if threshold is None:
# if there is only a single peak in an array of length WINDOW
# then this peak will be N times the standard deviation of
Expand All @@ -430,7 +430,7 @@ def remove_spikes(data,threshold=None,window=12):
enhance = 100. # constant relating the typical spike height
# to the typical noise floor (and window size?)
# and the noise floor variability to the noise floor median
std = np.std(c,axis=1)
# note: do not use mean, spikes of 1e16 reading would
# cause tremendous errors.
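# illustrative sketch (not part of this commit), assuming a hypothetical
# threshold derived from the per-window noise floor as described above:
#   noise = np.median(std)                 # typical noise floor across windows
#   if threshold is None: threshold = enhance * noise
#   spikes = np.abs(c - median[:,None]) > threshold
#   a[spikes.reshape(a.shape)] = np.nan    # flagged samples become NaN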

@@ -462,10 +462,10 @@ def replace_time_with_NaN(data, rep_time):
else:
NAN = np.nan

for rt in rep_time:
date = date_string_to_datetime(rt)
if date == 'error':
raise ValueError('Could not decode date: %s' % rt)
if date not in datetime_idx_db:
# unknown date, silently ignore
# (could be that the XML file is covering events in the future)
@@ -605,7 +605,7 @@ def _symbol_convert(test_string, user_vars,sub_symbol=None,substitute=None):


def _datetime_str(dt):
if dt is None: return 'None'
out = '_datetime.datetime('
out += '%i,%i,%i,%i,%i)' % (dt.year,dt.month,dt.day,dt.hour,dt.minute)
return out
@@ -772,8 +772,8 @@ def make_environment(measurements,env=None):
# XML processing

def get_all_XML_tags(root, tag_name):
"""Return list of tag elements or list of self"""
if root.tag == tag_name: return [root]
"""Return list of tag elements or list of self"""
if root.tag == tag_name: return [root]
return root.findall('.//'+tag_name)

def _get_attrib_or_None(element, attribute):
@@ -795,21 +795,21 @@ def date_string_to_datetime(date):
return date_date

def _get_xml_date(element, attribute):
"""Return date or None"""
"""Return date or None"""
try:
date = element.attrib[attribute]
if len(date.strip())==0: date = None
except KeyError:
date = None

date_date = None

if date != None:
date_date = date_string_to_datetime(date)

if date_date == 'error':
raise ValueError('Invalid format for date: %s' % date)

return date_date

def _get_date_interval_of_all_parents(root, except_interval=False):
@@ -825,11 +825,11 @@ def _get_date_interval_of_all_parents(root, except_interval=False):
if (f is not None) or (u is not None):
intervals[0].append( f )
intervals[1].append( u )
return intervals

def get_defined_date_range(element):

# get date range specified by parents
starts,ends = _get_date_interval_of_all_parents(element)

# add date range specified in current element
@@ -863,19 +863,19 @@ def get_defined_except_date_range(element):
ends.append(euntil)

return starts, ends


def _dates_in_range(dates, start_list, end_list):
"""Returns boolean array of dates in range"""
ok = np.ones(dates.shape)
for i in xrange(len(start_list)):
if start_list[i] is not None:
ok = ok * (dates>=start_list[i])

if global_until_mode == XML_flag_until_inclusive:
if end_list[i] is not None:
ok = ok * (dates<=end_list[i])

elif global_until_mode == XML_flag_until_exclusive:
if end_list[i] is not None:
ok = ok * (dates<end_list[i])
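# worked example (hypothetical): with start_list=[t0] and end_list=[t1],
# ok[i]==1.0 iff t0 <= dates[i] <= t1 (inclusive mode)
# or t0 <= dates[i] < t1 (exclusive mode)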
@@ -893,36 +893,36 @@ def get_all_mapped_dates(data, root):
all_dates = set()
start_global = _get_xml_date(root, XML_attr_from)
end_global = _get_xml_date(root, XML_attr_until)
# go through every mapping entry
maps = get_all_XML_tags(root,XML_map) + get_all_XML_tags(root,XML_def)
sources = {}
for entry in maps:
try: key = entry.attrib[XML_attr_src]
except: continue # does not have SRC attribute --> does not access data

sources[key]=entry

# get date range specified by parents
starts,ends = _get_date_interval_of_all_parents(entry)

# add date range specified in current element
starts.append(_get_xml_date(entry,XML_attr_from))
ends.append(_get_xml_date(entry,XML_attr_until))

# get measured dates
dates = data[key]['dates']
# get valid measured dates as specified by mapping
# --> connect through AND
ok = _dates_in_range(dates,starts, ends)

# unite with current list (use sets for speed)
# --> connect through OR
all_dates = all_dates.union( dates[ok] )

# sort
all_dates = np.array(list(all_dates))
all_dates = np.sort(all_dates)

return all_dates, sources


@@ -935,7 +935,7 @@ def make_header(sheet, datetime_string=None):
info = metadata_header
info.append(["File Time:", datetime_string])

meta = {}
for row in xrange(len(info)):
sheet.write_row(row,0,info[row])
meta[info[row][0]]=info[row][1]
@@ -944,13 +944,13 @@ def make_header(sheet, datetime_string=None):
return len(info), meta


def _write_dates(sheet, row0, all_dates):
number_format='yyyy/m/d h:mm'
for row in xrange(len(all_dates)):
sheet['A%i' % (row+row0)] = all_dates[row]
sheet['A%i' % (row+row0)].number_format = number_format

def cell_apply_style(cell, style):
for key in style.__dict__:
exec('cell.%s=style.%s' % (key,key)) #XYZ
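# (equivalent to: setattr(cell, key, getattr(style, key)))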

@@ -997,6 +997,11 @@ def write_all(workbook,sheet, row0, groups, all_dates, data):
s_data1.set_num_format(data_format)
s_data1.set_left()

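# formats for string cells: s_gen1 carries the left border that marks the
# start of a new group, mirroring s_data/s_data1 used for numeric cells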
s_gen=workbook.add_format()
s_gen1=workbook.add_format()
s_gen1.set_left()


N_dates = len(all_dates)

if True:
@@ -1077,7 +1082,7 @@ def write_all(workbook,sheet, row0, groups, all_dates, data):
if values[idx2]==values[idx2]: #TEST FOR INF XYZ
# don't write NaN -- this causes Excel to emit a warning during opening
if isinstance(values[idx2],str):
sheet.write(row_data+idx2,col,values[idx2], s_gen if idx>0 else s_gen1)
else:
sheet.write_number(row_data+idx2,col,values[idx2], s_data if idx>0 else s_data1)
else:
@@ -1093,20 +1098,20 @@ def write_all(workbook,sheet, row0, groups, all_dates, data):
structure[g_name][name]={'unit':u_name,'values':values.copy()}


col_group += len(maps)

return structure

def extract_MAPs_in_order(group):
"""get all MAP elements, including those in nested SET elements"""
"""get all MAP elements, including those in nested SET elements"""

# flatten the group and inspect all elements separately
children=group.getiterator()
maps=[]
for child in children:
if child.tag in (XML_map,): maps.append(child)

return maps

def write_sources(workbook, sheet, sources):
s_title=workbook.add_format({'bold':True})
@@ -1264,7 +1269,7 @@ def cfg_get_string(cfg, section, item):
except configparser.NoOptionError: xml_path = path_in

fn_in_npy = cfg_get_string(config,CFG_fn_path,'raw_data')
fn_in_xml_definition = cfg_get_string(config,CFG_fn_path,'xml_map')
fn_out_excel = cfg_get_string(config,CFG_fn_path,'processed_data_xlsx')
fn_out_structure = cfg_get_string(config,CFG_fn_path,'processed_data_npy')

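Note on the change above: string cells were previously written with sheet.write() and no cell format, so the left border that marks a group boundary appeared only on numeric cells (written via write_number() with s_data/s_data1). The new s_gen/s_gen1 formats mirror that pair for strings, with s_gen1.set_left() supplying the border. A minimal xlsxwriter sketch of the same pattern (file name and cell contents are hypothetical):

    import xlsxwriter

    workbook = xlsxwriter.Workbook('demo.xlsx')
    sheet = workbook.add_worksheet()

    plain = workbook.add_format()            # interior column of a group
    boundary = workbook.add_format()
    boundary.set_left()                      # left border marks the first column of a group

    sheet.write(0, 0, 'label', boundary)     # string cell now carries the border too
    sheet.write_number(0, 1, 42.0, plain)    # numeric cell, interior column
    workbook.close()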