8787
8888import numpy as npy
8989
90+
9091from matplotlib import nxutils
9192from matplotlib import cbook
9293
@@ -2143,10 +2144,10 @@ def key_desc(name):
21432144
21442145
21452146def csv2rec (fname , comments = '#' , skiprows = 0 , checkrows = 0 , delimiter = ',' ,
2146- converterd = None , names = None , missing = None ):
2147+ converterd = None , names = None , missing = '' , missingd = None ):
21472148 """
21482149 Load data from comma/space/tab delimited file in fname into a
2149- numpy record array and return the record array.
2150+ numpy record array (a masked record array if any field is missing) and return it.
21502151
21512152 If names is None, a header row is required to automatically assign
21522153 the recarray names. The headers will be lower cased, spaces will
@@ -2172,13 +2173,24 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
21722173 names, if not None, is a list of header names. In this case, no
21732174 header will be read from the file
21742175
2176+ missingd - a dictionary mapping munged column names to field values
2177+ which signify that the field does not contain actual data and should
2178+ be masked, e.g. '0000-00-00' or 'unused'
2179+
2180+ missing - a string whose value signals a missing field regardless of
2181+ the column it appears in, e.g. 'unused'
2182+
21752183 if no rows are found, None is returned -- see examples/loadrec.py
21762184 """
21772185
21782186 if converterd is None :
21792187 converterd = dict ()
21802188
2189+ if missingd is None :
2190+ missingd = {}
2191+
21812192 import dateutil .parser
2193+ import datetime
21822194 parsedate = dateutil .parser .parse
21832195
21842196
@@ -2226,13 +2238,27 @@ def process_skiprows(reader):
22262238
22272239 process_skiprows (reader )
22282240
2229- dateparser = dateutil .parser .parse
2241+ def ismissing (name , val ):
2242+ "Should the value val in column name be masked?"
22302243
2231- def myfloat (x ):
2232- if x == missing :
2233- return npy .nan
2244+ if val == missing or val == missingd .get (name ) or val == '' :
2245+ return True
22342246 else :
2235- return float (x )
2247+ return False
2248+
2249+ def with_default_value (func , default ):
2250+ def newfunc (name , val ):
2251+ if ismissing (name , val ):
2252+ return default
2253+ else :
2254+ return func (val )
2255+ return newfunc
2256+
2257+ dateparser = dateutil .parser .parse
2258+ mydateparser = with_default_value (dateparser , datetime .date (1 ,1 ,1 ))
2259+ myfloat = with_default_value (float , npy .nan )
2260+ myint = with_default_value (int , - 1 )
2261+ mystr = with_default_value (str , '' )
22362262
22372263 def mydate (x ):
22382264 # try and return a date object
@@ -2241,16 +2267,16 @@ def mydate(x):
22412267 if d .hour > 0 or d .minute > 0 or d .second > 0 :
22422268 raise ValueError ('not a date' )
22432269 return d .date ()
2270+ mydate = with_default_value (mydate , datetime .date (1 ,1 ,1 ))
22442271
2245-
2246- def get_func (item , func ):
2272+ def get_func (name , item , func ):
22472273 # promote functions in this order
2248- funcmap = {int :myfloat , myfloat :mydate , mydate :dateparser , dateparser : str }
2249- try : func (item )
2274+ funcmap = {myint :myfloat , myfloat :mydate , mydate :mydateparser , mydateparser : mystr }
2275+ try : func (name , item )
22502276 except :
2251- if func == str :
2277+ if func == mystr :
22522278 raise ValueError ('Could not find a working conversion function' )
2253- else : return get_func (item , funcmap [func ]) # recurse
2279+ else : return get_func (name , item , funcmap [func ]) # recurse
22542280 else : return func
22552281
22562282
@@ -2266,7 +2292,7 @@ def get_converters(reader):
22662292 converters = None
22672293 for i , row in enumerate (reader ):
22682294 if i == 0 :
2269- converters = [int ]* len (row )
2295+ converters = [myint ]* len (row )
22702296 if checkrows and i > checkrows :
22712297 break
22722298 #print i, len(names), len(row)
@@ -2276,10 +2302,10 @@ def get_converters(reader):
22762302 if func is None :
22772303 func = converterd .get (name )
22782304 if func is None :
2279- if not item .strip (): continue
2305+ # if not item.strip(): continue
22802306 func = converters [j ]
22812307 if len (item .strip ()):
2282- func = get_func (item , func )
2308+ func = get_func (name , item , func )
22832309 converters [j ] = func
22842310 return converters
22852311
@@ -2307,7 +2333,7 @@ def get_converters(reader):
23072333 item = itemd .get (item , item )
23082334 cnt = seen .get (item , 0 )
23092335 if cnt > 0 :
2310- names .append (item + '%d' % cnt )
2336+ names .append (item + '_ %d' % cnt )
23112337 else :
23122338 names .append (item )
23132339 seen [item ] = cnt + 1
@@ -2327,15 +2353,24 @@ def get_converters(reader):
23272353 # iterate over the remaining rows and convert the data to date
23282354 # objects, ints, or floats as appropriate
23292355 rows = []
2356+ rowmasks = []
23302357 for i , row in enumerate (reader ):
23312358 if not len (row ): continue
23322359 if row [0 ].startswith (comments ): continue
2333- rows .append ([func (val ) for func , val in zip (converters , row )])
2360+ rows .append ([func (name , val ) for func , name , val in zip (converters , names , row )])
2361+ rowmasks .append ([ismissing (name , val ) for name , val in zip (names , row )])
23342362 fh .close ()
23352363
23362364 if not len (rows ):
23372365 return None
2338- r = npy .rec .fromrecords (rows , names = names )
2366+ if npy .any (rowmasks ):
2367+ try : from numpy .ma import mrecords
2368+ except ImportError :
2369+ raise RuntimeError ('numpy 1.05 or later is required for masked array support' )
2370+ else :
2371+ r = mrecords .fromrecords (rows , names = names , mask = rowmasks )
2372+ else :
2373+ r = npy .rec .fromrecords (rows , names = names )
23392374 return r
23402375
23412376
@@ -2529,26 +2564,59 @@ def format(item, just_pad_prec_spacer):
25292564
25302565
25312566
2532- def rec2csv (r , fname , delimiter = ',' , formatd = None ):
2567+ def rec2csv (r , fname , delimiter = ',' , formatd = None , missing = '' ,
2568+ missingd = None ):
25332569 """
2534- Save the data from numpy record array r into a comma/space/tab
2570+ Save the data from numpy (m)recarray r into a comma/space/tab
25352571 delimited file. The record array dtype names will be used for
25362572 column headers.
25372573
25382574
25392575 fname - can be a filename or a file handle. Support for gzipped
25402576 files is automatic, if the filename ends in .gz
2577+
2578+ See csv2rec for information about missing and missingd, which
2579+ are used here to supply the values written to the CSV file in
2580+ place of masked fields.
25412581 """
2582+
2583+ if missingd is None :
2584+ missingd = dict ()
2585+
2586+ def with_mask (func ):
2587+ def newfunc (val , mask , mval ):
2588+ if mask :
2589+ return mval
2590+ else :
2591+ return func (val )
2592+ return newfunc
2593+
25422594 formatd = get_formatd (r , formatd )
25432595 funcs = []
25442596 for i , name in enumerate (r .dtype .names ):
2545- funcs .append (csvformat_factory (formatd [name ]).tostr )
2597+ funcs .append (with_mask ( csvformat_factory (formatd [name ]).tostr ) )
25462598
25472599 fh , opened = cbook .to_filehandle (fname , 'w' , return_opened = True )
25482600 writer = csv .writer (fh , delimiter = delimiter )
25492601 header = r .dtype .names
25502602 writer .writerow (header )
2603+
2604+ # Our list of specials for missing values
2605+ mvals = []
2606+ for name in header :
2607+ mvals .append (missingd .get (name , missing ))
2608+
2609+ ismasked = False
2610+ if len (r ):
2611+ row = r [0 ]
2612+ ismasked = hasattr (row , '_fieldmask' )
2613+
25512614 for row in r :
2552- writer .writerow ([func (val ) for func , val in zip (funcs , row )])
2615+ if ismasked :
2616+ row , rowmask = row .item (), row ._fieldmask .item ()
2617+ else :
2618+ rowmask = [False ] * len (row )
2619+ writer .writerow ([func (val , mask , mval ) for func , val , mask , mval
2620+ in zip (funcs , row , rowmask , mvals )])
25532621 if opened :
25542622 fh .close ()
0 commit comments