Skip to content

Commit 6ab3c85

Browse files
committed
added manuels scatter pie example
svn path=/trunk/matplotlib/; revision=5066
1 parent e48e356 commit 6ab3c85

File tree

3 files changed

+153
-23
lines changed

3 files changed

+153
-23
lines changed

examples/scatter_piecharts.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
This example makes custom 'pie charts' as the markers for a scatter plot

Thanks to Manuel Metz for the example
"""
import math
import numpy as np
import matplotlib.pyplot as plt


def wedge_vertices(start, stop, npoints=10):
    """
    Return the outline of one pie wedge as a list of (x, y) tuples.

    start, stop - the wedge limits, given as fractions of a full turn
    (0 is the positive x axis, 1 is a complete circle)

    npoints - the number of points sampled on the unit circle between
    the two limits

    The outline is just the origin (0,0) followed by the sampled
    (cos, sin) points on the circle.
    """
    theta = np.linspace(2*math.pi*start, 2*math.pi*stop, npoints)
    x = [0] + np.cos(theta).tolist()
    y = [0] + np.sin(theta).tolist()
    # build a real list: where zip() returns a one-shot iterator the
    # marker vertices would otherwise be consumed on first use
    return list(zip(x, y))


# first define the ratios; r1 and r2 are the cumulative fractions at
# which the first and second wedge end
r1 = 0.2       # first wedge covers 20%
r2 = r1 + 0.4  # second wedge covers 40%, the third wedge gets the rest

# define some sizes of the scatter marker
sizes = [60,80,120]

# calculate the points of the three pie markers
xy1 = wedge_vertices(0, r1)
xy2 = wedge_vertices(r1, r2)
xy3 = wedge_vertices(r2, 1)

fig = plt.figure()
ax = fig.add_subplot(111)
# draw the three wedges on top of each other at the same positions so
# that together they form one pie per data point
for verts, color in [(xy1, 'blue'), (xy2, 'green'), (xy3, 'red')]:
    ax.scatter( np.arange(3), np.arange(3), marker=(verts,0), s=sizes, facecolor=color )
plt.show()

lib/matplotlib/mlab.py

Lines changed: 91 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787

8888
import numpy as npy
8989

90+
9091
from matplotlib import nxutils
9192
from matplotlib import cbook
9293

@@ -2143,10 +2144,10 @@ def key_desc(name):
21432144

21442145

21452146
def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
2146-
converterd=None, names=None, missing=None):
2147+
converterd=None, names=None, missing='', missingd=None):
21472148
"""
21482149
Load data from comma/space/tab delimited file in fname into a
2149-
numpy record array and return the record array.
2150+
numpy (m)record array and return the record array.
21502151
21512152
If names is None, a header row is required to automatically assign
21522153
the recarray names. The headers will be lower cased, spaces will
@@ -2172,13 +2173,24 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
21722173
names, if not None, is a list of header names. In this case, no
21732174
header will be read from the file
21742175
2176+
missingd - is a dictionary mapping munged column names to field values
2177+
which signify that the field does not contain actual data and should
2178+
be masked, e.g. '0000-00-00' or 'unused'
2179+
2180+
missing - a string whose value signals a missing field regardless of
2181+
the column it appears in, e.g. 'unused'
2182+
21752183
if no rows are found, None is returned -- see examples/loadrec.py
21762184
"""
21772185

21782186
if converterd is None:
21792187
converterd = dict()
21802188

2189+
if missingd is None:
2190+
missingd = {}
2191+
21812192
import dateutil.parser
2193+
import datetime
21822194
parsedate = dateutil.parser.parse
21832195

21842196

@@ -2226,13 +2238,27 @@ def process_skiprows(reader):
22262238

22272239
process_skiprows(reader)
22282240

2229-
dateparser = dateutil.parser.parse
2241+
def ismissing(name, val):
2242+
"Should the value val in column name be masked?"
22302243

2231-
def myfloat(x):
2232-
if x==missing:
2233-
return npy.nan
2244+
if val == missing or val == missingd.get(name) or val == '':
2245+
return True
22342246
else:
2235-
return float(x)
2247+
return False
2248+
2249+
def with_default_value(func, default):
2250+
def newfunc(name, val):
2251+
if ismissing(name, val):
2252+
return default
2253+
else:
2254+
return func(val)
2255+
return newfunc
2256+
2257+
dateparser = dateutil.parser.parse
2258+
mydateparser = with_default_value(dateparser, datetime.date(1,1,1))
2259+
myfloat = with_default_value(float, npy.nan)
2260+
myint = with_default_value(int, -1)
2261+
mystr = with_default_value(str, '')
22362262

22372263
def mydate(x):
22382264
# try and return a date object
@@ -2241,16 +2267,16 @@ def mydate(x):
22412267
if d.hour>0 or d.minute>0 or d.second>0:
22422268
raise ValueError('not a date')
22432269
return d.date()
2270+
mydate = with_default_value(mydate, datetime.date(1,1,1))
22442271

2245-
2246-
def get_func(item, func):
2272+
def get_func(name, item, func):
22472273
# promote functions in this order
2248-
funcmap = {int:myfloat, myfloat:mydate, mydate:dateparser, dateparser:str}
2249-
try: func(item)
2274+
funcmap = {myint:myfloat, myfloat:mydate, mydate:mydateparser, mydateparser:mystr}
2275+
try: func(name, item)
22502276
except:
2251-
if func==str:
2277+
if func==mystr:
22522278
raise ValueError('Could not find a working conversion function')
2253-
else: return get_func(item, funcmap[func]) # recurse
2279+
else: return get_func(name, item, funcmap[func]) # recurse
22542280
else: return func
22552281

22562282

@@ -2266,7 +2292,7 @@ def get_converters(reader):
22662292
converters = None
22672293
for i, row in enumerate(reader):
22682294
if i==0:
2269-
converters = [int]*len(row)
2295+
converters = [myint]*len(row)
22702296
if checkrows and i>checkrows:
22712297
break
22722298
#print i, len(names), len(row)
@@ -2276,10 +2302,10 @@ def get_converters(reader):
22762302
if func is None:
22772303
func = converterd.get(name)
22782304
if func is None:
2279-
if not item.strip(): continue
2305+
#if not item.strip(): continue
22802306
func = converters[j]
22812307
if len(item.strip()):
2282-
func = get_func(item, func)
2308+
func = get_func(name, item, func)
22832309
converters[j] = func
22842310
return converters
22852311

@@ -2307,7 +2333,7 @@ def get_converters(reader):
23072333
item = itemd.get(item, item)
23082334
cnt = seen.get(item, 0)
23092335
if cnt>0:
2310-
names.append(item + '%d'%cnt)
2336+
names.append(item + '_%d'%cnt)
23112337
else:
23122338
names.append(item)
23132339
seen[item] = cnt+1
@@ -2327,15 +2353,24 @@ def get_converters(reader):
23272353
# iterate over the remaining rows and convert the data to date
23282354
# objects, ints, or floats as appropriate
23292355
rows = []
2356+
rowmasks = []
23302357
for i, row in enumerate(reader):
23312358
if not len(row): continue
23322359
if row[0].startswith(comments): continue
2333-
rows.append([func(val) for func, val in zip(converters, row)])
2360+
rows.append([func(name, val) for func, name, val in zip(converters, names, row)])
2361+
rowmasks.append([ismissing(name, val) for name, val in zip(names, row)])
23342362
fh.close()
23352363

23362364
if not len(rows):
23372365
return None
2338-
r = npy.rec.fromrecords(rows, names=names)
2366+
if npy.any(rowmasks):
2367+
try: from numpy.ma import mrecords
2368+
except ImportError:
2369+
raise RuntimeError('numpy 1.05 or later is required for masked array support')
2370+
else:
2371+
r = mrecords.fromrecords(rows, names=names, mask=rowmasks)
2372+
else:
2373+
r = npy.rec.fromrecords(rows, names=names)
23392374
return r
23402375

23412376

@@ -2529,26 +2564,59 @@ def format(item, just_pad_prec_spacer):
25292564

25302565

25312566

2532-
def rec2csv(r, fname, delimiter=',', formatd=None, missing='',
            missingd=None):
    """
    Save the data from numpy (m)recarray r into a comma/space/tab
    delimited file.  The record array dtype names will be used for
    column headers.

    fname - can be a filename or a file handle.  Support for gzipped
    files is automatic, if the filename ends in .gz

    delimiter - the field separator handed to csv.writer

    formatd - passed through get_formatd to obtain one format object
    per column name

    missing - the value written in place of a masked field when the
    column has no entry in missingd

    missingd - a dictionary mapping column names to the value written
    in place of a masked field of that column

    Rows are treated as masked when the first row of r has a
    _fieldmask attribute; otherwise every field is written as is.
    """

    if missingd is None:
        missingd = dict()

    def with_mask(func):
        # wrap a formatter so that masked fields bypass it and emit the
        # column's missing value instead
        def newfunc(val, mask, mval):
            if mask:
                return mval
            return func(val)
        return newfunc

    formatd = get_formatd(r, formatd)
    header = r.dtype.names
    funcs = [with_mask(csvformat_factory(formatd[name]).tostr)
             for name in header]

    # Our list of specials for missing values, one per column
    mvals = [missingd.get(name, missing) for name in header]

    fh, opened = cbook.to_filehandle(fname, 'w', return_opened=True)
    try:
        writer = csv.writer(fh, delimiter=delimiter)
        writer.writerow(header)

        # decide once, from the first row, whether r carries field masks
        ismasked = False
        if len(r):
            ismasked = hasattr(r[0], '_fieldmask')

        for row in r:
            if ismasked:
                row, rowmask = row.item(), row._fieldmask.item()
            else:
                rowmask = [False] * len(row)
            writer.writerow([func(val, mask, mval) for func, val, mask, mval
                             in zip(funcs, row, rowmask, mvals)])
    finally:
        # only close handles we opened ourselves, and do so even when
        # formatting or writing a row fails
        if opened:
            fh.close()

unit/mlab_unit.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,5 +55,27 @@ def test_csv2rec_roundtrip(self):
5555
print 'repr(dt.type)',repr(dt.type)
5656
self.failUnless( numpy.all(ra[name] == ra2[name]) ) # should not fail with numpy 1.0.5
5757

58+
def test_csv2rec_masks(self):
    """
    Make sure masked entries survive a csv2rec -> rec2csv -> csv2rec
    roundtrip.
    """
    csv_text = """date,age,weight,name
2007-01-01,12,32.2,"jdh1"
0000-00-00,0,23,"jdh2"
2007-01-03,,32.5,"jdh3"
2007-01-04,12,NaN,"jdh4"
2007-01-05,-1,NULL,"""
    # per-column markers that should be read back as masked fields
    missingd = dict(date='0000-00-00', age='-1', weight='NULL')

    # first pass: parse the literal CSV text
    source = StringIO.StringIO(csv_text)
    r1 = mlab.csv2rec(source, missingd=missingd)

    # second pass: write it back out and parse the result again
    buf = StringIO.StringIO()
    mlab.rec2csv(r1, buf, missingd=missingd)
    buf.seek(0)
    r2 = mlab.csv2rec(buf, missingd=missingd)

    # expected mask of every column after the roundtrip, row by row
    expected_masks = [
        ('date',   [0,1,0,0,0]),
        ('age',    [0,0,1,0,1]),
        ('weight', [0,0,0,0,1]),
        ('name',   [0,0,0,0,1]),
    ]
    for column, mask in expected_masks:
        self.failUnless( numpy.all( r2[column].mask == mask ))
79+
5880
if __name__=='__main__':
5981
unittest.main()

0 commit comments

Comments
 (0)