In [None]:
# %matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import sys
from Histo import Histo

Plotting: Comparing built-in plt.hist, [plt.errorbar with np.histogram] and my Histo class
--------
Matplotlib has a `plt.hist` function that takes your data directly and plots it in a histogram. It is very memory intensive, because it requires you to keep your whole dataset around. It also does not have good error bar options.

To get error bars, we can use a combination of `np.histogram()` and `plt.errorbar()`. But this suffers from the same memory issue as above.

My Histo class (instantiated with e.g. `Histo.FixedWidthConstructor(20,-4.0,4.0)`) has a `Fill` function that lets you loop through your data and discard each value from memory once it has been histogrammed (e.g. if you are running over multi-GB+ files that you cannot keep in memory all at once).

In [None]:
# Compare three ways of drawing the same 20-bin histogram on [-4, 4],
# all overlaid on one log-scale axis. Flip the `if True` / `if False`
# switches below to enable or disable each method.
ax = plt.subplot()
ax.set_yscale("log")

# First try: pyplot "hist". Needs the whole dataset in memory and offers
# no convenient error bars.
if True:
    xx = np.random.normal(0, 1, (100000))
    plt.hist(xx, np.linspace(-4, 4, num=21), edgecolor='red',
             histtype='step', fill=None, label='plt.hist')

# Second try: np.histogram + plt.errorbar. Gives error bars, but the full
# dataset still has to be held in memory.
if False:
    x = np.random.randn(100000)
    print("size of data: {} kB".format(sys.getsizeof(x)/1000))
    counts, bin_edges = np.histogram(x, 20, [-4, 4])
    # Bin centre = left edge + half the bin width.
    bin_centers = bin_edges[:-1] + np.diff(bin_edges) / 2
    # sqrt(N) uncertainty on each bin count.
    err = np.sqrt(counts)
    plt.errorbar(bin_centers, counts, yerr=err, fmt='o', label='np.histogram')

# Third try: the Histo class (supports 1D, 2D, 3D histograms). Values are
# filled one at a time, so the dataset never has to be stored.
if True:
    histo = Histo.FixedWidthConstructor(20,-4.0,4.0)
    # sys.getsizeof returns bytes, so divide by 1000 to match the "kB" label
    # (and the np.histogram branch above). It only measures the object
    # itself, not the objects it refers to, so treat this as a lower bound.
    print("size of histo: {} kB".format(sys.getsizeof(histo)/1000))

    for i in range(100000):
        histo.Fill(np.random.randn())

    histo.Draw(label='histo.Draw')
    histo.hist(edgecolor='purple',histtype='step',fill=None,label='histo.plt_hist')

ax.legend()
plt.show()

Another example, showing another marker and error bar style:
-------

In [None]:
fig2, ax2 = plt.subplots()

# 1D histogram, filled one value at a time and drawn with hollow markers.
histo2 = Histo.FixedWidthConstructor(20, -4.0, 4.0)
for _ in range(10000):
    histo2.Fill(np.random.randn())

# The [1:-1] slices drop the first and last entries of sumw / sumw2
# (presumably underflow/overflow bins -- confirm against the Histo class).
centers = histo2.BinCenters(histo2.xbin_edges)
contents = histo2.sumw[1:-1]
errors = np.sqrt(histo2.sumw2[1:-1])
ax2.errorbar(centers, contents, yerr=errors, fmt='o', fillstyle='none')


plt.show()

2-D histogram, with variable binning using pcolor and my histo2d
-------
(dividing each bin by its area to normalize)

In [None]:
fig2, ax2 = plt.subplots()
ax2.set(xlabel='x axis', ylabel='y axis', title='Plot title')

# 2D histogram with variable binning: 20 edges on [-4, 0] followed by
# 9 edges on [0.4, 4]; the same edges are used for x and y.
edges = list(np.linspace(-4, 0, num=20)) + list(np.linspace(0.4, 4, num=9))
histo2d = Histo(list(edges), list(edges))  # separate list objects per axis

for _ in range(100000):
    histo2d.Fill(np.random.normal(-1), np.random.normal(-1))

# pcolor takes the bin *edges* as its grid.
X, Y = np.meshgrid(histo2d.xbin_edges, histo2d.ybin_edges)

# Area of each bin: (y width) x (x width), rows along y and columns along x.
area = np.outer(np.diff(histo2d.ybin_edges), np.diff(histo2d.xbin_edges))

# NOTE(review): x and y binning are identical here, so the axis order of
# histo2d.sumw cannot be checked from this cell -- confirm against Histo.
sumw_width = histo2d.sumw[1:-1, 1:-1] / area

mesh = ax2.pcolor(X, Y, sumw_width, cmap='Blues', vmin=0, vmax=15000)
cbar = fig2.colorbar(mesh, ax=ax2)
cbar.set_label('z axis', rotation=270, labelpad=15)

plt.show()

2D binning, without dividing by area:
--------

In [None]:
fig2p1, ax2p1 = plt.subplots()
ax2p1.set(xlabel='x axis', ylabel='y axis', title='Plot title')

# Same histogram as the previous cell (histo2d), but showing the raw bin
# contents without dividing by bin area.
X, Y = np.meshgrid(histo2d.xbin_edges, histo2d.ybin_edges)
raw_contents = histo2d.sumw[1:-1, 1:-1]
mesh = ax2p1.pcolor(X, Y, raw_contents, cmap='Blues', vmin=0, vmax=800)
fig2p1.colorbar(mesh, ax=ax2p1).set_label('z axis', rotation=270, labelpad=15)

plt.show()

Contourf Example (using meshgrid and my histo2d)
---------
Please note the contourf-specific issues related to getting the colorbar levels to match the plot levels!

In [None]:
fig3, ax3 = plt.subplots()
ax3.set(xlabel='x axis', ylabel='y axis', title='Plot title')

# Contours are *point*-based plots, so evaluate the histogram at the bin
# centers rather than at the bin edges.
x_centers = histo2d.BinCenters(histo2d.xbin_edges)
y_centers = histo2d.BinCenters(histo2d.ybin_edges)
X, Y = np.meshgrid(x_centers, y_centers)

# Bin areas still come from the edges: (y width) x (x width).
area = np.outer(np.diff(histo2d.ybin_edges), np.diff(histo2d.xbin_edges))
sumw_width = histo2d.sumw[1:-1, 1:-1] / area

# For contourf only:
# unless the levels are set explicitly, you have no control over the
# min/max of the colorbar!
vmin, vmax = 0, 18000
n_bands = 10
levels = np.linspace(vmin, vmax, n_bands + 1)

contours = ax3.contourf(X, Y, sumw_width, cmap='Blues',
                        vmin=vmin, vmax=vmax, levels=levels)

ax3.set_xlim([-4, 4])
ax3.set_ylim([-4, 4])
fig3.colorbar(contours, ax=ax3).set_label('z axis', rotation=270, labelpad=15)
plt.show()

Stacked Histograms
=======
Let's make stacked histograms from data where the x-values do not exactly line up... using an **outer merge** function.

In [None]:
import pandas as pd
import random

# Two series sampled at different, randomly chosen x positions
# (rounded to 2 decimals): y = 0.5*x for the first, y = x for the second.
df1 = pd.DataFrame()
df1['x'] = list(np.round(random.random(),2) for a in range(40))
df1['y'] = list(x*0.5 for x in df1['x'])

df2 = pd.DataFrame()
df2['x'] = list(np.round(random.random(),2) for a in range(100))
df2['y'] = list(x for x in df2['x'])

# Outer merge keeps every x from either frame; a y column is NaN wherever
# its frame had no entry at that x. Then:
#   1. sort by x,
#   2. forward-fill the NaNs (DataFrame.ffill replaces the deprecated
#      fillna(method='ffill')),
#   3. set any NaNs left at the start to 0.
# Chaining instead of inplace=True keeps the cell safe to re-run.
combo = (
    pd.merge(df1, df2, how='outer', on='x', suffixes=('_1', '_2'))
    .sort_values(by='x')
    .ffill()
    .fillna(0)
)

fig4,ax4 = plt.subplots()

ax4.stackplot(combo['x'],[combo['y_1'],combo['y_2']],labels=['y1','y2'],edgecolor='black',alpha=0.8)
ax4.legend(loc='upper left')

# One option for axis ranges:
ax4.axis(xmin=combo['x'].iloc[0],xmax=combo['x'].iloc[-1])

# Another option for axis ranges:
ax4.set_ylim([0,1.8]);

Graph with Filled, Asymmetric Errors
=========

In [None]:
fig5,ax5 = plt.subplots()

# Poisson-distributed counts whose mean is round(sqrt(x)) at each x.
x = np.arange(0,500,50)
y = np.random.poisson(np.rint(np.sqrt(x)))
# sqrt(n) uncertainty on each count.
yerr = np.sqrt(y)

ax5.plot(x,y,label='Poisson-distributed')
ax5.set(xlabel='bins', ylabel='Events')
# Shaded band from y - yerr to y + yerr. The label is a raw string so the
# backslash in the LaTeX command reaches matplotlib unchanged and Python
# does not warn about an invalid "\s" escape.
ax5.fill_between(x, y-yerr, y+yerr,
                 alpha=0.5, edgecolor='#1B2ACC', facecolor='#089FFF',label=r'$\sqrt{n}$')
ax5.legend(loc='lower right');