Isolated example of bins=(int) vs bins=range()

In [None]:
from matplotlib import gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the trip summary that every plot below is built from.
# df cols (duration, month, hour, day_of_week, user_type)
df = pd.read_csv('./data/NYC-2016-Summary.csv')

# Month abbreviations padded with '' at index 0 -- presumably so that
# months[n] is the label for month number n (1-12); confirm against usage.
# NOTE(review): not referenced anywhere in the visible part of this notebook.
months = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    
def hist2d_example(gs, df, user_type, bins, manual=False):
    """Plot a month-vs-duration 2D histogram in one grid cell and print its data.

    Args:
        gs: Subplot spec selecting the cell to draw in
            (e.g. ``gridspec.GridSpec(...)[i]``).
        df: DataFrame with ``month`` and ``duration`` columns; both are
            converted to integers before binning.
        user_type: Label used in the title and debug printout only; ``df`` is
            not filtered by it here.
        bins: Forwarded unchanged as ``bins`` to the histogram call.  Per the
            numpy/matplotlib documentation, a single int or a single 1-D
            sequence of edges is applied to BOTH axes, so ``range(1, 14)``
            also restricts the duration axis to the edges 1..13.  Pass
            ``[x_bins, y_bins]`` to bin each axis independently.
        manual: If truthy, compute the histogram with ``np.histogram2d`` and
            draw it with ``ax.pcolorfast``; otherwise let ``hist2d`` do both.
    """
    ax = plt.subplot(gs)
    ax.set_title("{}.  bins={},  manual={}".format(user_type, bins, manual))
    ax.set_ylabel("Trip Duration (minutes)")
    ax.set_xlabel('Month')

    # Vectorised integer conversion instead of a per-element Python loop.
    x = df.month.astype(int).values
    y = df.duration.astype(int).values

    # customers by duration by month
    if manual:
        H, xedges, yedges = np.histogram2d(x, y, bins=bins)
        # H is indexed [x, y]; pcolorfast expects rows to run along y.
        mappable = ax.pcolorfast(xedges, yedges, H.T)
    else:
        H, xedges, yedges, mappable = ax.hist2d(x, y, bins=bins)

    # Pass the mappable and its axes explicitly.  A bare plt.colorbar() uses
    # pyplot's "current image", which an Axes-level call such as pcolorfast
    # does not update, so it could bind to an image from another subplot
    # (or fail if no image has been registered yet).
    plt.colorbar(mappable, ax=ax, label='# Trips Taken')

    # print some debug info
    print("\n{}.  bins={},  manual={}".format(user_type, bins, manual))
    # Reorient H so printed rows/columns match the cells as drawn
    # (columns = x bins, first row = highest y bin) for easier debugging.
    as_graphed = np.fliplr(H).T
    print("Hist2d data\n{}".format(as_graphed))
    print("xedges\n{}\nyedges\n{}".format(xedges, yedges))
    
# 20x10 inch figure divided into a 2x2 grid; cells are addressed by flat index.
fig = plt.figure(figsize=(20, 10))
gs = gridspec.GridSpec(nrows=2, ncols=2)

# Split by rider type, then keep only trips strictly shorter than that
# group's (integer-truncated) 95th-percentile duration.
df_customer = df[df.user_type == 'Customer']
df_customer = df_customer[df_customer.duration < int(np.percentile(df_customer.duration, 95))]

df_subscriber = df[df.user_type == 'Subscriber']
df_subscriber = df_subscriber[df_subscriber.duration < int(np.percentile(df_subscriber.duration, 95))]

# Only the Customer month data is plotted: the point is the matplotlib
# questions, not the data analysis.
# Row 0 uses an integer bin count, row 1 an explicit range of edges; within
# each row the left cell is hist2d and the right cell is the manual path.
for row, bins in enumerate((12, range(1, 14))):
    hist2d_example(gs[2 * row], df_customer, 'Customer', bins, manual=False)
    hist2d_example(gs[2 * row + 1], df_customer, 'Customer', bins, manual=True)

plt.show()

So after a bit of staring, it seems the graphs are different because using range() truncates my y (duration) axis at 12, so I'm only seeing that range in expanded detail.  But these questions still remain.

* Why when given a range does it truncate my yedges at 12?
* Why does xedges need to be from 1 to 13 when range is used and 1 to 12 when an integer is used?  If you change the range to range(1,13) you truncate the right most column of data, and if you specify bins=13 you get an additional column of empty data in the middle of the graph.
* All I want is for my xedges to line up with the columns they represent, and Stack Overflow suggests using a range to accomplish this; however, this caused the issues mentioned above.  Why?  There must be an easier way to accomplish this, no?


