In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cycler
from matplotlib.ticker import FuncFormatter
# %matplotlib inline
# note that matplotlib inline cannot be used here, otherwise interact will plot each new graph on a new axes

In [2]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [3]:
df = pd.read_csv("./data/titanic.csv")
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [4]:
def plot_hist(data, ax=None,x_label=None, y_label=None, title=None, 
              lb=None, ub=None, bins=20, log_x=False, figsize=(6, 4)):
    """
    Plots a histogram using matplotlib's histogram plotting
    
    Parameters
    ----------
    data: pandas series, list of series or df
    ax: if provided, plot on this axis
    x_label, y_label: x and y axis labels
    title: title of chart
    lb, ub: lower bound and upper bound of data to be plotted. This is a global filter applied to all columns.
    bins: number of bins in histogram
    log_x: boolean. whether to use log spaced bins
    figsize: tuple. size of the chart
    """
# TODO: handle when data = list of series or df
#     if isinstance(data, pd.Series):
#         data = pd.DataFrame(data)
#     elif isinstance(data, list):
#         data = pd.DataFrame(data).T
#     for col in data.columns:
#         s = data[col]
    s = data.dropna()
        
    if lb is not None:
        s = s[(s > lb)]
    if ub is not None:
        s = s[(s < ub)]
    
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    
    if log_x == True:
        if (s <= 0).sum() > 0:
            raise ValueError('To use log scale, all values need to be positive') 
#         s = np.log10(s)
        bins = np.logspace(np.log10(s.min()), np.log10(s.max()), bins)
        plt.xscale('log')
        
    ax.hist(s, bins=bins)
    
    if title is not None:
        ax.set_title(title)
    else:
        ax.set_title('Histogram')
    if x_label is not None:
        ax.set_xlabel(x_label)
    if y_label is not None:
        ax.set_ylabel(y_label)

In [5]:
def plot_hist_interact(col):
    plot_hist(df[col], ax=plt.gca())
    plt.show()
#     df[col].hist()

In [6]:
interactive(plot_hist_interact, col=df.select_dtypes(exclude=['object', 'bool']).columns.tolist())