# Data Visualization in python

### Matplotlib
Matplotlib is a comprehensive library for creating static, animated, and interactive visualizations in Python.

https://matplotlib.org/

### Pandas plot
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html

### Seaborn
Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics.

https://seaborn.pydata.org/

## Matplotlib

In [41]:
import matplotlib as mpl
%matplotlib notebook

The matplotlib is configured to render into the browser.

This configuration is called a backend (backend layer).

In [42]:
mpl.get_backend()

'nbAgg'

In [43]:
mpl.rcParams['figure.figsize'] = (4.0, 3.0)

The sub-library pyplot is for plotting data (scripting layer).

https://matplotlib.org/api/_as_gen/matplotlib.pyplot.html#module-matplotlib.pyplot

In [44]:
import matplotlib.pyplot as plt

In [45]:
plt.style.available

['seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-ticks',
 'fivethirtyeight',
 'seaborn-whitegrid',
 'classic',
 '_classic_test',
 'fast',
 'seaborn-talk',
 'seaborn-dark-palette',
 'seaborn-bright',
 'seaborn-pastel',
 'grayscale',
 'seaborn-notebook',
 'ggplot',
 'seaborn-colorblind',
 'seaborn-muted',
 'seaborn',
 'Solarize_Light2',
 'seaborn-paper',
 'bmh',
 'tableau-colorblind10',
 'seaborn-white',
 'dark_background',
 'seaborn-poster',
 'seaborn-deep']

In [46]:
plt.style.use('seaborn-colorblind')

Examples:

    Line plotting (plt.plot)
    Bar chart (plt.bar)
    Subplot (plt.subplot)
    Histograms (hist)
    Heatmap (hist2d)

### Line plotting

In [47]:
plt.plot?

Simply plot a dot in a figure

In [48]:
plt.figure()

plt.plot(3,2,'.');

<IPython.core.display.Javascript object>

In [49]:
plt.figure()
plt.plot(1.5, 1.5, 'o');


<IPython.core.display.Javascript object>

In [50]:
plt.plot(2,2,'o');


In [51]:
plt.plot(2.5, 2.5, 'o');

Plot lines for lists of data points

In [52]:
import numpy as np
linear_data = np.array([1,2,3,4,5,6,7])
quadratic_data = linear_data**2

In [53]:
linear_data

array([1, 2, 3, 4, 5, 6, 7])

In [54]:
quadratic_data

array([ 1,  4,  9, 16, 25, 36, 49])

In [55]:
plt.figure();
plt.plot(linear_data, '-o');
plt.plot(quadratic_data, '--o');

<IPython.core.display.Javascript object>

In [56]:
plt.xlabel('Your x data');

In [57]:
plt.ylabel('Your y data');

In [58]:
plt.title('A title');

In [59]:
plt.legend(['Baseline', 'Competition']);

In [60]:
plt.plot([10, 15, 25], '--s');

In [61]:
plt.legend(['Baseline', 'Competition', 'us']);

In [62]:
plt.fill_between(range(len(linear_data)), 
                      linear_data, quadratic_data,
                      facecolor='blue',
                      alpha=0.1);

In [63]:
ax = plt.gca()
ax.axis([0,6,0,20])

[0, 6, 0, 20]

In [64]:
ax.relim() 
ax.autoscale()

### Bar chart

In [65]:
plt.figure()

xvals = range(len(linear_data))
print(xvals)

plt.bar(xvals,linear_data, width=0.3);

<IPython.core.display.Javascript object>

range(0, 7)


In [66]:
new_xvals=[]
for item in xvals:
    new_xvals.append(item+0.3)
    
plt.bar(new_xvals, quadratic_data, width=0.3, color='red')

<BarContainer object of 7 artists>

In [67]:
plt.figure();

plt.bar(xvals, linear_data, width=0.3, color='b')

plt.bar(xvals, quadratic_data, width=0.3, bottom=linear_data, color='r')

<IPython.core.display.Javascript object>

<BarContainer object of 7 artists>

In [68]:
plt.figure();

plt.barh(xvals, linear_data, height=0.3, color='b')

plt.barh(xvals, quadratic_data, height=0.3, left=linear_data, color='r')

<IPython.core.display.Javascript object>

<BarContainer object of 7 artists>

### Subplot

In [69]:
plt.figure()

# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)

plt.plot(linear_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x126b44150>]

In [70]:
plt.subplot(1,2,2);

plt.plot(quadratic_data, '-o');

In [71]:
plt.figure();

ax1 = plt.subplot(1, 2, 1);

plt.plot(linear_data, '-o');

# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1);

plt.plot(quadratic_data, '-x');

<IPython.core.display.Javascript object>

In [72]:
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)


<IPython.core.display.Javascript object>

In [73]:
ax5.plot(linear_data, '-');

In [74]:
ax3.plot(quadratic_data, '-');

### Histograms

In [75]:
# create 2x2 grid of axis subplots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1,ax2,ax3,ax4]

# draw n = 10, 100, 1000, and 10000 samples from the normal distribution and plot corresponding histograms
for n in range(0,4):
    # generating random numbers
    sample_size = 10**(n+1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    
    # histogram plotting
    axs[n].hist(sample, bins=100)
    
    # title for each axes
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

### Heatmaps (2d-histograms)

In [76]:
plt.figure()

X = np.random.random(size=10000)

Y = np.random.normal(loc=0.0,scale=1.0,size=10000)

plt.hist2d(X, Y, bins=100, cmap='rainbow');
## cmap: color map options
## https://matplotlib.org/3.2.1/tutorials/colors/colormaps.html

<IPython.core.display.Javascript object>

In [77]:
plt.colorbar();

### Animations
https://matplotlib.org/3.2.1/api/animation_api.html#animation

## Empty figure 
fig, ax = plt.subplots()
## Empty data
xdata, ydata = [], []
## Empty plot
ln, = plt.plot([], [], 'ro')

## you can preset the figure size
#ax.set_xlim(0, 2*np.pi)
#ax.set_ylim(-1, 1)

def update(frame):
    ## add new one data point per frame
    xdata.append(frame)
    ydata.append(np.sin(frame))
    
    ## update the plot
    ln.set_data(xdata, ydata)
    
    ## auto rescale figure size
    ax.relim()      
    ax.autoscale()
    return ln,

from matplotlib.animation import FuncAnimation

ani = FuncAnimation(fig, update, frames=np.linspace(0, 4*np.pi, 64))

ani.save('myAnimation.gif', writer='imagemagick', fps=30)

## Pandas plotting

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html

Iris flower data set https://en.wikipedia.org/wiki/Iris_flower_data_set

In [78]:
import pandas as pd
columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Name']
iris = pd.read_csv('http://scv.bu.edu/examples/python/data_visualization/iris.data.csv')
iris.columns=columns
iris

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


### DataFrame.plot()

kind : str
    - 'line' : line plot (default)
    - 'bar' : vertical bar plot
    - 'barh' : horizontal bar plot
    - 'hist' : histogram
    - 'box' : boxplot
    - 'kde' : Kernel Density Estimation plot
    - 'density' : same as 'kde'
    - 'area' : area plot
    - 'pie' : pie plot
    - 'scatter' : scatter plot
    - 'hexbin' : hexbin plot

In [79]:
iris.plot(y='SepalLength')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x126097110>

In [80]:
iris.plot?

In [81]:
iris.plot();

<IPython.core.display.Javascript object>

In [82]:
fig=iris.plot(kind='box')
fig.set_xticklabels(['sl','sw','pl','pw']);

<IPython.core.display.Javascript object>

In [83]:
#dir(fig)

In [84]:
fig.set_xlabel('features')
fig.set_ylabel('inches')

Text(0, 0.5, 'inches')

In [85]:
iris.plot(kind='hist');

<IPython.core.display.Javascript object>

In [86]:
iris.plot(kind='kde');

<IPython.core.display.Javascript object>

#### Plot results from data manipulation

In [87]:
iris.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [88]:
iris.groupby('Name').mean()

Unnamed: 0_level_0,SepalLength,SepalWidth,PetalLength,PetalWidth
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,5.004082,3.416327,1.465306,0.244898
Iris-versicolor,5.936,2.77,4.26,1.326
Iris-virginica,6.588,2.974,5.552,2.026


In [89]:
fig, ax = plt.subplots()
iris.groupby('Name').mean().plot(kind='bar', ax=ax, rot=0);
ax.set_ylabel('cm')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'cm')

In [90]:
pd.plotting.scatter_matrix(iris);

<IPython.core.display.Javascript object>

## Seaborn
https://seaborn.pydata.org/

In [91]:
import seaborn as sns

In [92]:
sns.pairplot(iris, hue='Name', diag_kind='kde', size=1.5);



<IPython.core.display.Javascript object>

In [93]:
fig = plt.figure(figsize=(8,3))
plt.subplot(1,2,1)
sns.swarmplot('Name', 'PetalLength', data=iris);
plt.subplot(1,2,2)
sns.boxplot('Name', 'PetalLength', data=iris);

<IPython.core.display.Javascript object>

### x vs y by seaborn

In [94]:
np.random.seed(1234)

v1 = pd.Series(np.random.normal(0,10,1000), name='v1')
v2 = pd.Series(2*v1 + np.random.normal(60,15,1000), name='v2')

In [95]:
plt.figure()
plt.hist(v1, alpha=0.7, bins=np.arange(-50,150,5), label='v1');
plt.hist(v2, alpha=0.7, bins=np.arange(-50,150,5), label='v2');
plt.legend();

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

In [96]:
plt.figure()
plt.scatter(v1,v2)

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x1a2a678ed0>

In [97]:
sns.jointplot(v1, v2, alpha=0.4);

  f = plt.figure(figsize=(height, height))


<IPython.core.display.Javascript object>

In [98]:
sns.jointplot(v1, v2, kind='hex');

<IPython.core.display.Javascript object>

In [99]:
# set the seaborn style for all the following plots
sns.set_style('white')

sns.jointplot(v1, v2, kind='kde', space=0);

<IPython.core.display.Javascript object>

In [100]:
plt.savefig('temp.png')

## Please fill an evaluation
http://scv.bu.edu/eval
    
## Future questions for SCC
help@scc.bu.edu