# __Data Visualization in Python__
## _Machine and Statistical Learning Club. Spring-2019_

This workshop is intended to introduce different ways to visualize and plot data using Python in the context of Data Science techniques. 

<hr/>


In [None]:
import numpy as np  

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.style as mplstyle
#%matplotlib inline

# Matplotlib 

In [None]:
def styleReset():
    """Put every matplotlib rc parameter back to its default value.

    Cells call this first so that a style selected by an earlier cell
    does not carry over into the next plot.
    """
    defaults = mpl.rcParamsDefault
    mpl.rcParams.update(defaults)

## A plot generated by matplotlib has the following components:
<img width=50% src="./files/anatomy.png">




#### Simple Plot

In [None]:
styleReset()

# Five sample points for a single line.
x = list(range(10, 60, 10))
y = [1, 2, 6, 8, 16]

# The label given here is the text plt.legend() shows for this line.
plt.plot(x, y, label='myFunction')

# Decorate the current axes: title, axis labels and minor tick marks.
plt.title("My plot")
plt.xlabel('x value')
plt.ylabel('y value')
plt.minorticks_on()   # counterpart: plt.minorticks_off()

plt.legend()
plt.show()

#### Simple Scatter Plot

In [None]:
styleReset()

# x = 0..4, y = 2**x, z = pi * x**2.
x = list(range(5))
y = [2 ** n for n in x]
z = np.pi * np.square(x)

# Two series on the same axes. For the second one the marker size
# (argument s) is taken from the plotted values themselves.
plt.scatter(x, y, marker="x", label='2^n')
plt.scatter(x, z, marker="o", s=z, label='area comp')

plt.title("My Scatter Plot")
plt.xlabel('x value')
plt.ylabel('y value')

plt.legend()
plt.show()

#### Three Function Plot

In [None]:
styleReset()

# 100 evenly spaced samples on [0, 2].
x = np.linspace(0, 2, 100)

# One curve per exponent: x**1, x**2 and x**3.
for exponent, curve_name in enumerate(('linear', 'quadratic', 'cubic'), start=1):
    plt.plot(x, x ** exponent, label=curve_name)

plt.title("Function Comparison")
plt.xlabel('x value')
plt.ylabel('f(x) value')

plt.legend()
plt.show()

In [None]:
styleReset()

# Fewer samples than before so the individual markers stay distinguishable.
x = np.linspace(0, 2, 20)

# (values, format string, legend label). The format string combines a colour
# letter with a line or marker code: red dashes, blue squares, green triangles.
series = (
    (x,      "r--", 'linear'),
    (x ** 2, "bs",  'quadratic'),
    (x ** 3, "g^",  'cubic'),
)
for values, fmt, curve_name in series:
    plt.plot(x, values, fmt, label=curve_name)

plt.title("Function Comparison")
plt.xlabel('x value')
plt.ylabel('f(x) value')

plt.legend()
plt.show()

#### Multiple Subplots per Figure

In [None]:
# Sample points from 0 up to (not including) 10 in steps of 0.2.
x = np.arange(0, 10, 0.2)
y = np.sin(x)

# One figure holding a single row of two axes; ax is indexable per panel.
fig, ax = plt.subplots(nrows=1, ncols=2)

# Left panel: sine with 'x' markers. Right panel: cosine with 'o' markers.
panels = zip(ax, (y, np.cos(x)), ('x', 'o'))
for axis, values, marker_shape in panels:
    axis.plot(x, values, marker=marker_shape)

plt.show()

#### Customizing the style

In [None]:

# Select several named styles at once by passing them as a list.
mplstyle.use(['dark_background', 'ggplot', 'fast'])

x = np.arange(0, 10, 0.2)
y = np.sin(x)

# Same two-panel layout as the previous example, now drawn with the new style.
fig, ax = plt.subplots(nrows=1, ncols=2)
left, right = ax

left.plot(x, y, marker='x')
right.plot(x, np.cos(x), marker='o')

plt.show()


#### Using Dictionary and the _data=_ argument

In [None]:
styleReset()
mplstyle.use(['ggplot', 'fast'])

# Build the table column by column. The random draws happen in the same
# order as the keys are filled in: 'c', then 'd', then the noise for 'b'.
data = {}
data['a'] = np.arange(50)
data['c'] = np.random.randint(0, 50, 50)
data['d'] = np.abs(np.random.randn(50)) * 100
data['b'] = data['a'] + 10 * np.random.randn(50)

# With data=..., the string arguments are looked up as keys of the dict:
# x <- 'a', y <- 'b', c (colour) <- 'c', s (marker size) <- 'd'.
plt.scatter('a', 'b', c='c', s='d', data=data)
plt.xlabel('entry a')
plt.ylabel('entry b')
plt.show()


#### Categorical Plotting 

In [None]:
styleReset()

names = ['group_a', 'group_b', 'group_c']
values = [1, 10, 100]

# Figure number 1, 9 inches wide and 3 inches tall.
plt.figure(num=1, figsize=(9, 3))

# One row, three columns: the same categorical data drawn as a bar chart,
# a scatter plot and a line plot, in positions 1, 2 and 3.
for position, draw in enumerate((plt.bar, plt.scatter, plt.plot), start=1):
    plt.subplot(1, 3, position)
    draw(names, values)

plt.suptitle('Categorical Plotting')
plt.show()


#### Grid of plots

In [None]:
import matplotlib.gridspec as gridspec

fig1 = plt.figure(num=1, figsize=(50, 20))
cols = 3
rows = 2

# The domain starts at 0.1 rather than 0, so the logarithms are finite.
x = np.linspace(0.1, 100, 200)

# Each entry pairs a NumPy function with the title of its subplot.
functions = [
    (np.sin,   "sin(x)"),
    (np.cos,   "cos(x)"),
    (np.tan,   "tan(x)"),
    (np.log,   "log(x)"),
    (np.log10, "log10(x)"),
    (np.log2,  "log2(x)"),
]
y = [func(x) for func, _ in functions]
names = [title for _, title in functions]

gs = gridspec.GridSpec(rows, cols)

# Fill the grid row by row: subplot i goes to (i // cols, i % cols).
ax = []
for i, (values, title) in enumerate(zip(y, names)):
    row, col = divmod(i, cols)
    axis = fig1.add_subplot(gs[row, col])
    ax.append(axis)

    axis.plot(x, values)
    axis.set_title(title)
    axis.set_xlabel('domain of x')
    axis.set_ylabel('range of ' + title)

# Save before show() so the file is written from the finished figure.
plt.savefig('trig_functions.png')
plt.show()

### Mark points on the line.
The `markevery` argument of `plot` controls which data points get a marker. It accepts `None`, an int, `(int, int)`, a slice, a list of ints, a float, or `(float, float)`.
In the cases below, `every` stands for the value passed as `markevery`.

`every=None`, every point will be plotted.

`every=N`, every N-th marker will be plotted starting with marker 0.

`every=(start, N)`, every N-th marker, starting at point start, will be plotted.

`every=slice(start, end, N)`, every N-th marker, starting at point start, up to but not including point end, will be plotted.

`every=[i, j, m, n]`, only markers at points i, j, m, and n will be plotted.

`every=0.1`, (i.e. a float) then markers will be spaced at approximately equal distances along the line; the distance along the line between markers is determined by multiplying the display-coordinate distance of the axes bounding-box diagonal by the value of every.

`every=(0.5, 0.1)` (i.e. a length-2 tuple of floats), the same functionality as every=0.1 is exhibited, but the first marker will be offset along the line by 0.5 multiplied by the display-coordinate diagonal distance.



In [None]:

# Data for the cartesian plots: x is shifted by delta at both ends of
# [0, 10], and y is a sine wave raised by 1.0 + delta.
delta = 0.11
usable_width = 10 - 2 * delta
x = delta + np.linspace(0, usable_width, 200)
y = (np.sin(x) + 1.0) + delta

# Preview the first five samples of each array and report the sample count.
preview = slice(None, 5)
display(x[preview], y[preview])
print("Points in x,y:", len(x))

In [None]:
# The markevery values to demonstrate, one subplot per entry.
cases = [
    None,                      # mark every point
    8,                         # every 8th point, starting at point 0
    (30, 8),                   # every 8th point, starting at point 30
    [16, 24, 30], [0, -1],     # only the listed point indices
    slice(100, 200, 3),        # every 3rd point from 100 up to (not including) 200
    0.1, 0.3, 1.5,             # float: spacing measured along the drawn line
    (0.0, 0.1), (0.45, 0.1),   # (offset of first marker, spacing), both floats
]

# Figure size and grid layout shared by the figures that follow.
figsize = (20, 16)
cols = 3

# Ceiling division: just enough rows to hold every case. The previous
# formula, len(cases) // cols + 1, added an empty row whenever the number of
# cases was an exact multiple of cols. max(1, ...) keeps the grid valid even
# if the list is emptied while experimenting.
n_rows = max(1, (len(cases) + cols - 1) // cols)
gs = gridspec.GridSpec(n_rows, cols)
gs.update(hspace=0.4)  # extra vertical room so titles do not touch the plot above

In [None]:
styleReset()

# One subplot per markevery case, laid out row by row on the shared grid.
fig1 = plt.figure(num=1, figsize=figsize)
ax = []
for i, case in enumerate(cases):
    row, col = divmod(i, cols)
    subplot_label = "subplot:" + str(row) + "_" + str(col)
    axis = fig1.add_subplot(gs[row, col], label=subplot_label)
    axis.set_title('markevery=%s' % str(case))
    # Circle markers of size 4 on a solid line; markevery selects which show.
    axis.plot(x, y, 'o', ls='-', ms=4, markevery=case)
    ax.append(axis)
plt.show()

#### Customizing and changing axis scale.

In [None]:
# Choose the style before creating the figure. Settings that belong to the
# figure itself are read when the figure is created, so switching style
# afterwards would only affect the axes added later.
mplstyle.use(['dark_background', 'ggplot'])

fig2 = plt.figure(num=2, figsize=figsize)
axlog = []

# Same grid of markevery cases as before, now on log-log axes.
for i, case in enumerate(cases):
    row, col = divmod(i, cols)
    subplot_label = "subplot:" + str(row) + "_" + str(col)
    axis = fig2.add_subplot(gs[row, col], label=subplot_label)
    axis.set_title('markevery=%s' % str(case))
    # Other accepted scale names: "linear", "symlog", "logit".
    axis.set_xscale('log')
    axis.set_yscale('log')
    axis.plot(x, y, 'o', ls='-', ms=4, markevery=case)
    axlog.append(axis)

plt.show()

#### Polar Plots

In [None]:
# NOTE(review): the figure number is 4 although the variable is named fig3.
# Confirm whether num=3 was intended.
fig3 = plt.figure(num=4, figsize=figsize)

# Data for the polar plots: the radius grows from 0 to 3 while the angle
# advances by one full turn (2*pi) per unit of radius.
r = np.linspace(0, 3.0, 200)
theta = 2 * np.pi * r

# Same markevery cases as in the cartesian figures, on polar axes.
axpolar = []
for i, case in enumerate(cases):
    row, col = divmod(i, cols)
    subplot_label = "subplot:" + str(row) + "_" + str(col)
    axis = fig3.add_subplot(gs[row, col], projection='polar', label=subplot_label)
    axis.set_title('markevery=%s' % str(case))
    axis.plot(theta, r, 'o', ls='-', ms=4, markevery=case)
    axpolar.append(axis)

plt.show()
