In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
# Apply seaborn's default theme; color_codes=True remaps matplotlib's
# single-letter colour shorthands ("b", "g", "r", ...) to the seaborn palette.
sns.set_theme(color_codes=True)

## Load Iris dataset already embedded in seaborn using Pandas

Note: This dataset already includes column headers, so we do not need to supply them ourselves as we did for the local CSV file in 01-Iris_Dataset_Analysis.

In [None]:
# Load seaborn's "iris" example dataset into a DataFrame named `iris`
# and preview its first rows.
iris = sns.load_dataset("iris")
iris.head()

In [None]:
# Confirm the container type of the loaded dataset.
type(iris) # regardless of where we load from, this is still a pandas DataFrame

In [None]:
# Column names, dtypes, non-null counts and memory usage of the frame.
iris.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
iris.describe()

In [None]:
# Number of rows (samples) per species.
iris.groupby('species').size()

In [None]:
# Pairwise Pearson correlation matrix of the numeric measurements (this is what
# the pair plots and the heat map below visualise). Note: correlation, not covariance.
# The string-valued `species` column is excluded explicitly, because pandas >= 2.0
# no longer drops non-numeric columns silently and raises a ValueError instead.
iris.select_dtypes(include='number').corr()

## Visualization with Seaborn

1. Hist plot - Kitchen sink, Ungrouped and grouped
2. Ungrouped pair plot (similar to the pandas scatter matrix)
3. Grouped pair plot (where possible. No equivalent in Pandas)
4. Violin Plot
5. Correlation Heat Map
6. Reg plot (applicable when the dependent variable is continuous - unlike here, where the dependent variable, species, is categorical)

In [None]:
# Wide-form call: the columns are drawn together on one shared axis, so the
# overlapping histograms are not very useful - shown only as a baseline.
sns.histplot(data=iris)

In [None]:
# Ungrouped histograms of single columns, laid out on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))

# Top-left: plain histogram of sepal_length along the x axis.
sns.histplot(data=iris, x='sepal_length', ax=axes[0, 0])

# Top-right: the same (still ungrouped) histogram with a KDE curve overlaid.
sns.histplot(data=iris, x='sepal_length', kde=True, ax=axes[0, 1])

# Bottom-left: petal_length assigned to the y axis, giving horizontal bars.
sns.histplot(data=iris, y="petal_length", ax=axes[1, 0])

# Bottom-right: horizontal petal_length histogram with a KDE curve.
sns.histplot(data=iris, y="petal_length", kde=True, ax=axes[1, 1])

In [None]:
# Histograms grouped by species (via `hue`), one seaborn option per panel of a 4x2 grid.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 20))

# Row 1: default layered bars | bars stacked on top of each other.
sns.histplot(data=iris, x='sepal_length', hue="species", ax=axes[0, 0])
sns.histplot(data=iris, x='sepal_length', hue="species", multiple="stack", ax=axes[0, 1])

# Row 2: layered bars with a KDE curve per species | step outlines.
sns.histplot(data=iris, x='sepal_length', hue="species", kde=True, ax=axes[1, 0])
sns.histplot(data=iris, x="sepal_length", hue="species", element="step", ax=axes[1, 1])

# Row 3: polygon outlines | side-by-side ("dodged") bars, shrunk to 80% width.
sns.histplot(data=iris, x="sepal_length", hue="species", element="poly", ax=axes[2, 0])
sns.histplot(data=iris, x="sepal_length", hue="species", multiple="dodge", shrink=.8, ax=axes[2, 1])

# Row 4: petal_length on the y axis | dodged bars with KDE curves.
sns.histplot(data=iris, y="petal_length", hue="species", ax=axes[3, 0])
sns.histplot(data=iris, x="sepal_length", hue="species", multiple="dodge", shrink=.8, kde=True, ax=axes[3, 1])

In [None]:
# Histogram statistics and axis-scaling options, laid out on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

# `stat` selects what the bar height represents (count, probability, density, ...);
# discrete=True uses unit-width bins centred on integer values.
sns.histplot(data=iris, x="sepal_length", stat="probability", discrete=True, ax=axes[0, 0])

# Density plot per species; common_norm=False normalises each species on its own.
sns.histplot(
    data=iris, x="petal_length", hue="species",
    element="step", stat="density", common_norm=False,
    ax=axes[0, 1],
)

# log_scale=True puts the data axis (x here) on a log scale - helpful when the
# values span several orders of magnitude.
sns.histplot(data=iris, x="sepal_length", log_scale=True, ax=axes[1, 0])

# Cumulative density as unfilled step curves, with an explicit species order.
sns.histplot(
    data=iris, x="sepal_length", hue="species",
    hue_order=["setosa", "virginica", "versicolor"],
    element="step", fill=False,
    cumulative=True, stat="density", common_norm=False,
    ax=axes[1, 1],
)


In [None]:
# Ungrouped pair plot: scatter plots for every pair of numeric columns,
# with each column's own distribution on the diagonal.
sns.pairplot(data=iris)

In [None]:
# Grouped pair plot: the same grid, with points coloured by species.
sns.pairplot(data=iris, hue='species')

In [None]:
# Grouped pair plot with larger, square facets (height in inches, aspect = width / height).
sns.pairplot(data=iris, hue='species', height=3, aspect=1)

In [None]:
# Per-species distribution of each measurement, as violin plots on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

# Top row: sepal measurements.
sns.violinplot(data=iris, x='species', y='sepal_length', ax=axes[0, 0])
sns.violinplot(data=iris, x='species', y='sepal_width', ax=axes[0, 1])

# Bottom row: petal measurements.
sns.violinplot(data=iris, x='species', y='petal_length', ax=axes[1, 0])
sns.violinplot(data=iris, x='species', y='petal_width', ax=axes[1, 1])

In [None]:
# Annotated heat map of the pairwise correlations between the numeric measurements.
# `species` holds strings, so restrict the frame to numeric dtypes before calling
# corr(); pandas >= 2.0 raises a ValueError on non-numeric columns otherwise.
sns.heatmap(iris.select_dtypes(include='number').corr(), annot=True)
plt.show()

## Matplotlib Demo

In [None]:
# Create synthetic linear data: y = 3x + Gaussian noise.
# Seeding the global NumPy RNG makes the cell reproducible under
# "Restart Kernel -> Run All".
np.random.seed(42)
x = np.random.randn(100)
rand_array = np.random.randn(100)  # noise term added to the linear signal
print(type(rand_array))
print(rand_array.shape)
y = 3 * x + rand_array
y[:5]  # preview only, rather than dumping all 100 values

In [None]:
# Scatter plot of the synthetic data, drawn with matplotlib directly.
plt.scatter(x=x, y=y)

In [None]:
# Line plot with plain matplotlib: seven (T, power) samples.
T = np.arange(6, 13)  # 6, 7, ..., 12
power = np.array([1530.0, 592.0, 204.0, 72.4, 27.2, 11.0, 4.7])

plt.plot(T, power)
plt.show()

In [None]:

from scipy.interpolate import BSpline, make_interp_spline

# Dense grid of 300 evenly spaced points between T.min() and T.max(), on which
# the interpolating spline is evaluated so the curve is drawn smoothly.
xnew = np.linspace(start=T.min(), stop=T.max(), num=300)

# Cubic (k=3) interpolating B-spline through the original (T, power) samples.
spl: BSpline = make_interp_spline(T, power, k=3)
power_smooth = spl(xnew)

plt.plot(xnew, power_smooth)
plt.show()

## Reg Plot Demo

In [None]:
# Regression-plot demo on synthetic data: y = 3x + Gaussian noise.
np.random.seed(42)  # reproducible draw under "Restart Kernel -> Run All"
x = np.random.randn(100)
rand_array = np.random.randn(100)  # noise term added to the linear signal
print(type(rand_array))
print(rand_array.shape)
y = 3 * x + rand_array

plt.figure(figsize=(12,10))
# x and y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises a TypeError for regplot(x, y).
sns.regplot(x=x, y=y)
# Label the axes with what is actually plotted; the data here is synthetic,
# so housing-related labels would be misleading.
plt.xlabel('x')
plt.ylabel('y = 3x + noise')
plt.show()

In [None]:
# Joint distribution of sepal length vs. petal length with a fitted regression line.
# `data` must be the DataFrame object itself (not its name as a string); the
# columns are then referenced by name.
sns.jointplot(data=iris, x='sepal_length', y='petal_length', kind='reg', height=10)
plt.show()