In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
# Select the ipympl matplotlib backend so figures are rendered as notebook widgets
%matplotlib ipympl

In [3]:
# The sample datasets are hosted at https://github.com/mwaskom/seaborn-data
tips, flights = (sns.load_dataset(name) for name in ('tips', 'flights'))

In [4]:
# Leave the frame as the cell's last expression so the notebook renders it
# as a formatted table instead of plain printed text.
tips.head()

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4


In [5]:
# Leave the frame as the cell's last expression so the notebook renders it
# as a formatted table instead of plain printed text.
flights.head()

   year     month  passengers
0  1949   January         112
1  1949  February         118
2  1949     March         132
3  1949     April         129
4  1949       May         121


In [6]:
# Pairwise correlation coefficient between the numeric columns
# (NA/null values are excluded pair by pair):
#   +1 -> perfectly correlated, positive slope
#   -1 -> perfectly correlated, negative slope
#    0 -> no correlation
# The non-numeric columns (sex, smoker, day, time) are filtered out
# explicitly: recent pandas releases no longer drop them silently inside
# DataFrame.corr() and raise an error instead.
tc = tips.select_dtypes(include='number').corr()

In [7]:
# Draw the correlation matrix.
# Correlation coefficients live in [-1, 1], so the colour scale is pinned to
# that range. This keeps the diverging 'coolwarm' palette centred on 0;
# otherwise it is stretched over whatever range the data happens to cover and
# weak positive correlations are painted as if they were negative.
sns.heatmap(tc, annot=True, cmap='coolwarm', vmin=-1, vmax=1)

FigureCanvasNbAgg()

<matplotlib.axes._subplots.AxesSubplot at 0x7f76ac11cc88>

In [8]:
# Reshape the long flights table into a month x year grid of passenger counts
fp = flights.pivot_table(
    values='passengers',
    index='month',
    columns='year',
)

In [9]:
# Display the pivoted table: one row per month, one column per year
fp

year,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
January,112,115,145,171,196,204,242,284,315,340,360,417
February,118,126,150,180,196,188,233,277,301,318,342,391
March,132,141,178,193,236,235,267,317,356,362,406,419
April,129,135,163,181,235,227,269,313,348,348,396,461
May,121,125,172,183,229,234,270,318,355,363,420,472
June,135,149,178,218,243,264,315,374,422,435,472,535
July,148,170,199,230,264,302,364,413,465,491,548,622
August,148,170,199,242,272,293,347,405,467,505,559,606
September,136,158,184,209,237,259,312,355,404,404,463,508
October,119,133,162,191,211,229,274,306,347,359,407,461


In [10]:
# Open a dedicated figure for this plot. sns.heatmap draws onto the current
# axes when none is given, so without this the passengers heatmap would be
# painted over the figure left active by an earlier plotting cell.
fig, ax = plt.subplots()

# `linewidths` / `linecolor` are seaborn's documented keywords for the lines
# separating the cells.
sns.heatmap(fp, cmap='magma', linecolor='white', linewidths=1, ax=ax)

<matplotlib.axes._subplots.AxesSubplot at 0x7f76ac11cc88>

In [11]:
# Clustered heatmap: rows and columns are reordered so that the ones
# that look alike end up next to each other
sns.clustermap(data=fp)

FigureCanvasNbAgg()

<seaborn.matrix.ClusterGrid at 0x7f76a0a20eb8>

In [12]:
# The number of passengers differs from year to year, which can hide a
# pattern the years have in common. Standardizing every column (= year)
# puts them on the same scale before clustering rows and columns by
# similarity; the colour bar legend then runs from 0 to 1.
#
# Relevant parameter, quoted from the seaborn documentation:
#   standard_scale : int or None, optional
#       Either 0 (rows) or 1 (columns). Whether or not to standardize that
#       dimension, meaning for each row or column, subtract the minimum and
#       divide each by its maximum.
sns.clustermap(data=fp, standard_scale=1, cmap='coolwarm')

FigureCanvasNbAgg()

<seaborn.matrix.ClusterGrid at 0x7f76a08d1438>