In [None]:
# Simple linear regressions

# Seaborn is a visualization library 
# and it provides a convenient interface to 
# generate complex and great-looking statistical plots.

# Using the auto dataset from Datacamp, I will plot a
# linear regression showing the relationship between
# automobile weight and horsepower.

# Import plotting modules - matplotlib and seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Fit and draw a linear regression of 'hp' against 'weight'.
# NOTE(review): `auto` must already exist as a DataFrame when this cell runs;
# no loading step is visible in these cells -- confirm where it is defined.
sns.lmplot(
    data=auto,
    x='weight',
    y='hp',
)

# Render the figure
plt.show()

In [None]:
# Plotting residuals of a regression

# Plotting the residuals gives a better idea
# of how well the regression captured the data (see the previous cell
# for how to make a linear regression).

# Import plotting modules - plt and sns
import matplotlib.pyplot as plt
import seaborn as sns

# Draw the residuals of the regression between 'hp' and 'mpg' in green
sns.residplot(
    data=auto,
    x='hp',
    y='mpg',
    color='green',
)

# Render the figure
plt.show()

In [None]:
# Higher-order regressions

# Scatter the raw ('weight', 'mpg') observations as red circles
plt.scatter(auto['weight'], auto['mpg'], label='data', color='red', marker='o')

# Overlay a first-order (straight-line) regression of 'mpg' on 'weight' in blue.
# scatter=False stops regplot from drawing its own points, since the data are
# already drawn by plt.scatter above; `scatter` is a boolean flag, so pass
# False explicitly rather than relying on None being falsy.
sns.regplot(x='weight', y='mpg', data=auto, scatter=False, color='blue', label='First Order')

# Overlay a second-order (quadratic) regression of 'mpg' on 'weight' in green
sns.regplot(x='weight', y='mpg', data=auto, scatter=False, order=2, color='green', label='Second Order')

# Add a legend identifying the data and both fits, then display the plot
plt.legend(loc='upper right')
plt.show()

In [None]:
# Grouping linear regressions by hue

# We can use hue to distinguish between the groups of a categorical variable.
# Each distinct group of points is used to produce its own
# regression, drawn with a different hue in the plot.

# One regression of 'hp' against 'weight' per 'origin' group,
# colored with the 'Set1' palette
sns.lmplot(
    data=auto,
    x='weight',
    y='hp',
    hue='origin',
    palette='Set1',
)

# Render the figure
plt.show()

In [None]:
# Grouping linear regressions by row or column

# Using the auto dataset from Datacamp again, group the regressions
# by row or col. Here we may want a grid of subplots; lmplot builds one via its row/col arguments.

# Plot linear regressions between 'weight' and 'hp' grouped row-wise by 'origin'.
# Only `row` is set: also passing col='origin' would facet on the same variable
# in both directions, producing a square grid in which only the diagonal panels
# contain data and every off-diagonal panel is empty.
sns.lmplot(x='weight', y='hp', data=auto, row='origin')

# Display the plot
plt.show()

In [None]:
# Constructing strip plots

# Regressions are one way of visualizing data that has a relationship between two continuous variables.
# Here the question is different: how is the distribution of a single continuous variable affected by a second,
# categorical variable? Plot this with a strip plot.

# Top panel: plain strip plot of 'hp' for each 'cyl' value
plt.subplot(2, 1, 1)
sns.stripplot(
    data=auto,
    x='cyl',
    y='hp',
)

# Bottom panel: same strip plot, but with jitter and smaller markers
plt.subplot(2, 1, 2)
sns.stripplot(
    data=auto,
    x='cyl',
    y='hp',
    size=3,
    jitter=True,
)

# Render both panels
plt.show()

In [None]:
# Constructing swarm plots

# A swarm plot is a lot like a strip plot but
# spreads out the points to avoid overlap and 
# provides a better visual overview of the data.

# Top panel: swarm plot with 'cyl' on the x-axis and 'hp' on the y-axis
plt.subplot(2, 1, 1)
sns.swarmplot(
    data=auto,
    x='cyl',
    y='hp',
)

# Bottom panel: axes swapped ('hp' on x, 'cyl' on y), drawn with
# horizontal orientation and colored by 'origin'
plt.subplot(2, 1, 2)
sns.swarmplot(
    data=auto,
    x='hp',
    y='cyl',
    orient='h',
    hue='origin',
)

# Render both panels
plt.show()

In [None]:
# Constructing violin plots

# A violin plot is useful because it condenses a dataset into a summary of its distribution
# for each group and so provides a good overview of the data.

# Top panel: violin plot of 'hp' for each 'cyl' value
plt.subplot(2, 1, 1)
sns.violinplot(
    data=auto,
    x='cyl',
    y='hp',
)

# Bottom panel: the same violins in 'lightgray' with the inner annotations removed...
plt.subplot(2, 1, 2)
sns.violinplot(
    data=auto,
    x='cyl',
    y='hp',
    color='lightgray',
    inner=None,
)

# ...with a jittered strip plot of small points drawn on top of the violins
sns.stripplot(
    data=auto,
    x='cyl',
    y='hp',
    jitter=True,
    size=1.5,
)

# Render both panels
plt.show()

In [None]:
# Plotting joint distributions (1)

# Visualize a bivariate distribution to see how data distributions of 
# continuous random variables vary jointly.

# Joint plot of 'hp' against 'mpg'
sns.jointplot(
    data=auto,
    x='hp',
    y='mpg',
)

# Render the figure
plt.show()

In [None]:
# Plotting joint distributions (2)

# The seaborn function sns.jointplot() has a 
# parameter kind to specify how to visualize the 
# joint variation of two continuous random variables. Plot a joint distribution.

# Joint plot of 'hp' against 'mpg', drawn as a hexbin plot
sns.jointplot(
    data=auto,
    x='hp',
    y='mpg',
    kind='hex',
)

# Render the figure
plt.show()

In [None]:
# Plotting distributions pairwise (1)

# Show the first five rows of the DataFrame as text
print(auto.head(n=5))

# Draw the pairwise joint distributions of the DataFrame's columns
sns.pairplot(data=auto)

# Render the figure
plt.show()

In [None]:
# Plotting distributions pairwise (2)

# Show the first five rows of the DataFrame as text
print(auto.head(n=5))

# Pairwise joint distributions, grouped by 'origin' and drawn
# with regression lines (kind='reg')
sns.pairplot(
    data=auto,
    hue='origin',
    kind='reg',
)

# Render the figure
plt.show()

In [None]:
# Visualizing correlations with a heatmap

# The covariance matrix can be easily visualized as a heatmap. 
# A heatmap is effectively a pseudocolor plot with labelled rows and columns.
# Plot a heatmap with a covariance matrix.

# Show the matrix values as text.
# NOTE(review): `cov_matrix` is not defined in any cell visible here --
# confirm it is computed or loaded before this cell runs.
print(cov_matrix)

# Draw the same matrix as a heatmap
sns.heatmap(data=cov_matrix)

# Render the heatmap
plt.show()