# Notebook 8 - Visualization with Seaborn

<img src="https://seaborn.pydata.org/_images/logo-wide-lightbg.svg" width = "300">


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

sns.set_theme()  # apply seaborn's default theme (sns.set() is only an alias of set_theme())

## Bar plot 

In [None]:
# Load seaborn's bundled "tips" example dataset and display it
df = sns.load_dataset("tips")
df

In [None]:
# One bar per sex; bar height is the 'mean' estimator applied to total_bill
sns.barplot(
    data=df,
    x="sex",
    y="total_bill",
    estimator="mean",
);

### Color palettes

<img src="https://jlcoto.github.io/img/brewer_sequential.png" width = "800">

In [None]:
# Same bar plot as above, colored with the 'plasma' palette.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` -- confirm against the installed version; if so, pass hue='sex' and
# legend=False to keep the same picture.
sns.barplot(x='sex', y='total_bill', data=df,
            palette='plasma', estimator='mean');  # ';' hides the Axes repr, as in the cell above

### Change orientation

In [None]:
# Horizontal bar plot: the category goes on y, the value on x, and orient="h"
# makes the direction explicit. Bar length is the 'median' total bill.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` -- confirm against the installed version; if so, pass hue='sex' and
# legend=False to keep the same picture.
sns.barplot(y='sex', x='total_bill', data=df,
            palette='Oranges', estimator='median', orient="h");  # ';' hides the Axes repr

### Stacked bar plot

In [None]:
# Monthly temperature table: one row per month, one column per measurement
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_temps = {
    'High Temp': [28, 30, 34, 38, 45, 42, 38, 35, 32, 28, 25, 21],
    'Low Temp': [22, 26, 30, 32, 41, 38, 32, 31, 28, 22, 15, 15],
    'Avg Temp': [25, 28, 32, 35, 43, 40, 35, 33, 30, 25, 20, 18],
}
df = pd.DataFrame(monthly_temps, index=month_labels)

df

In [None]:
# Stacked bar chart of the monthly temperature columns, drawn through pandas
ax = df.plot.bar(stacked=True, color=['red', 'skyblue', 'green'])

# Axis labels and title, set on the Axes that pandas returned
ax.set_xlabel('Months')
ax.set_ylabel('Temp ranges in Degree Celsius')
ax.set_title('Monthly Temperatures in a year')

### Grouped bar chart

In [None]:
# Nested bar plot of body mass by species, split by sex;
# error bars show the standard deviation (errorbar="sd")
penguins = sns.load_dataset("penguins")

g = sns.catplot(
    kind="bar",
    data=penguins,
    x="species",
    y="body_mass_g",
    hue="sex",
    errorbar="sd",
    alpha=0.6,
    height=6,
)

# Drop the x-axis label and the legend title; keep a readable y-axis label
g.set_axis_labels("", "Body mass (g)")
g.legend.set_title("")

## Count plot

In [None]:
# `df` was rebound to the monthly-temperature table in the stacked bar plot
# section, and that table has no 'sex', 'day', 'smoker' or 'total_bill' column.
# Reload the tips dataset so this cell and the box / violin / strip plot cells
# below run on a fresh kernel.
df = sns.load_dataset('tips')

# Number of rows for each value of 'sex'
sns.countplot(x='sex', data=df);

## Box plot

In [None]:
# Single box summarising the total_bill column
sns.boxplot(data=df, y="total_bill");

### Use x axis

In [None]:
# One box of total_bill per day
sns.boxplot(data=df, x="day", y="total_bill");

### Use hue

'hue' is used to visualize the data of different categories in one plot.

In [None]:

# Boxes of total_bill per day, with smoker as the hue variable
sns.boxplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
);

## Violinplot

In [None]:
# Single violin for the total_bill column
sns.violinplot(data=df, y="total_bill");

In [None]:
# Violins of total_bill per day with sex as hue, drawn with split=True
sns.violinplot(
    data=df,
    x="day",
    y="total_bill",
    hue="sex",
    split=True,
)

## Stripplot

A strip plot is a scatter plot in which one of the two variables is categorical: every observation is drawn as a point above its category.



In [None]:
# Strip plot of total_bill per day with jitter switched on
sns.stripplot(
    data=df,
    x="day",
    y="total_bill",
    jitter=True,
);

### Use hue

In [None]:
# Strip plot of total_bill per day, with smoker as the hue variable
sns.stripplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
)

### Use dodge

In [None]:
# Strip plot with smoker as hue, dodge switched on and jitter switched off
sns.stripplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
    dodge=True,
    jitter=False,
)

### Use jitter

In [None]:
# Strip plot with smoker as hue, with both dodge and jitter switched on
sns.stripplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
    dodge=True,
    jitter=True,
)

## Histogram

In [None]:
# Draw 100 samples from a normal distribution using a generator seeded with 10
rs = np.random.RandomState(10)
d = rs.normal(size=100)

# Histogram only (kde=False), drawn with color "m"
sns.histplot(d, color="m", kde=False)

### Use KDE

In statistics, kernel density estimation (KDE) is the application of kernel smoothing for probability density estimation, i.e., a non-parametric method to estimate the probability density function of a random variable based on kernels as weights.

In [None]:
# Same histogram, this time with the KDE curve switched on
sns.histplot(d, color="m", kde=True)

In [None]:
# Two samples of 200 standard-normal values each, used by the KDE cells below.
# They are drawn from a dedicated seeded generator (instead of the unseeded
# global np.random state) so the plots are identical on every run.
xy_rs = np.random.RandomState(20)
x = xy_rs.randn(200)
y = xy_rs.randn(200)

# KDE for the x variable
sns.kdeplot(x)

In [None]:
# KDE of x with fill switched on
sns.kdeplot(x, fill=True)

In [None]:
# KDE over both variables: x on the horizontal axis, y on the vertical axis
sns.kdeplot(
    x=x,
    y=y,
)

In [None]:
# KDE over x and y with fill and a colorbar switched on
sns.kdeplot(
    x=x,
    y=y,
    fill=True,
    cbar=True,
)

## Line plot

In [None]:
# Load seaborn's bundled "fmri" example dataset and display it
fmri = sns.load_dataset("fmri")
fmri

In [None]:
# Signal against timepoint: region is mapped to hue, event to line style
sns.lineplot(
    data=fmri,
    x="timepoint",
    y="signal",
    hue="region",
    style="event",
);

## Area chart

In [None]:
# Apply seaborn's default theme
sns.set_theme()

# One row per period, one column of values per team
df = pd.DataFrame(
    {
        'period': [1, 2, 3, 4, 5, 6, 7, 8],
        'team_A': [20, 12, 15, 14, 19, 23, 25, 29],
        'team_B': [5, 7, 7, 9, 12, 9, 9, 4],
        'team_C': [11, 8, 10, 6, 6, 5, 9, 12],
    }
)
df

In [None]:
# Stacked area chart: period on the x axis, one layer per team column
plt.stackplot(df['period'], df['team_A'], df['team_B'], df['team_C']);

In [None]:
# Stacked area chart with explicit layer colors, a legend and axis labels
color_map = ['red', 'steelblue', 'pink']

plt.stackplot(
    df['period'], df['team_A'], df['team_B'], df['team_C'],
    colors=color_map,
    labels=['Team A', 'Team B', 'Team C'],
)

plt.legend(loc='upper left')
plt.xlabel('Period')
plt.ylabel('Points Scored')

plt.show()

## Multivariate views on complex datasets

### jointplot

Some seaborn functions combine multiple kinds of plots to quickly give informative summaries of a dataset. One, jointplot(), focuses on a single relationship. It plots the joint distribution between two variables along with each variable’s marginal distribution:

In [None]:
# Flipper length against bill length, with species as the hue variable
penguins = sns.load_dataset("penguins")
sns.jointplot(
    data=penguins,
    x="flipper_length_mm",
    y="bill_length_mm",
    hue="species",
);

### pairplot

The other, pairplot(), takes a broader view: it shows joint and marginal distributions for all pairwise relationships and for each variable, respectively:

In [None]:
# Pairwise plots of the penguins columns, with species as the hue variable
sns.pairplot(
    data=penguins,
    hue="species",
);

### Multiple plots

In [None]:
# Reload the tips dataset into `df`
df = sns.load_dataset('tips')

# One panel per value of 'sex'; 'day' is the hue variable
graph = sns.FacetGrid(df, col="sex", hue="day")

# Scatter tip against total_bill in every panel, then attach a legend
graph.map(plt.scatter, "total_bill", "tip", edgecolor="w").add_legend()

plt.show()

In [None]:
# Grid of panels: one row per 'smoker' value, one column per 'time' value
graph = sns.FacetGrid(df, row='smoker', col='time')

# Histogram of total_bill (15 bins, orange) in every panel
graph.map(plt.hist, 'total_bill', color='orange', bins=15)

plt.show()

In [None]:
# Per-row statistics for teams A and B (four rows each)
team_stats = {
    'team': ['A'] * 4 + ['B'] * 4,
    'points': [19, 12, 15, 14, 19, 23, 25, 29],
    'assists': [13, 15, 11, 8, 6, 8, 11, 14],
    'rebounds': [11, 7, 8, 12, 13, 7, 6, 8],
    'blocks': [1, 2, 2, 3, 5, 4, 3, 3],
}
df = pd.DataFrame(team_stats)

# Display the table
df

In [None]:
#set seaborn plotting aesthetics as default
sns.set()

#define plotting region (2 rows, 2 columns)
fig, axes = plt.subplots(2, 2)

#create one boxplot per metric, grouped by team, each in its own subplot
#(axes.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1])
for ax, metric in zip(axes.flat, ['points', 'assists', 'rebounds', 'blocks']):
    sns.boxplot(data=df, x='team', y=metric, ax=ax)

#stop the axis labels of neighbouring subplots from overlapping
fig.tight_layout()

In [None]:
#set seaborn plotting aesthetics as default
sns.set()

#define plotting region (1 row, 2 columns)
fig, axes = plt.subplots(1, 2)

#create a violin plot in each subplot
sns.violinplot(data=df, x='team', y='points', ax=axes[0])
sns.violinplot(data=df, x='team', y='assists', ax=axes[1])

#stop the y-axis label of the right subplot from overlapping the left one
fig.tight_layout()

## Heat Map

In [None]:
# generating 2-D 10x10 matrix of random integers from 1 to 100.
# `high` is exclusive in randint, so it must be 101 for 100 to be a possible
# value; a seeded generator makes the matrix (and the heatmaps drawn from it)
# identical on every run.
data = np.random.RandomState(0).randint(low=1,
                                        high=101,
                                        size=(10, 10))
data

In [None]:
# Bounds passed to the heatmap as vmin / vmax
vmin, vmax = 1, 100

# Draw the matrix as a heatmap with those bounds
hm = sns.heatmap(data=data, vmin=vmin, vmax=vmax)

plt.show()

In [None]:
# Flag passed to the heatmap's `annot` argument
annot = True

# Draw the matrix as a heatmap with annot switched on
hm = sns.heatmap(data=data, annot=annot)

plt.show()