In [None]:
# %pip install seaborn  # uncomment to install seaborn into this kernel's environment

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Display plots directly in the notebook's output cells instead of in a separate window.
%matplotlib inline

The "tips" dataset in Seaborn is a built-in dataset with information about the tips received by servers at a restaurant. It contains the total bill amount, the tip amount, the time of day, the day of the week, and the gender of the person paying the bill, among other fields. It is widely used in data analysis and visualization tutorials and examples to demonstrate the functionality of the Seaborn library.
* total_bill: total bill (cost) of the meal, in US dollars
* tip: tip (gratuity) left by the diners, in US dollars
* sex: gender of the person who paid the bill ("Male" or "Female")
* smoker: smoker or non-smoker (Yes or No)
* day: day of the week (Thur, Fri, Sat, Sun)
* time: time of the meal (Lunch or Dinner)
* size: number of diners

In [None]:
# Load seaborn's example "tips" dataset.
tips = sns.load_dataset("tips")
# Show only the last two of the first ten rows.
tips.head(n=10).tail(n=2)

In [None]:
# Cross-tabulate: count the observations for every (sex, day) combination.
pd.crosstab(index=tips["sex"], columns=tips["day"])

# Countplot
This is similar to the barplot in R with the following common options (for me at least):
- data: DataFrame or array-like
- x: Categorical column name or vector
- y: Categorical column name or vector; pass `y` instead of `x` to draw the bars horizontally.
- hue: Column name or vector for color encoding
- order: Order to plot the categorical levels in
- hue_order: Order to plot the categorical levels of the hue variable in
- palette: Color palette name or list of colors

A palette is a sequence of colors used to define the colors of the visual elements in a plot. Seaborn ships with a variety of built-in color palettes, and we can also create a custom palette. Some examples of built-in palettes are: *deep*, *muted*, *bright*, *dark*, *pastel*, etc. You can access these palettes with the `color_palette()` function, e.g. `sns.color_palette('bright')`, or pass the name of the palette directly as the `palette` argument of the plotting functions.
- color: Color for all of the elements, or seed for a gradient palette
- dodge: When `hue` is used, whether to shift the bars along the categorical axis. With `dodge=True` each hue level gets its own bar, drawn side by side within the category.
- linewidth: The width of the line in points
- alpha: The transparency of the fill color.

In [None]:
# A count plot is a bar chart showing how many observations fall into each
# category of the chosen variable (here: `sex`).
sns.countplot(x="sex", data=tips)
plt.show()

In [None]:
# `hue` adds a second categorical variable: within each `sex` category the
# counts are split by `day`, and every day gets its own colour.
sns.countplot(
    x="sex",
    hue="day",
    data=tips,
    order=["Female", "Male"],
    palette="husl",
    dodge=True,
    alpha=0.6,
    linewidth=1,
)
plt.show()

# Histogram
Using `histplot` from seaborn with the following options:
- data: The dataset that you want to plot.
- x: The name of the column in data that should be used for the x-axis.
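- bins: The number of bins to use in the histogram. Alternatively, `binwidth` sets the width of each bin; if both are given, `binwidth` overrides `bins`.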
- hist: Whether to show the histogram. (Note: this is an option of the older, deprecated `distplot`; `histplot` does not accept it.)
- kde: Whether to show the kernel density estimate (KDE) plot.
- kde_kws: A dictionary of additional keyword arguments to pass to the kdeplot function.
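- rug: Whether to show the rug plot (vertical lines representing individual observations). (Note: this is a `distplot` option; with `histplot`, add a rug by calling `sns.rugplot` separately.)
- fit: The type of distribution to fit to the data. (Note: this is a `distplot` option; `histplot` does not accept it.)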
- color: The color to use for the histogram and KDE plot.
- fill: Whether to fill the histogram.
- line_kws: A dictionary of additional keyword arguments to pass to the plot function for the KDE plot.
- ax: The matplotlib axes to use for the plot. If not provided, a new axes will be created.

In [None]:
# Histogram of the total bill with a kernel density estimate (KDE) overlaid.
sns.histplot(
    data=tips,
    x="total_bill",
    # `binwidth` overrides `bins`, so only one of the two is given; the
    # previously supplied `bins=5` never had any effect on the plot.
    binwidth=2,
    kde=True,
    color="red",
    # `fill` is an on/off switch, not a colour, so pass a real boolean.
    fill=True,
    # Extra keyword arguments for the KDE line.
    line_kws={"color": "red", "linewidth": 2, "linestyle": "-."},
)
plt.show()

# Scatter Plot
To make a scatterplot with the options:
- x and y: the x and y variables that are used to create the scatter plot
- hue: the column in the data used to color-code the points in the scatter plot
- style: the column in the data used to style-code the points in the scatter plot
- size: the column in the data used to size-code the points in the scatter plot
- palette: the color palette used for the hue encoding
- markers: the marker style used for plotting the points
- alpha: the transparency of the plotted points
- legend: whether to show the legend or not

In [None]:
# Tip vs. total bill: colour encodes `sex`, marker shape encodes `smoker`.
# Note: do not pass a single fixed marker (markers=".") together with `style`.
sns.scatterplot(
    x="total_bill",
    y="tip",
    hue="sex",
    style="smoker",
    data=tips,
    palette="dark",
    alpha=0.7,
    legend=True,
)
plt.show()

## Pairwise Plot

In [None]:
# Grid of pairwise relationships between the variables in `tips`.
sns.pairplot(tips)
plt.show()

In [None]:
# Same pairwise grid, coloured by `sex` using the "muted" palette.
sns.pairplot(data=tips, hue="sex", palette="muted")
plt.show()

## Joint Plot

In [None]:
# Joint view of tip vs. total bill, drawn as a scatter plot.
sns.jointplot(data=tips, x="total_bill", y="tip", kind="scatter")
plt.show()

In [None]:
# Joint view of tip vs. total bill, this time with kind="reg".
sns.jointplot(data=tips, x="total_bill", y="tip", kind="reg")
# Render explicitly, like the other plotting cells, so the cell shows only the
# figure instead of also echoing the returned grid object's repr.
plt.show()

# Boxplot

Some good options
- x and y: These parameters specify the variables to be plotted on the x and y axes, respectively.
- data: The data to be plotted.
- color: The color of the box plot elements.
- palette: The color palette to be used for the boxes.
- width: The width of the boxes.
- fliersize: The size of the fliers (outliers) in the plot.
- notch: If True, plots a notch in the boxes.
- saturation: The saturation level of the colors in the plot.
- linewidth: The width of the lines in the plot.
- order: The order in which the categories will be plotted.
- showmeans: If True, marks the mean value of each category (drawn as a marker by default; also pass `meanline=True` to draw it as a line).

In [None]:
# Box plot of the total bill for each day, with the days in an explicit order.
# NOTE(review): newer seaborn releases may warn when `palette` is given
# without `hue` — confirm against the installed version.
sns.boxplot(
    x="day",
    y="total_bill",
    data=tips,
    order=["Sun", "Sat", "Fri", "Thur"],
    palette="rainbow",
    saturation=0.2,
    width=0.4,
    linewidth=0.5,
    fliersize=3,
    notch=True,
    showmeans=True,
)
plt.show()

To place multiple charts side by side in a single figure, use subplots:

In [None]:
# plt.subplot(nrows, ncols, plot_number)
plt.subplot(1,2,1)
sns.countplot(x='sex',data=tips) 
plt.subplot(1,2,2)
sns.countplot(x='day',data=tips,);

# Heat Maps
This can be very useful when doing time-space diagrams, or correlation plots, or anything else that changes between two different variables.
`sns.heatmap(data, cmap=None, center=None, annot=False, fmt=".2g", square=False, linewidths=0, cbar_kws=None, **kwargs)`
- data: 2D array-like object or a DataFrame (required)
- cmap: the colormap you want to use (if not given, seaborn picks a default that depends on whether `center` is set). Some options: coolwarm, magma, inferno, plasma, Greys, Blues, etc.
- center: the value at which you want to center the colormap (default is None)
- annot: whether to annotate the heatmap with the numeric values (default is False)
- fmt: the string format for the annotations (default is ".2g")
- square: whether to make the heatmap square (default is False)
- linewidths: the width of the lines that will divide each cell in the heatmap (default is 0, i.e. no dividing lines)
- cbar_kws: a dictionary of keyword arguments for the colorbar

In [None]:
# Correlation matrix of the numeric columns, shown as an annotated heat map.
# The columns are selected by name rather than by position ([0, 1, 6]) so the
# cell is readable and does not depend on the DataFrame's column order.
sns.heatmap(
    data=tips[["total_bill", "tip", "size"]].corr(),
    cmap="Greens",
    annot=True,       # write the correlation value inside each cell
    fmt=".5f",        # five decimal places for the annotations
    square=False,
    linewidths=0.2,
    linecolor="black",
    cbar_kws=None,
)
plt.show()