# Data Visualization - Basics

## Import Libraries


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, output_file, show
from bokeh.palettes import magma

## Seaborn

In [None]:
# Apply seaborn's 'whitegrid' style to the plots drawn after this point.
sns.set_style('whitegrid')

In [None]:
# Load the seaborn example datasets used throughout the notebook.
tips_df = sns.load_dataset('tips')
flowers_df = sns.load_dataset('iris')

# Reshape the long-form flights table into a month x year grid of passenger
# counts (used by the heatmap). The pivot arguments are passed by keyword:
# positional use is deprecated in pandas and rejected from pandas 2.0 onwards.
flights_df = sns.load_dataset('flights').pivot(
    index='month',
    columns='year',
    values='passengers',
)

In [None]:
# scatterplot: sepal length against sepal width, coloured by species
plt.figure(figsize=(12, 6))
plt.title('Sepal Dimensions')
sns.scatterplot(
    data=flowers_df,
    x='sepal_length',
    y='sepal_width',
    hue='species',
    s=70,  # marker size
)

In [None]:
# lineplot: sepal length plotted against species
plt.figure(figsize=(12, 6))
plt.title('Sepal Lengths')
sns.lineplot(data=flowers_df, x='species', y='sepal_length')

In [None]:
# barplot: total bill per day, with separate bars for each value of 'sex'
plt.figure(figsize=(12, 6))
plt.title('Barplot')
sns.barplot(
    data=tips_df,
    x='day',
    y='total_bill',
    hue='sex',
)
plt.show()

In [None]:
# heatmap: the pivoted flights table, each cell annotated with its integer value
plt.title('No of Passengers (1000s)')
sns.heatmap(
    data=flights_df,
    annot=True,
    fmt='d',  # integer format for the cell annotations
    cmap='Blues',
)

In [None]:
# histplot: distribution of total_bill split by 'sex', with a KDE curve overlaid
sns.histplot(
    data=tips_df,
    x='total_bill',
    hue='sex',
    kde=True,
)
plt.show()

## Matplotlib

In [None]:
# single lineplot: one value per year
# NOTE: x and y are reused by the later line/bar plot cells.
plt.figure(figsize=(12, 6))
x = range(2020, 2026)
y = [0.333, 0.777, 0.5, 0.8, 0.46, 0.467]
plt.plot(x, y)
plt.gca().set(xlabel='Year', ylabel='Value')
plt.show()

In [None]:
# multiple lineplot: two series drawn on the same axes
eur = y  # reuses the values from the single lineplot cell
usd = [0.9, 0.777, 0.2, 0.7, 0.32, 0.111]

# Each line carries its own label, so the legend is built from the labels.
plt.plot(x, eur, marker='x', label='EUR')
plt.plot(x, usd, marker='o', label='USD')

plt.title('Currency Values')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()

In [None]:
# single barplot: one bar per year, bar height taken from y
plt.bar(x, height=y)
plt.gca().set(xlabel='Year', ylabel='Value')
plt.title('Currency Values')

In [None]:
# multiple (stacked) barplot: the usd bars start where the eur bars end
plt.bar(x, height=eur)
plt.bar(x, height=usd, bottom=eur)

In [None]:
# single histogram: sepal width with bin edges every 0.25 starting at 2
plt.title('Distribution of Sepal Width')
plt.hist(
    flowers_df['sepal_width'],
    bins=np.arange(2, 5, 0.25),
)

In [None]:
# overlap histogram: per-species subsets of the iris data
# NOTE: virginica_df is not drawn here but is used by the stacked histogram cell.
setosa_df = flowers_df.loc[flowers_df['species'] == 'setosa']
versicolor_df = flowers_df.loc[flowers_df['species'] == 'versicolor']
virginica_df = flowers_df.loc[flowers_df['species'] == 'virginica']

# Semi-transparent bars over identical bin edges keep both distributions visible.
plt.hist(setosa_df['sepal_width'], bins=np.arange(2, 5, 0.25), alpha=0.4)
plt.hist(versicolor_df['sepal_width'], bins=np.arange(2, 5, 0.25), alpha=0.4)

In [None]:
# stacked histogram: the three species' sepal widths stacked in each bin
plt.title('Distribution of Sepal Width')
plt.hist(
    [
        setosa_df['sepal_width'],
        versicolor_df['sepal_width'],
        virginica_df['sepal_width'],
    ],
    bins=np.arange(2, 5, 0.25),
    stacked=True,
)
# Legend entries are listed in the same order as the datasets above.
plt.legend(['Setosa', 'Versicolor', 'Virginica'])

## Bokeh

In [None]:
# scatterplot: tip against total bill, one marker per row of tips_df
graph = figure(title="Bokeh Scatter Graph")

# A per-point colour sequence must have exactly one entry per data row;
# a fixed magma(256) does not match the number of rows in tips_df and leaves
# the glyph's data columns with inconsistent lengths.
# NOTE: magma() can generate at most 256 colours, so this assumes tips_df has
# no more than 256 rows. Colours follow row order, not any data value.
color = magma(len(tips_df))

graph.scatter(tips_df['total_bill'], tips_df['tip'], color=color)
show(graph)  # displaying the model

In [None]:
# barplot: two sets of vertical bars drawn on the same figure
graph = figure(title="Bokeh Bar Chart")

# Bars positioned at total_bill, with height given by tip.
graph.vbar(
    x=tips_df['total_bill'],
    top=tips_df['tip'],
    color='green',
    legend_label="Bill VS Tips",
)
# Bars positioned at tip, with height given by size.
graph.vbar(
    x=tips_df['tip'],
    top=tips_df['size'],
    color='red',
    legend_label="Tips VS Size",
)

# Clicking a legend entry hides the corresponding bars.
graph.legend.click_policy = "hide"
show(graph)  # displaying the model