# Creating Statistical Charts using Plotly

In [1]:
import numpy as np 
import pandas as pd 
import cufflinks as cf 
import chart_studio.plotly  as py 
import plotly.tools as tls 
import plotly.graph_objs as go
import plotly.express as px 

## Creating Histograms

In [5]:
# Path to the mtcars CSV file, relative to this notebook.
mtcars_source = '../../inputs/mtcars.csv'

# Read the whole dataset into a DataFrame; later cells reuse `cars`.
cars = pd.read_csv(filepath_or_buffer=mtcars_source)

# Keep the mpg column on its own for the single-variable histogram.
mpg = cars['mpg']

In [8]:
# Histogram of the mpg values; `x` names the column to bin.
hist_fig = px.histogram(data_frame=mpg, x='mpg')

# Render the figure in the notebook output.
hist_fig.show()

### Multiple/Overlaid Histograms

In [65]:
# The three numeric columns compared in this and the following cells.
cars_subset = cars[['mpg', 'disp', 'hp']]

# Overlaid histogram: one trace per column, named after the column.
over_hist_fig = go.Figure()
for column_name in cars_subset.columns:
    over_hist_fig.add_trace(
        go.Histogram(x=cars_subset[column_name], name=column_name)
    )

# Draw all three histograms on top of each other instead of side by side.
over_hist_fig.update_layout(
    barmode='overlay',
    title='Multiple/Overlaid Histograms',
)

# Reduce opacity so every histogram stays visible where they overlap.
over_hist_fig.update_traces(opacity=0.75)

over_hist_fig.show()



In [62]:
from sklearn.preprocessing import StandardScaler

# Standardize each feature (remove the mean, scale to unit variance) so the
# three columns can be compared on one shared axis.
cars_data = pd.DataFrame(
    StandardScaler().fit_transform(cars_subset),
    columns=list(cars_subset.columns),
)

# Overlaid histogram of the standardized features, one colour per feature.
feature_colors = {'mpg': 'orange', 'disp': 'blue', 'hp': 'green'}
over_hist_fig = go.Figure()
for feature, color in feature_colors.items():
    over_hist_fig.add_trace(
        go.Histogram(x=cars_data[feature], name=feature, marker_color=color)
    )

# Draw all three histograms on top of each other instead of side by side.
over_hist_fig.update_layout(
    barmode='overlay',
    title='Multiple/Overlaid Histograms',
)

# Reduce opacity so every histogram stays visible where they overlap.
over_hist_fig.update_traces(opacity=0.75)

over_hist_fig.show()

### Subplot Histogram

In [41]:
from plotly.subplots import make_subplots

# 2x2 grid of subplots; only three cells are filled, (2, 2) stays empty.
subplot_fig = make_subplots(rows=2, cols=2)

# One histogram trace per standardized feature.
mpg_trace = go.Histogram(x=cars_data.mpg, name='mpg', marker_color='orange')
disp_trace = go.Histogram(x=cars_data.disp, name='disp', marker_color='blue')
hp_trace = go.Histogram(x=cars_data.hp, name='hp', marker_color='green')

# Place each trace in its own grid cell. `add_trace` with `row`/`col` is used
# because `append_trace` is deprecated and emits a DeprecationWarning.
subplot_fig.add_trace(mpg_trace, row=1, col=1)
subplot_fig.add_trace(disp_trace, row=1, col=2)
subplot_fig.add_trace(hp_trace, row=2, col=1)

subplot_fig.update_layout(title='Subplot Histogram')

subplot_fig.show()

## Creating Box Plots

In [61]:
# One box per standardized feature, drawn side by side in a single figure.
box_colors = {'mpg': 'orange', 'disp': 'blue', 'hp': 'green'}
box_fig = go.Figure(
    data=[
        go.Box(y=cars_data[feature], name=feature, marker_color=color)
        for feature, color in box_colors.items()
    ]
)

box_fig.update_layout(title='Box Plot')

box_fig.show()

## Creating Scatter Plots

In [60]:
# Two marker-only series sharing the standardized displacement on the y-axis:
# standardized mpg and standardized hp are both plotted along the x-axis.
scatter_fig = go.Figure(
    data=[
        go.Scatter(
            x=cars_data.mpg,
            y=cars_data.disp,
            mode='markers',
            name='mpg',
            marker_color='orange',
        ),
        go.Scatter(
            x=cars_data.hp,
            y=cars_data.disp,
            mode='markers',
            name='hp',
            marker_color='green',
        ),
    ]
)

scatter_fig.update_layout(
    title='Scatter Plot',
    yaxis=dict(title='Standardized Displacement'),
)

# Alternative view that plots hp against mpg directly:
#   scatter_fig = px.scatter(cars_data, x='hp', y='mpg')
scatter_fig.show()

# Observation: the more horsepower (hp) a car has, the fewer miles per gallon (mpg) it gets.
