Importing modules:

In [25]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import seaborn as sns

Load the `mpg` (fuel economy) dataset with seaborn and show the first 3 rows.

In [26]:
# Load seaborn's "mpg" example dataset and preview its first three rows.
data = sns.load_dataset(name="mpg")
data.head(n=3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite


Show statistics about numerical data.

In [27]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. `horsepower` reports a count of 392 against 398 for the other
# columns, so it has missing values.
data.describe()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
count,398.0,398.0,398.0,392.0,398.0,398.0,398.0
mean,23.514573,5.454774,193.425879,104.469388,2970.424623,15.56809,76.01005
std,7.815984,1.701004,104.269838,38.49116,846.841774,2.757689,3.697627
min,9.0,3.0,68.0,46.0,1613.0,8.0,70.0
25%,17.5,4.0,104.25,75.0,2223.75,13.825,73.0
50%,23.0,4.0,148.5,93.5,2803.5,15.5,76.0
75%,29.0,8.0,262.0,126.0,3608.0,17.175,79.0
max,46.6,8.0,455.0,230.0,5140.0,24.8,82.0


Show a summary of the categorical (object-typed) columns.

In [28]:
# Summarise only the object-typed columns: count, number of unique values,
# most frequent value and its frequency.
data.describe(include=["O"])

Unnamed: 0,origin,name
count,398,398
unique,3,305
top,usa,ford pinto
freq,249,6


Number of cars per region of origin:

In [29]:
# Count how many cars come from each origin.
data["origin"].value_counts()

usa       249
japan      79
europe     70
Name: origin, dtype: int64

Histogram of the number of cars by fuel consumption (mpg):



In [30]:
# Count histogram of the `mpg` column, requested with 25 bins.
fig_h6 = px.histogram(
    data,
    x="mpg",
    nbins=25,
    title="Distribution about mpg",
)
fig_h6.show()

Histogram of the probability density of fuel consumption (mpg), with one row per origin and colored by number of cylinders:


In [31]:
# Probability-density histogram of `mpg`: one facet row per origin, with the
# bars coloured by the number of cylinders.
fig_hist_4 = px.histogram(
    data,
    x="mpg",
    color="cylinders",
    facet_row="origin",
    histnorm="probability density",
    nbins=25,
    title="The probability density for cars mpg",
)
fig_hist_4.show()

Remove the cars produced in Japan from the dataframe:

In [32]:
# Keep every row whose origin is not 'japan' (the source frame is left
# untouched), then check which origins remain.
n_df = data.loc[data["origin"].ne("japan")]
n_df["origin"].value_counts()

usa       249
europe     70
Name: origin, dtype: int64

Histogram of the number of cars per origin:

In [33]:
# Bar-style histogram: number of rows in the filtered frame per origin.
fig_h3 = px.histogram(n_df, x="origin")
fig_h3.show()

Histogram of the mpg distribution, split by origin:

In [34]:
# mpg histogram coloured by origin, with a rug plot in the margin.
fig_h2 = px.histogram(
    data_frame=n_df,
    x="mpg",
    color="origin",
    marginal="rug",
    title="The distribution of mpg: (EU vs. USA)",
)
fig_h2.show()

Swap the axes of the histogram (horizontal orientation):

In [35]:
# Same mpg-by-origin histogram, but with `mpg` on the y-axis and the bars drawn
# horizontally.
fig_h = px.histogram(
    n_df,
    y="mpg",
    color="origin",
    marginal="rug",
    orientation="h",
    title="The distribution of mpg: (EU vs. USA)",
)
fig_h.show()

Distribution plot of mpg and acceleration (cars from Japan excluded):

In [36]:
import plotly.figure_factory as ff

# Distribution plot with two groups taken from the filtered frame: the `mpg`
# column and the `acceleration` column, labelled by column name.
fig = ff.create_distplot(
    [n_df["mpg"], n_df["acceleration"]],
    group_labels=["mpg", "acceleration"],
)
fig.show()

Distribution plot of two normal distributions, N(0, 1) and N(3, 1):

In [37]:
import numpy as np

# Draw both samples from a seeded generator so that the data, and therefore
# the figure, are identical on every Restart & Run All. A local Generator is
# used instead of np.random.seed so the global NumPy random state is untouched.
rng = np.random.default_rng(42)
first = rng.standard_normal(10000)       # 10,000 draws from N(0, 1)
second = rng.standard_normal(10000) + 3  # 10,000 draws shifted to mean 3

# Overlay both samples; curve_type='normal' draws a fitted normal curve for
# each group instead of the default KDE.
fig_dist = ff.create_distplot(
    hist_data=[first, second],
    group_labels=['N(0,1)', 'N(3,1)'],
    bin_size=0.4,
    curve_type='normal',
    colors=['mediumblue', 'mediumpurple'],
)
fig_dist.show()

Two dimensional histogram with distribution of MPG and acceleration.


In [38]:
# Background layer: 2D histogram of mpg against acceleration on a 30 x 30 grid
# request.
density_trace = go.Histogram2d(
    x=data["mpg"],
    y=data["acceleration"],
    nbinsx=30,
    nbinsy=30,
)

# Foreground layer: every car drawn as a semi-transparent white "x".
points_trace = go.Scatter(
    x=data["mpg"],
    y=data["acceleration"],
    mode="markers",
    marker=dict(symbol="x", opacity=0.7, color="white", size=8),
)

# Trace order matters: the histogram is added first so the markers sit on top.
fig = go.Figure(data=[density_trace, points_trace])

fig.update_layout(
    title_text="The distribution of MPG and acceleration.",  # title of plot
    xaxis_title_text="MPG",  # xaxis label
    yaxis_title_text="Acceleration",  # yaxis label
)

fig.show()