In [2]:
import pandas as pd 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


In [3]:
# Load the fuel-consumption sample from a path relative to the working directory.
df = pd.read_csv(filepath_or_buffer='../data_samples/FuelConsumption.csv')
# Preview the first five rows (the default for head) as the cell's output.
df.head(n=5)

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [4]:
# Histogram of the CO2EMISSIONS column over every row of df, with default binning.
fig = px.histogram(data_frame=df, x='CO2EMISSIONS')
fig.show()

In [5]:
# CO2EMISSIONS histogram, coloured by ENGINESIZE, with a custom bin count,
# a figure title and a shortened label for the x column.
fig = px.histogram(
    data_frame=df,
    x='CO2EMISSIONS',
    color='ENGINESIZE',
    nbins=40,
    title='CO2 Emission Histogram',
    labels={'CO2EMISSIONS': 'co2'},
)
fig.show()

In [6]:
# Box plot of CO2EMISSIONS per ENGINESIZE value, coloured by CYLINDERS.
# update_layout returns the same figure, so the readable titles are applied in one chain.
fig = px.box(
    data_frame=df,
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    color='CYLINDERS',
).update_layout(
    title='CO2 Emissions vs Engine Size by Cylinders',
    xaxis_title='Engine Size',
    yaxis_title='CO2 Emissions',
    legend_title='Cylinders',
)
fig.show()

In [7]:
# Strip plot of CO2EMISSIONS against ENGINESIZE, coloured by CYLINDERS.
# One facet per FUELTYPE value, wrapped after two columns.
fig = px.strip(
    data_frame=df,
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    color='CYLINDERS',
    facet_col='FUELTYPE',
    facet_col_wrap=2,
    facet_row_spacing=0.05,
)
fig.show()

In [8]:
# Histogram over ENGINESIZE with CO2EMISSIONS on the y axis, coloured by CYLINDERS,
# plus a dashed red vertical line at the mean engine size.
avg_size = df['ENGINESIZE'].mean()
fig = px.histogram(
    df,
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    color='CYLINDERS',
    nbins=40,
)
# NOTE(review): with y set and no histfunc, Plotly Express should sum y within
# each bin -- confirm a per-bin sum (not a mean) is what this chart is meant to show.

# In a Python f-string a `$` before the brace is printed literally, so the label
# interpolates with plain braces and rounds the mean to two decimals.
fig.add_vline(
    x=avg_size,
    annotation_text=f'Average Engine Size: {avg_size:.2f}',
    line_dash='dash',
    line_color='red',
)
# Render explicitly, like the other cells, instead of relying on the cell's last expression.
fig.show()

In [9]:
# Scatter of CO2EMISSIONS against ENGINESIZE, restricted to rows with CYLINDERS > 5.
# CYLINDERS drives both marker colour and marker size; hovering shows the MAKE.
fig = px.scatter(
    data_frame=df.loc[df['CYLINDERS'] > 5],
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    color='CYLINDERS',
    size='CYLINDERS',
    hover_name='MAKE',
)
fig.show()

In [10]:
# Pairwise scatter matrix of ENGINESIZE and CO2EMISSIONS, coloured by CYLINDERS.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['ENGINESIZE', 'CO2EMISSIONS'],
    color='CYLINDERS',
)
fig.show()

In [11]:
# Mean CO2EMISSIONS for each distinct ENGINESIZE, rounded to two decimals.
# Despite the variable's name, the grouping key is ENGINESIZE, not the vehicle model.
avg_by_model = (
    df
    .groupby(by=['ENGINESIZE'], as_index=False)
    .agg({'CO2EMISSIONS': 'mean'})
    .round(decimals=2)
)
avg_by_model

Unnamed: 0,ENGINESIZE,CO2EMISSIONS
0,1.0,148.67
1,1.2,150.75
2,1.3,143.0
3,1.4,180.2
4,1.5,163.08
5,1.6,190.63
6,1.8,180.46
7,2.0,207.17
8,2.1,212.0
9,2.4,220.38


In [12]:
# Scatter of the per-engine-size averages with an 'ols' trendline overlaid.
fig = px.scatter(
    data_frame=avg_by_model,
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    trendline='ols',
)
fig.show()

In [13]:
# Same scatter of the per-engine-size averages, this time with a 'lowess' trendline.
fig = px.scatter(
    data_frame=avg_by_model,
    x='ENGINESIZE',
    y='CO2EMISSIONS',
    trendline='lowess',
)
fig.show()

In [14]:
# Sunburst whose hierarchy goes from ENGINESIZE (inner ring) to CO2EMISSIONS (outer ring).
fig = px.sunburst(data_frame=df, path=['ENGINESIZE', 'CO2EMISSIONS'])
fig.show()

In [15]:
# 2-D density heatmap of ENGINESIZE (x) against CYLINDERS (y).
fig = px.density_heatmap(
    data_frame=df,
    x='ENGINESIZE',
    y='CYLINDERS',
)
fig.show()

In [16]:
# ENGINESIZE histogram with one facet (and one colour) per CYLINDERS value,
# wrapped after four columns.
fig = px.histogram(
    data_frame=df,
    x='ENGINESIZE',
    color='CYLINDERS',
    facet_col='CYLINDERS',
    facet_col_wrap=4,
    labels={'ENGINESIZE': 'Engine Size'},
    title='Histogram of Engine Size by Cylinders',
)
# Keep only the text after the last "=" in each annotation.
fig.for_each_annotation(
    lambda annotation: annotation.update(text=annotation.text.rsplit("=", 1)[-1])
)
fig.show()

In [17]:
# Strip plot of CO2EMISSIONS against ENGINESIZE, coloured by CYLINDERS.
# The axis-update calls return the figure, so the readable titles are chained on.
fig = (
    px.strip(data_frame=df, x="ENGINESIZE", y='CO2EMISSIONS', color="CYLINDERS")
    .update_yaxes(title="CO2 Emissions")
    .update_xaxes(title="Engine Size")
)
fig.show()