# Demo 4.5: Simple Bar Charts with plotly.express

- **Question:  What is the Average City MPG By Vehicle Type?**  

- Vertical Bar Charts  
- Horizontal Bar Charts  

---

In [2]:
import pandas as pd 
import plotly.express as px

# Read the Data File into a *pandas* DataFrame  

In [3]:
# Load the vehicle data from the CSV file located next to this notebook.
df = pd.read_csv(filepath_or_buffer='Cars.csv')

# Report (rows, columns) as text, then render the first two records.
print(df.shape)
df.head(n=2)

(428, 13)


Unnamed: 0,Vehicle_Make,Vehicle_Model,Vehicle_Type,Manufacturing_Origin,MPG_City,MPG_Hwy,MSRP,Invoice,Weight,Wheelbase,DriveTrain,EngineSize,Horsepower
0,Acura,MDX,SUV,Asia,17,23,36945,33337,4451,106,All,3.5,265
1,Acura,RSX Type S 2dr,Sedan,Asia,24,31,23820,21761,2778,101,Front,2.0,200


# Clean the Data  

### Change Data Types as Needed  

In [4]:
# Inspect each column's dtype *before* any conversion
df.dtypes

Vehicle_Make             object
Vehicle_Model            object
Vehicle_Type             object
Manufacturing_Origin     object
MPG_City                  int64
MPG_Hwy                   int64
MSRP                      int64
Invoice                   int64
Weight                    int64
Wheelbase                 int64
DriveTrain               object
EngineSize              float64
Horsepower                int64
dtype: object

In [5]:
# Cast the price columns (MSRP, Invoice) and the fuel-economy columns
# (MPG_City, MPG_Hwy) from integers to floats, one column at a time.
for column_name in ('MSRP', 'Invoice', 'MPG_City', 'MPG_Hwy'):
    df[column_name] = df[column_name].astype(float)

In [6]:
# Inspect each column's dtype *after* the float conversion
df.dtypes

Vehicle_Make             object
Vehicle_Model            object
Vehicle_Type             object
Manufacturing_Origin     object
MPG_City                float64
MPG_Hwy                 float64
MSRP                    float64
Invoice                 float64
Weight                    int64
Wheelbase                 int64
DriveTrain               object
EngineSize              float64
Horsepower                int64
dtype: object

# Aggregate the Data  
- Categorical Variable to Group On:  **Vehicle_Type**  
- Continuous Variable We're Interested In:  **MPG_City** 
- Aggregation Function:  **mean** 


In [7]:
# Preview the first two rows of the frame that is about to be aggregated
df.head(2)

Unnamed: 0,Vehicle_Make,Vehicle_Model,Vehicle_Type,Manufacturing_Origin,MPG_City,MPG_Hwy,MSRP,Invoice,Weight,Wheelbase,DriveTrain,EngineSize,Horsepower
0,Acura,MDX,SUV,Asia,17.0,23.0,36945.0,33337.0,4451,106,All,3.5,265
1,Acura,RSX Type S 2dr,Sedan,Asia,24.0,31.0,23820.0,21761.0,2778,101,Front,2.0,200


In [8]:
# Optional: list the distinct values of the column we want to group on
# (each distinct value becomes one group in the aggregation below)
df['Vehicle_Type'].unique()

array(['SUV', 'Sedan', 'Sports', 'Wagon', 'Truck', 'Hybrid'], dtype=object)

### Group on One Column:  *Vehicle_Type* 


In [9]:
# List the column names available for grouping and aggregating
df.columns

Index(['Vehicle_Make', 'Vehicle_Model', 'Vehicle_Type', 'Manufacturing_Origin',
       'MPG_City', 'MPG_Hwy', 'MSRP', 'Invoice', 'Weight', 'Wheelbase',
       'DriveTrain', 'EngineSize', 'Horsepower'],
      dtype='object')

In [10]:
# Column(s) that define the groups, and the numeric columns to average per group.
categories_to_groupby = ["Vehicle_Type"]
measures_to_group = ['MPG_City', 'MPG_Hwy']

# Split the rows by vehicle type, keep only the measures, then take the mean.
# Selecting with a *list* of measures yields a DataFrame, even though the
# result is stored under the name `ser`.
rows_by_category = df.groupby(categories_to_groupby)
ser = rows_by_category[measures_to_group].mean()

ser

Unnamed: 0_level_0,MPG_City,MPG_Hwy
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Hybrid,55.0,56.0
SUV,16.1,20.5
Sedan,21.083969,28.629771
Sports,18.408163,25.489796
Truck,16.5,21.0
Wagon,21.1,27.9


### Convert the pandas ***Series*** to a DataFrame (If Needed)  

In [11]:
# Report the concrete type produced by the groupby/mean step.
print("ser is a " , type(ser))

# Normalise to a DataFrame: a Series is wrapped with to_frame(),
# anything else is used as-is.
is_series = isinstance(ser, pd.Series)
print('That is a Series' if is_series else "Not a Series but a Dataframe")
df_grouped = ser.to_frame() if is_series else ser

# Dimensions first, then a preview of the grouped result.
print(df_grouped.shape)
df_grouped.head()


ser is a  <class 'pandas.core.frame.DataFrame'>
Not a Series but a Dataframe
(6, 2)


Unnamed: 0_level_0,MPG_City,MPG_Hwy
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Hybrid,55.0,56.0
SUV,16.1,20.5
Sedan,21.083969,28.629771
Sports,18.408163,25.489796
Truck,16.5,21.0


### Move the Index Column into the Dataframe  

In [12]:
# Move the group key(s) out of the index and into ordinary columns so the
# plotting cells can refer to them by column name.
#
# Reassign instead of using inplace=True: df_grouped may be the very same
# object as `ser`, and an in-place reset would silently modify `ser` too.
# The guard keeps the cell idempotent - re-running it after the index has
# already been reset would otherwise insert a stray 'index' column.
if any(level_name is not None for level_name in df_grouped.index.names):
    df_grouped = df_grouped.reset_index()

# NOTE(review): the label below says "3.3" although this notebook is Demo 4.5 - confirm it is intentional
print("3.3 Demo:  Grouping on One Column")
print(df_grouped.shape)
df_grouped.head()

3.3 Demo:  Grouping on One Column
(6, 3)


Unnamed: 0,Vehicle_Type,MPG_City,MPG_Hwy
0,Hybrid,55.0,56.0
1,SUV,16.1,20.5
2,Sedan,21.083969,28.629771
3,Sports,18.408163,25.489796
4,Truck,16.5,21.0


# Plot the Data    

### Vertical Bar Chart  

In [14]:
# Vertical bars: one bar per vehicle type (x), bar height = mean city MPG (y).
fig = px.bar(
    data_frame=df_grouped,
    x='Vehicle_Type',
    y='MPG_City',
    title='MPG_City by Vehicle Type',
)
fig.show()

### Horizontal Bar Chart  
   

In [16]:
# Horizontal bars: the numeric measure goes on x and the category on y.
# orientation='h' states the intent explicitly instead of relying on
# plotly's automatic inference from the column types.
# The title names the column that is actually plotted (MPG_City).
px.bar(
    df_grouped,
    x='MPG_City',
    y='Vehicle_Type',
    orientation='h',
    title='MPG_City by Vehicle Type',
)

# Display *Templates*  

- ggplot2  
- seaborn  
- simple_white  
- plotly  
- plotly_white  
- plotly_dark  
- presentation  
- xgridoff  
- ygridoff  
- gridon  
- none