# Using Pandas

In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 200)
## to make it possible to display multiple output inside one cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

<b>load the data from the vehicles.csv file into pandas data frame

In [2]:
## Your Code here
cars = pd.read_csv("data/vehicles.csv")

First exploration of the dataset:

- How many observations does it have?
- Look at all the columns: do you understand what they mean?
- Look at the raw data: do you see anything weird?
- Look at the data types: are they the expected ones for the information the column contains?

In [3]:
## Your Code here
cars
cars.info()

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35952 entries, 0 to 35951
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Make                     35952 non-null  object 
 1   Model                    35952 non-null  object 
 2   Year                     35952 non-null  int64  
 3   Engine Displacement      35952 non-null  float64
 4   Cylinders                35952 non-null  float64
 5   Transmission             35952 non-null  object 
 6   Drivetrain               35952 non-null  object 
 7   Vehicle Class            35952 non-null  object 
 8   Fuel Type                35952 non-null  object 
 9   Fuel Barrels/Year        35952 non-null  float64
 10  City MPG                 35952 non-null  int64  
 11  Highway MPG              35952 non-null  int64  
 12  Combined MPG             35952 non-null  int64  
 13  CO2 Emission Grams/Mile  35952 non-null  float64
 14  Fuel Cost/Year        

### Cleaning and wrangling data

- Some car brand names refer to the same brand. Replace all brand names that contain the word "Dutton" for simply "Dutton". If you find similar examples, clean their names too. Use `loc` with boolean indexing.

- Convert CO2 Emissions from Grams/Mile to Grams/Km

- Create a binary column that solely indicates if the transmission of a car is automatic or manual. Use `pandas.Series.str.startswith` and .

- convert MPG columns to km_per_liter

Note:
<br>Converting Grams/Mile to Grams/Km

1 Mile = 1.60934 Km

Converting Gallons to Liters

1 Gallon = 3.78541 Liters



In [4]:
## Your Code here
cars.loc[cars['Model'].str.contains("Dutton"), 'Model'] = "Dutton"

cars['CO2 Emission Grams/Mile'] = cars['CO2 Emission Grams/Mile'] * 1.60934
cars.rename(columns={'CO2 Emission Grams/Mile': 'CO2 Emission Grams/Km'}, inplace=True)

cars['Automatic or Manual'] = cars['Transmission'].str.startswith('A').map({True: 1, False: 0})
#1 for Automatic & 0 for manual

cars['City MPG'] = cars['City MPG'] * 3.78541
cars['Highway MPG'] = cars['Highway MPG'] * 3.78541
cars['Combined MPG'] = cars['Combined MPG'] * 3.78541
cars.rename(columns={'City MPG': 'City km_per_liter', 'Highway MPG': 'Highway km_per_liter', 'Combined MPG': 'Combined km_per_liter'}, inplace=True)

cars


Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City km_per_liter,Highway km_per_liter,Combined km_per_liter,CO2 Emission Grams/Km,Fuel Cost/Year,Automatic or Manual
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,68.13738,64.35197,64.35197,841.306152,1950,1
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,49.21033,49.21033,49.21033,1100.169583,2550,1
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,60.56656,64.35197,60.56656,893.887786,2100,1
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,49.21033,49.21033,49.21033,1100.169583,2550,1
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,52.99574,79.49361,60.56656,893.887786,2550,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,128.70394,143.84558,136.27476,392.678960,1100,1
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,128.70394,143.84558,136.27476,391.069620,1100,1
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,128.70394,143.84558,136.27476,392.678960,1100,1
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,128.70394,147.63099,136.27476,395.897640,1100,1


### Gathering insights:

- How many car makers are there? How many models? Which car maker has the most cars in the dataset?

- When were these cars made? How big is the engine of these cars?

- What's the frequency of different transmissions, drivetrains and fuel types?

- What's the car that consumes the least/most fuel?

In [14]:
# Your Code here
print(cars.nunique())

most_common_brand = cars['Model'].value_counts().idxmax()
print(most_common_brand)

model_year = cars[["Model", "Year"]]
print(model_year)

model_EngineDisplacement = cars[["Model", "Engine Displacement"]]
print(model_EngineDisplacement)

cars['Transmission'].value_counts()
cars['Drivetrain'].value_counts()
cars['Fuel Type'].value_counts()




Make                      127
Model                    3608
Year                       34
Engine Displacement        65
Cylinders                   9
Transmission               45
Drivetrain                  8
Vehicle Class              34
Fuel Type                  13
Fuel Barrels/Year         123
City km_per_liter          48
Highway km_per_liter       49
Combined km_per_liter      46
CO2 Emission Grams/Km     575
Fuel Cost/Year             55
Automatic or Manual         2
dtype: int64
F150 Pickup 2WD
                     Model  Year
0        DJ Po Vehicle 2WD  1984
1         FJ8c Post Office  1984
2      Post Office DJ5 2WD  1985
3      Post Office DJ8 2WD  1985
4                      GNX  1987
...                    ...   ...
35947         fortwo coupe  2013
35948         fortwo coupe  2014
35949         fortwo coupe  2015
35950         fortwo coupe  2016
35951         fortwo coupe  2016

[35952 rows x 2 columns]
                     Model  Engine Displacement
0        DJ Po Vehicl

Automatic 4-spd                     10585
Manual 5-spd                         7787
Automatic (S6)                       2631
Automatic 3-spd                      2597
Manual 6-spd                         2423
Automatic 5-spd                      2171
Automatic 6-spd                      1432
Manual 4-spd                         1306
Automatic (S8)                        960
Automatic (S5)                        822
Automatic (variable gear ratios)      675
Automatic 7-spd                       662
Automatic (S7)                        261
Auto(AM-S7)                           256
Automatic 8-spd                       243
Automatic (S4)                        229
Auto(AM7)                             157
Auto(AV-S6)                           145
Auto(AM6)                             110
Auto(AM-S6)                            92
Automatic 9-spd                        90
Manual 3-spd                           74
Manual 7-spd                           68
Auto(AV-S7)                       

Front-Wheel Drive             13044
Rear-Wheel Drive              12726
4-Wheel or All-Wheel Drive     6503
All-Wheel Drive                2039
4-Wheel Drive                  1058
2-Wheel Drive                   423
Part-time 4-Wheel Drive         158
2-Wheel Drive, Front              1
Name: Drivetrain, dtype: int64

Regular                        23587
Premium                         9921
Gasoline or E85                 1195
Diesel                           911
Premium or E85                   121
Midgrade                          74
CNG                               60
Premium and Electricity           20
Gasoline or natural gas           20
Premium Gas or Electricity        17
Regular Gas and Electricity       16
Gasoline or propane                8
Regular Gas or Electricity         2
Name: Fuel Type, dtype: int64

<b> (Optional)

What brand has the worse CO2 Emissions on average?

Hint: use the function `sort_values()`

In [15]:
## your Code here
cars.sort_values(by=['CO2 Emission Grams/Km'])

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City km_per_liter,Highway km_per_liter,Combined km_per_liter,CO2 Emission Grams/Km,Fuel Cost/Year,Automatic or Manual
3071,BMW,i3 REX,2016,0.6,2.0,Automatic (A1),Rear-Wheel Drive,Subcompact Cars,Premium Gas or Electricity,1.563190,155.20181,140.06017,147.63099,59.545580,1050,1
3069,BMW,i3 REX,2014,0.6,2.0,Auto(A1),Rear-Wheel Drive,Subcompact Cars,Premium Gas or Electricity,1.563190,155.20181,140.06017,147.63099,64.373600,1050,1
3070,BMW,i3 REX,2015,0.6,2.0,Automatic (A1),Rear-Wheel Drive,Subcompact Cars,Premium Gas or Electricity,1.563190,155.20181,140.06017,147.63099,64.373600,1050,1
7916,Chevrolet,Volt,2016,1.5,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Compact Cars,Regular Gas or Electricity,2.006844,162.77263,158.98722,158.98722,82.076340,800,1
7917,Chevrolet,Volt,2017,1.5,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Compact Cars,Regular Gas or Electricity,2.006844,162.77263,158.98722,158.98722,82.076340,800,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20897,Lamborghini,Countach,1989,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20894,Lamborghini,Countach,1986,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20898,Lamborghini,Countach,1990,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20896,Lamborghini,Countach,1988,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0


Do cars with automatic transmission consume more fuel than cars with manual transmission on average?

In [24]:
## Your Code is here 
cars.sort_values(by=['Combined km_per_liter','Automatic or Manual'])

#if 'Automatic or Manual' == True


Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City km_per_liter,Highway km_per_liter,Combined km_per_liter,CO2 Emission Grams/Km,Fuel Cost/Year,Automatic or Manual
20894,Lamborghini,Countach,1986,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20895,Lamborghini,Countach,1987,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20896,Lamborghini,Countach,1988,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20897,Lamborghini,Countach,1989,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
20898,Lamborghini,Countach,1990,5.2,12.0,Manual 5-spd,Rear-Wheel Drive,Two Seaters,Premium,47.087143,22.71246,37.85410,26.49787,2043.172083,5800,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17524,Honda,Insight,2001,1.0,3.0,Manual 5-spd,Front-Wheel Drive,Two Seaters,Regular,6.219057,181.69968,227.12460,200.62673,269.852917,650,0
17526,Honda,Insight,2002,1.0,3.0,Manual 5-spd,Front-Wheel Drive,Two Seaters,Regular,6.219057,181.69968,223.33919,200.62673,269.852917,650,0
17528,Honda,Insight,2003,1.0,3.0,Manual 5-spd,Front-Wheel Drive,Two Seaters,Regular,6.219057,181.69968,223.33919,200.62673,269.852917,650,0
33279,Toyota,Prius Eco,2016,1.8,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Midsize Cars,Regular,5.885893,219.55378,200.62673,211.98296,254.275720,600,1
