# Pandas: grouping

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
df = pd.read_csv("vehicles.csv")

# How many car models? 

In [15]:
df['Model'].value_counts().count()

3608

# Group by brand

In [16]:
df.groupby('Make').Model.count()

Make                        127
Model                      3608
Year                         34
Engine Displacement          65
Cylinders                     9
Transmission                 45
Drivetrain                    8
Vehicle Class                34
Fuel Type                    13
Fuel Barrels/Year           123
City MPG                     48
Highway MPG                  49
Combined MPG                 46
CO2 Emission Grams/Mile     575
Fuel Cost/Year               55
dtype: int64

In [20]:
# OR
df.Make.nunique()

127

*Converting Grams/Mile to Grams/Km 1 Mile = 1.60934 Km*

*Converting Gallons to Liters: 1 Gallon = 3.78541 Liters*

# Brand with the most cars?

In [26]:
# group by brand and count models of each brand
most_models = df.groupby('Make')[['Model']].count()
most_models.sort_values('Model')

Unnamed: 0_level_0,Model
Make,Unnamed: 1_level_1
Mahindra,1
General Motors,1
Qvale,1
Fisker,1
S and S Coach Company E.p. Dutton,1
...,...
Toyota,1836
GMC,2347
Dodge,2360
Ford,2946


# Average CO2_Emission_Grams/Km by brand

In [23]:
df['CO2 Emission Grams/KM'] = df['CO2 Emission Grams/Mile']/1.60934
df.groupby(['Make'])[['CO2 Emission Grams/KM']].mean().sort_values('CO2 Emission Grams/KM').tail()

Unnamed: 0_level_0,CO2 Emission Grams/KM
Make,Unnamed: 1_level_1
Laforza Automobile Inc,502.012683
Bugatti,542.497235
Superior Coaches Div E.p. Dutton,552.213951
S and S Coach Company E.p. Dutton,552.213951
Vector,651.919248


# (Optional) 

Use `pd.cut` or `pd.qcut` to create 4 groups (bins) of cars, by Year. We want to explore how cars have evolved decade by decade.

In [27]:
df.Year.describe()

count    35952.00000
mean      2000.71640
std         10.08529
min       1984.00000
25%       1991.00000
50%       2001.00000
75%       2010.00000
max       2017.00000
Name: Year, dtype: float64

In [4]:
df['bins'] = pd.cut(x = df.Year, bins = [1980, 1990, 2000, 2010, 2020])
years = df.groupby('bins')['Year'].count()
years

bins
(1980, 1990]     7926
(1990, 2000]     9169
(2000, 2010]    10866
(2010, 2020]     7991
Name: Year, dtype: int64

### Did cars consume more gas in the eighties?

show the average City_Km/Liter by year_range

In [None]:
df.groupby('bins')[['City MPG']].mean() # yes

### Which brands are more environment friendly?

In [None]:
df.groupby('Make')[['CO2 Emission Grams/Mile']].min()

### Does the drivetrain affect fuel consumption?

In [None]:
df.groupby('Drivetrain')[['Fuel Barrels/Year']].mean().plot(kind='barh')

### Do cars with automatic transmission consume more fuel than cars with manual transmission?

Use `groupby` and `agg` with different aggregation measures for different columns:

In [11]:
df.Transmission = df.Transmission.apply(lambda x: 'Automatic' if 'Automatic' in x else 'Manual')

In [9]:
df.groupby('Transmission')[['Fuel Barrels/Year']].mean() # yes

Unnamed: 0_level_0,Fuel Barrels/Year
Transmission,Unnamed: 1_level_1
Automatic,18.156082
Manual,16.588264


aggregate with average City_Km/Liter and the count of the Trans

In [None]:
## your code is here

aggregate with average City_Km/Liter and the minimum of the Trans

In [None]:
### your code is here

## Off topic: Groupby method

In [None]:
# learn groupby
l = [[1, 2, 1], [5, 2, 1], [2, 1, 1], [1, 2, 1]]
df = pd.DataFrame(l, columns=["a", "b", "c"])

df.groupby(["a"]).count()
# it basically just counts how many 1's, 2's and 5's there are in the 'a' column
# it is not counting the other columns at all
# just replicates the result

## Off topic: Filtering

In [None]:
df[df.Drivetrain == '2-Wheel Drive']['Fuel Barrels/Year'].mean()

In [None]:
"data": [     {       "type": "brands",       "id": "3438",       "attributes": {         "slug": "088-by-phillip-airaud",         "name": "0.88 by Phillip Airaud"       },       "links": {         "self": "/brands/3438",         "products": "/products/?brands=3438",         "follows": "/follows/?brand_id=3438",         "web": "https://gcp-staging.wdn.io/brands/088-by-phillip-airaud"       }     },