## 1.4 Selecting and Viewing Data with Pandas

In [1]:
import pandas as pd

In [2]:
car_sales = pd.read_csv("car-sales.csv")

**.head()**

In [3]:
# The .head function will return the top 5 rows of data frame
# 5 is a default number; you can select a specific number of rows in the parentheses
car_sales.head(2)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"


**.tail()**

In [4]:
# The .tail function will # will return the bottom 5 rows of data frame
# 5 is a default number; you can select a specific number of rows in the parentheses
car_sales.tail(1)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
9,Nissan,White,31600,4,"$9,700.00"


**.loc[ ] vs .ilock[ ]**

In [5]:
animals = pd.Series(["cat", "dog", "bird", "panda", "snake"], index=[0, 3, 9, 8, 3])
animals

0      cat
3      dog
9     bird
8    panda
3    snake
dtype: object

In [6]:
# .loc refers to assigned index number
animals.loc[3]

3      dog
3    snake
dtype: object

In [7]:
animals.loc[9]

'bird'

In [8]:
car_sales.loc[3]

Make                    BMW
Colour                Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object

In [9]:
# .iloc refers to position
animals.iloc[3]

'panda'

In [10]:
car_sales.iloc[3]

Make                    BMW
Colour                Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object

**Both .loc[ ] and .iloc[ ] allow slicing**

In [12]:
animals.iloc[:3]

0     cat
3     dog
9    bird
dtype: object

In [13]:
car_sales.loc[:3]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"


**To view an individual column:**

In [14]:
car_sales["Colour"]

0    White
1      Red
2     Blue
3    Black
4    White
5    Green
6     Blue
7     Blue
8    White
9    White
Name: Colour, dtype: object

In [20]:
car_sales.Colour

0    White
1      Red
2     Blue
3    Black
4    White
5    Green
6     Blue
7     Blue
8    White
9    White
Name: Colour, dtype: object

**To Filter our Data Frame:**

In [24]:
## If we only want to view car sales with the make Toyota, we can use boolean indexing...
car_sales[car_sales["Make"] == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [26]:
## If we only want to view car sales with over 100000 KM on the Odometer...
car_sales[car_sales["Odometer (KM)"] > 100000]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
4,Nissan,White,213095,4,"$3,500.00"


**pd.crosstab()**

In [32]:
#The function pd.crosstab() allows us to compare two columns
pd.crosstab(car_sales["Make"], car_sales["Doors"])

Doors,3,4,5
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BMW,0,0,1
Honda,0,3,0
Nissan,0,2,0
Toyota,1,3,0


**.groupby()**

In [35]:
#The function .groupby() allows us to group by any column and then call some operation
car_sales.groupby(["Make"]).mean()

Unnamed: 0_level_0,Odometer (KM),Doors
Make,Unnamed: 1_level_1,Unnamed: 2_level_1
BMW,11179.0,5.0
Honda,62778.333333,4.0
Nissan,122347.5,4.0
Toyota,85451.25,3.75


In [36]:
#Exercise: Group by white Toyotas and see what happens from there
car_sales.groupby([car_sales["Colour"] == "White"]).mean()

Unnamed: 0_level_0,Odometer (KM),Doors
Colour,Unnamed: 1_level_1,Unnamed: 2_level_1
False,55212.666667,4.0
True,113684.5,4.0


In [39]:
#Exercise: Group by white Toyotas and see what happens from there
car_sales.groupby([car_sales["Make"] == "Toyota"] and [car_sales["Colour"] == "White"]).mean()

Unnamed: 0_level_0,Odometer (KM),Doors
Colour,Unnamed: 1_level_1,Unnamed: 2_level_1
False,55212.666667,4.0
True,113684.5,4.0
