### Comparison using Numpy array

Because Pandas is built on top of `NumPy`, comparison operators and `NumPy`'s logical functions work element-wise on Pandas objects: comparing a DataFrame column (a Pandas Series) with a value returns a boolean Series, and functions such as `np.logical_and` can be used to combine such boolean Series.

In [1]:
import pandas as pd  # Pandas is built on top of Numpy
import numpy as np

In [2]:
# Load brics.csv, which lives in the parent directory, into a Pandas DataFrame.
# index_col=0 turns the first CSV column into the row labels.
brics = pd.read_csv("../brics.csv", index_col=0)
print(brics)

         country    capital    area  population
BR        Brazil   Brasilia   8.516      200.40
RU        Russia     Moscow  17.100      143.50
IN         India  New Delhi   3.286     1252.00
CH         China    Beijing   9.597     1357.00
SA  South Africa   Pretoria   1.221       52.98


In [3]:
# Three ways of selecting the "area" column: by name, by label with .loc,
# and by position with .iloc. Print each selection together with its type to compare them.
for area_col in (brics["area"], brics.loc[:, "area"], brics.iloc[:, 2]):
    print(area_col, "\n", "Data Type:", type(area_col), "\n")

BR     8.516
RU    17.100
IN     3.286
CH     9.597
SA     1.221
Name: area, dtype: float64 
 Data Type: <class 'pandas.core.series.Series'> 

BR     8.516
RU    17.100
IN     3.286
CH     9.597
SA     1.221
Name: area, dtype: float64 
 Data Type: <class 'pandas.core.series.Series'> 

BR     8.516
RU    17.100
IN     3.286
CH     9.597
SA     1.221
Name: area, dtype: float64 
 Data Type: <class 'pandas.core.series.Series'> 



In [4]:
# The comparison itself: .gt(8) is the method form of `> 8` and is applied
# element-wise, giving one boolean per row
is_huge = brics["area"].gt(8)
print(is_huge)

BR     True
RU     True
IN    False
CH     True
SA    False
Name: area, dtype: bool


In [5]:
# Use is_huge as a boolean row mask: only the rows where it is True are kept
print(brics.loc[is_huge])

   country   capital    area  population
BR  Brazil  Brasilia   8.516       200.4
RU  Russia    Moscow  17.100       143.5
CH   China   Beijing   9.597      1357.0


### Filtering with NumPy logical functions

In [6]:
# Element-wise AND of two conditions: area greater than 8 and area less than 10
np.logical_and(brics["area"].gt(8), brics["area"].lt(10))

BR     True
RU    False
IN    False
CH     True
SA    False
Name: area, dtype: bool

In [7]:
# Use the combined condition as a row mask to keep only the matching rows of brics
brics.loc[np.logical_and(brics["area"] > 8, brics["area"] < 10)]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
CH,China,Beijing,9.597,1357.0


### Another Example

In [8]:
# pandas must be bound to `pd`, not `np`: binding it to `np` would replace the
# NumPy alias, and later calls such as np.logical_and(...) would then fail.
import pandas as pd

# Read the cars.csv data (one directory above) into a Pandas DataFrame,
# using the first CSV column as the row labels
cars = pd.read_csv("../cars.csv", index_col = 0)
print(cars)

     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JPN           588          Japan         False
IN             18          India         False
RU            200         Russia          True
MOR            70        Morocco          True
EG             45          Egypt          True


In [9]:
# dr: the drives_right column as a Pandas Series (it holds booleans)
dr = cars.loc[:, "drives_right"]

# sel: the rows of cars where dr is True
sel = cars.loc[dr]

# Show the selection
print(sel)

     cars_per_cap        country  drives_right
US            809  United States          True
RU            200         Russia          True
MOR            70        Morocco          True
EG             45          Egypt          True


In [10]:
# Same selection in one line: the boolean drives_right column is used
# directly as the row mask, without the intermediate variable
sel2 = cars.loc[cars["drives_right"]]
print(sel2)

     cars_per_cap        country  drives_right
US            809  United States          True
RU            200         Russia          True
MOR            70        Morocco          True
EG             45          Egypt          True


In [11]:
# Find the observations with a high number of cars per capita
# cpc: the cars_per_cap column as a Pandas Series
cpc = cars.loc[:, "cars_per_cap"]
# many_cars: True where cars_per_cap is greater than 500
many_cars = cpc.gt(500)

# car_maniac: the rows of cars selected by many_cars
car_maniac = cars.loc[many_cars]
print(car_maniac)

     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JPN           588          Japan         False


In [12]:
# The same selection in a single line: build the mask and apply it in one step
car_maniac2 = cars.loc[cars["cars_per_cap"].gt(500)]
print(car_maniac2)

     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JPN           588          Japan         False


In [13]:
# Imports and the cars data load are repeated in this cell
import numpy as np
import pandas as pd

cars = pd.read_csv("../cars.csv", index_col=0)

# medium: observations whose cars_per_cap is greater than 100 and less than 500
cpc = cars.loc[:, "cars_per_cap"]
between = np.logical_and(cpc.gt(100), cpc.lt(500))
medium = cars.loc[between]

# Show the result
print(medium)

    cars_per_cap country  drives_right
RU           200  Russia          True
