![title](Header__0009_1.png "Header")
___
# Chapter 1 - Data Munging Basics
## Segment 1 - Filtering and selecting data

In [14]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

### Selecting and retrieving data

In [15]:
# Build an eight-element integer Series whose labels run from 'row 1' to 'row 8'.
# The labels are generated rather than typed out one by one.
series_obj = Series(
    np.arange(8),
    index=['row {}'.format(position) for position in range(1, 9)],
)
series_obj

row 1    0
row 2    1
row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
row 8    7
dtype: int64

In [5]:
# ['label-index']
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# When you write square brackets with a label-index inside them, this tells pandas to look up
# and retrieve the record stored under that label. 'row 5' matches exactly one label in
# series_obj, so the result is a single scalar value rather than a Series.
series_obj['row 5']

4

In [6]:
# .iloc[[integer index, integer index]]
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# The .iloc[] indexer selects records purely by their 0-based integer position, regardless of
# what the labels are. Passing a list of positions retrieves just those records as a Series.
# NOTE: plain square brackets (series_obj[[0, 7]]) used to fall back to positions on a
# label-indexed Series, but that fallback is deprecated in pandas 2.x and removed in pandas 3,
# so positional selection is spelled out explicitly with .iloc.
series_obj.iloc[[0, 7]]

row 1    0
row 8    7
dtype: int64

In [7]:
# Seed NumPy's global random number generator so that np.random.rand returns the same values
# on every run (see https://stackoverflow.com/a/40251530/11235609). Any fixed seed makes the
# results repeatable; a different seed simply gives a different, equally repeatable, table.
np.random.seed(1)

# 6 x 6 table of uniform random floats labelled 'row 1'..'row 6' and 'column 1'..'column 6'.
DF_obj = DataFrame(
    np.random.rand(6, 6),
    index=['row {}'.format(n) for n in range(1, 7)],
    columns=['column {}'.format(n) for n in range(1, 7)],
)
DF_obj

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,0.417022,0.720324,0.000114,0.302333,0.146756,0.092339
row 2,0.18626,0.345561,0.396767,0.538817,0.419195,0.68522
row 3,0.204452,0.878117,0.027388,0.670468,0.417305,0.55869
row 4,0.140387,0.198101,0.800745,0.968262,0.313424,0.692323
row 5,0.876389,0.894607,0.085044,0.039055,0.16983,0.878143
row 6,0.098347,0.421108,0.95789,0.533165,0.691877,0.315516


In [16]:
# object_name.loc[[row labels], [column labels]]
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# When you call the .loc[] label-based indexer and pass in a list of row labels and a list of
# column labels, this tells pandas to select and retrieve only those specific rows and columns,
# returned in the order the labels are listed.
# (Older tutorials use the .ix[] indexer for this; .ix was removed from pandas, and .loc[] is
# the label-based replacement.)
DF_obj.loc[['row 2', 'row 5'], ['column 5', 'column 2']]

Unnamed: 0,column 5,column 2
row 2,0.419195,0.345561
row 5,0.16983,0.894607


### Data slicing

In [9]:
# ['starting label-index':'ending label-index']
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# Data slicing allows you to select and retrieve all records from the starting label-index to the
# ending label-index, and every record in between.
# Unlike ordinary Python slices, a label slice INCLUDES the ending label ('row 7' is returned).
series_obj['row 3':'row 7']

row 3    2
row 4    3
row 5    4
row 6    5
row 7    6
dtype: int64

### Comparing with scalars

In [10]:

# object_name < scalar value
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# You can use comparison operators (like greater than or less than) to compare every element of
# an object against a scalar value. The result has the same rows and columns as the original,
# with True where the comparison holds (here: the value is below 0.2) and False elsewhere.
DF_obj < .2

Unnamed: 0,column 1,column 2,column 3,column 4,column 5,column 6
row 1,False,False,True,False,True,True
row 2,True,False,False,False,False,False
row 3,False,False,True,False,False,False
row 4,True,True,False,False,False,False
row 5,False,False,True,True,True,False
row 6,True,False,False,False,False,False


### Filtering with scalars

In [11]:
# object_name[object_name > scalar value]
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# You can also use a comparison expression inside the square brackets. The inner comparison
# produces a True / False value per record, and indexing with it returns only the records
# that satisfy the comparison (here: values greater than 6).
series_obj[series_obj > 6]

row 8    7
dtype: int64

### Setting values with scalars

In [12]:
# [['label-index', 'label-index', 'label-index']] = scalar value
# ♔┈♔┈♔┈( WHAT THIS DOES )┈♔┈♔┈♔
# Setting is where you select all records associated with the specified label-indexes and set
# those values equal to a scalar.
# The labels are passed as a LIST (note the inner square brackets). Writing them as bare
# comma-separated values creates a tuple key, which current pandas versions reject on a Series
# that does not have a MultiIndex.
# NOTE: this modifies series_obj in place, so cells that displayed it earlier are now stale.
series_obj[['row 1', 'row 5', 'row 8']] = 8

In [13]:
# Display the Series again to confirm that 'row 1', 'row 5' and 'row 8' now hold the value 8.
series_obj

row 1    8
row 2    1
row 3    2
row 4    3
row 5    8
row 6    5
row 7    6
row 8    8
dtype: int64