# Essential Functionality

## 1. Reindexing

### 1.1 Series Reindexing

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Four consecutive integers whose labels are not in alphabetical order.
series = pd.Series(data=np.arange(4), index=['d', 'b', 'a', 'c'])
series

d    0
b    1
a    2
c    3
dtype: int32

In [3]:
# Rearrange the values to follow the new label order; 'e' has no
# counterpart in the original index, so it shows up as NaN.
series_2 = series.reindex(index=['a', 'b', 'c', 'd', 'e'])
series_2

a    2.0
b    1.0
c    3.0
d    0.0
e    NaN
dtype: float64

In [4]:
# Three colour names stored under the integer labels 0, 2 and 4.
series_3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
series_3

0      blue
2    purple
4    yellow
dtype: object

In [5]:
# method='ffill' forward-fills: each label missing from the original index
# (1, 3, 5) takes the value of the nearest preceding label
series_3.reindex(np.arange(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

### 1.2 DataFrame Reindexing

In [6]:
# 3x3 grid holding the integers 0-8; the row labels skip 'b'.
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)

In [7]:
# display the frame built above
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [8]:
# Reindex the rows: the new label 'b' has no source row, so its cells are NaN.
frame_2 = frame.reindex(index=['a', 'b', 'c', 'd'])
frame_2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [9]:
# columns reindexing: 'Utah' is not an existing column, so it comes back empty (NaN);
# 'Ohio' is not requested, so it is left out of the result
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


## 2. Dropping Rows and Columns from an Axis

### 2.1 Series

In [10]:
# Five consecutive integers labelled 'a' through 'e'.
series = pd.Series(data=np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
series

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [11]:
# dropping a single row (index label); drop returns a new Series,
# so `series` itself is left unchanged
new_series = series.drop('c')
new_series

a    0
b    1
d    3
e    4
dtype: int32

In [12]:
# dropping multiple rows by passing a list of labels
series.drop(['b', 'c'])

a    0
d    3
e    4
dtype: int32

### 2.2 DataFrame

In [13]:
# 4x4 grid holding the integers 0-15, rows labelled by state and columns by ordinal name.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)

In [14]:
# display the frame built above
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [15]:
# dropping rows: axis=0 means the labels are looked up in the row index
data.drop(['Colorado', 'Utah'], axis=0)

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
New York,12,13,14,15


In [16]:
# dropping columns: axis=1 means the labels are looked up in the column names
data.drop(['one', 'three'], axis=1)

Unnamed: 0,two,four
Ohio,1,3
Colorado,5,7
Utah,9,11
New York,13,15


In [17]:
# in-place drop: first show `series` as it is before the drop
series

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [18]:
# inplace=True modifies `series` itself instead of returning a new object.
# NOTE(review): this cell is not idempotent -- re-running it should fail because 'c' is already gone.
series.drop('c', inplace=True)
series

a    0
b    1
d    3
e    4
dtype: int32

## 3. Indexing, Selection, and Filtering

### 3.1 Series

In [19]:
# Four consecutive integers labelled 'a' through 'd'.
series = pd.Series(data=np.arange(4), index=['a', 'b', 'c', 'd'])
series

a    0
b    1
c    2
d    3
dtype: int32

In [20]:
# select a single value by its index label
series['b']

1

In [21]:
# Select the second element by integer position. `.iloc` is used because plain
# `series[1]` on a string-labelled Series relies on a positional fallback that
# pandas has deprecated (integer keys in `[]` are meant to be treated as labels).
series.iloc[1]

1

In [22]:
# integer slice: positions 2 and 3 are returned, the end position 4 is excluded
series[2:4]

c    2
d    3
dtype: int32

In [23]:
# a list of labels selects several values, in the order requested
series[['b', 'a', 'd']]

b    1
a    0
d    3
dtype: int32

In [24]:
# Select several elements by integer position, in the order requested. `.iloc`
# is used because a list of integers inside plain `[]` on a string-labelled
# Series relies on a positional fallback that pandas has deprecated.
series.iloc[[1, 0, 3]]

b    1
a    0
d    3
dtype: int32

In [25]:
# boolean mask: keep only the values smaller than 2
series[series < 2]

a    0
b    1
dtype: int32

In [26]:
# slicing with labels: unlike an integer slice, the end label is included
series['b':'c']

b    1
c    2
dtype: int32

In [27]:
# assigning to a label slice overwrites every value in that (inclusive) range
series['b':'c'] = 5
series

a    0
b    5
c    5
d    3
dtype: int32

### 3.2 DataFrame

In [28]:
# Rebuild the 4x4 example frame (integers 0-15) so this section starts from fresh values.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)

In [29]:
# display the frame built above
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [30]:
# selecting one column by name returns it as a Series
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32

In [31]:
# a list of column names selects those columns, in the order given
data[['three', 'one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [32]:
# a slice inside [] selects rows (here the first two), not columns
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [33]:
# boolean Series as a mask: keep the rows where column 'three' is greater than 5
data[data['three'] > 5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [34]:
# element-wise comparison produces a boolean DataFrame of the same shape
data < 5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [35]:
# NumPy-style boolean-mask assignment: every element smaller than 5 is set to 0.
# This modifies `data` itself, so the cells below see the updated values.
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


### 3.3 Selection with `loc` and `iloc`
* DataFrame row and column selection

In [36]:
# loc selects by label: row label(s) first, then column label(s),
# in the same [row, column] order as NumPy indexing
data.loc['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int32

In [37]:
# iloc selects by integer position only: row 2, then columns 3, 0 and 2
data.iloc[2, [3, 0, 2]]

four     11
one       8
three    10
Name: Utah, dtype: int32

In [38]:
# a single integer position returns that row as a Series
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [39]:
# lists of positions on both axes: rows 1 and 2, columns 3, 0 and 1
data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,0,5
Utah,11,8,9


In [40]:
# both loc and iloc also work with slices

In [41]:
# label slice on rows (the end label 'Utah' is included), list of labels on columns
data.loc[:'Utah', ['one', 'three']]

Unnamed: 0,one,three
Ohio,0,0
Colorado,0,6
Utah,8,10


In [42]:
# position slices on both axes: rows 2-3 and columns 1-2 (end positions are excluded)
data.iloc[2:4, 1:3]

Unnamed: 0,two,three
Utah,9,10
New York,13,14


## 4. Integer Indexes
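* Avoid negative-integer selection such as `series[-1]` on an object with an integer index. Plain `[]` indexing in pandas is label-oriented, so `-1` is looked up as a label rather than meaning "the last element".
* Prefer `.loc` (labels) or `.iloc` (integer positions) for row selection on both Series and DataFrame.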