### shift()

In [1]:
import pandas as pd
import numpy as np
# Small stock-style sample: five dates, each with a traded volume and a price.
df = pd.DataFrame(
    {
        'DATE': [1, 2, 3, 4, 5],
        'VOLUME': [100, 200, 300, 400, 500],
        'PRICE': [214, 234, 253, 272, 291],
    }
)

In [2]:
# Show the starting frame so the shifted results below can be compared against it.
print(df)

   DATE  VOLUME  PRICE
0     1     100    214
1     2     200    234
2     3     300    253
3     4     400    272
4     5     500    291


In [3]:
# Move every column down by one row. The vacated first row has no source value,
# so it becomes NaN, which also upcasts the integer columns to float.
df.shift(periods=1)

Unnamed: 0,DATE,VOLUME,PRICE
0,,,
1,1.0,100.0,214.0
2,2.0,200.0,234.0
3,3.0,300.0,253.0
4,4.0,400.0,272.0


In [4]:
# Same shift with fill_value=0: the vacated first row is filled with 0 instead
# of NaN, so the columns keep their integer values.
df.shift(periods=1, fill_value=0)

Unnamed: 0,DATE,VOLUME,PRICE
0,0,0,0
1,1,100,214
2,2,200,234
3,3,300,253
4,4,400,272


In [5]:
# Previous row's price as a new column; the first row has no previous row and
# receives the 0 placeholder.
df['PREV_DAY_PRICE'] = df['PRICE'].shift(periods=1, fill_value=0)
print(df)

   DATE  VOLUME  PRICE  PREV_DAY_PRICE
0     1     100    214               0
1     2     200    234             214
2     3     300    253             234
3     4     400    272             253
4     5     500    291             272


In [6]:
#We can easily calculate the last three day’s average stock price as below and create a new feature column.
df['LAST_3_DAYS_AVE_PRICE'] = (df['PRICE'].shift(1,fill_value=0) + 
                               df['PRICE'].shift(2,fill_value=0) + 
                               df['PRICE'].shift(3,fill_value=0))/3

In [7]:
#We can move forward as well to get value from the next timestep or next row.
df['TOMORROW_PRICE'] = df['PRICE'].shift(-1,fill_value=0)

In [8]:
print(df)

   DATE  VOLUME  PRICE  PREV_DAY_PRICE  LAST_3_DAYS_AVE_PRICE  TOMORROW_PRICE
0     1     100    214               0               0.000000             234
1     2     200    234             214              71.333333             253
2     3     300    253             234             149.333333             272
3     4     400    272             253             233.666667             291
4     5     500    291             272             253.000000               0


### value_counts()

In [9]:
# value_counts() works on an Index as well as on a Series. NaN entries are
# dropped from the tally (dropna=True is the default, spelled out here).
a = pd.Index([3, 3, 4, 2, 1, 3, 1, 2, 3, 4, np.nan, 4, 6, 7])
a.value_counts(dropna=True)

3.0    4
4.0    3
1.0    2
2.0    2
7.0    1
6.0    1
dtype: int64

In [10]:
# A Series mixing strings and numbers: each distinct value is tallied and the
# NaN entries are skipped.
b = pd.Series([
    'ab', 'bc', 'cd', 1, 'cd', 'cd', 'bc', 'ab', 'bc',
    1, 2, 3, 2, 3, np.nan, 1, np.nan,
])
b.value_counts(dropna=True)

cd    3
1     3
bc    3
ab    2
3     2
2     2
dtype: int64

In [11]:
# With bins=N, value_counts() no longer counts occurrences of each distinct
# value. It splits the value range into N half-open intervals and counts how
# many entries fall into each one.
a = pd.Index([3, 3, 4, 2, 1, 3, 1, 2, 3, 4, np.nan, 4, 6, 7])
a.value_counts(bins=4)

(2.5, 4.0]      7
(0.993, 2.5]    4
(5.5, 7.0]      2
(4.0, 5.5]      0
dtype: int64

### mask()

The `mask()` method applies an if-then rule to each element of a Series or DataFrame: where `cond` is True, the element is replaced with the corresponding value from `other` (NaN by default); otherwise the original value is kept. It is the inverse of `where()`, which replaces elements where `cond` is False.

In [12]:
# 5x3 frame holding 0..14 in row-major order, with columns A, B and C.
df = pd.DataFrame(np.arange(15).reshape(5, 3), columns=list('ABC'))
print(df)

    A   B   C
0   0   1   2
1   3   4   5
2   6   7   8
3   9  10  11
4  12  13  14


In [13]:
# Where an element is even (no remainder when divided by 2), replace it with
# its negation; odd elements keep their original value.
df.mask(df % 2 == 0, other=-df)

Unnamed: 0,A,B,C
0,0,1,-2
1,3,-4,5
2,-6,7,-8
3,9,-10,11
4,-12,13,-14


### nlargest()

In [14]:
# Ten rows labelled A-J with HEIGHT and WEIGHT columns. HEIGHT contains ties
# (160 and 70 each appear twice), which the keep= examples below rely on.
df = pd.DataFrame(
    {
        'HEIGHT': [170, 78, 99, 160, 160, 130, 155, 70, 70, 20],
        'WEIGHT': [50, 60, 70, 80, 90, 90, 90, 50, 60, 70],
    },
    index=list('ABCDEFGHIJ'),
)
print(df)

   HEIGHT  WEIGHT
A     170      50
B      78      60
C      99      70
D     160      80
E     160      90
F     130      90
G     155      90
H      70      50
I      70      60
J      20      70


In [15]:
# The three rows with the greatest HEIGHT, largest first.
dfl = df.nlargest(n=3, columns='HEIGHT')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
D     160      80
E     160      90


In [16]:
# keep='all' returns every row tied for the last requested place, so asking
# for 2 rows yields 3 here because D and E both have HEIGHT 160.
dfl = df.nlargest(n=2, columns='HEIGHT', keep='all')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
D     160      80
E     160      90


In [17]:
# keep='last': among tied rows, take the one that appears last in the frame
# (E rather than D for HEIGHT 160).
dfl = df.nlargest(n=2, columns='HEIGHT', keep='last')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
E     160      90


In [18]:
# keep='first': among tied rows, take the one that appears first in the frame
# (D rather than E for HEIGHT 160).
dfl = df.nlargest(n=2, columns='HEIGHT', keep='first')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
D     160      80


### nsmallest()

In [19]:
# The three rows with the smallest WEIGHT, smallest first. Tied rows come out
# in their original order, so B is returned ahead of I for WEIGHT 60.
dfs = df.nsmallest(n=3, columns='WEIGHT')
print(dfs)

   HEIGHT  WEIGHT
A     170      50
H      70      50
B      78      60
