# Percent_change
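Series and DataFrames (and Panel, in older pandas versions that still provide it) all have the method pct_change(). This method compares every element with its prior element and computes the fractional change between them (multiply by 100 to get a percentage).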

In [5]:
import pandas as pd
import numpy as np
# A short integer series: every entry is compared with the entry before it,
# so the first result is NaN (there is nothing to compare it against).
s = pd.Series([1, 2, 3, 4, 5, 4])
print(s, s.pct_change(), sep="\n")

# The same operation on a 5x2 frame of standard-normal draws.
# By default pct_change() works down each column; to compare across the
# columns of each row instead, pass the axis=1 argument.
df = pd.DataFrame(np.random.standard_normal(size=(5, 2)))
print(df.pct_change())

0    1
1    2
2    3
3    4
4    5
5    4
dtype: int64
0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.250000
5   -0.200000
dtype: float64
           0         1
0        NaN       NaN
1  18.469605 -1.807851
2  -1.665887 -0.600807
3  -1.873809  4.607648
4  -1.438265 -0.978726


# Covariance
Covariance is applied on series data. The Series object has a method cov to compute covariance between series objects. NA will be excluded automatically.

In [6]:
import pandas as pd
import numpy as np
# Two independent length-10 series of standard-normal draws (s1 is drawn first).
s1, s2 = (pd.Series(np.random.standard_normal(10)) for _ in range(2))

# Series.cov returns a single scalar for the pair of series.
pairwise_cov = s1.cov(s2)
print(pairwise_cov)

0.14361050913525644


In [8]:
# When applied to a DataFrame, cov() computes the covariance between every pair of columns.

import pandas as pd
import numpy as np
# Ten rows of standard-normal draws in five columns labelled 'a' to 'e'.
frame = pd.DataFrame(np.random.standard_normal(size=(10, 5)), columns=list('abcde'))

# First the covariance of one pair of columns, then the full matrix.
col_a, col_b = frame['a'], frame['b']
print(col_a.cov(col_b))
print(frame.cov())

# Note: the a/b value printed first is the same number that appears in
# row 'a', column 'b' (and row 'b', column 'a') of the matrix from frame.cov().

-0.4919548787212941
          a         b         c         d         e
a  1.515605 -0.491955 -0.195372  0.013336 -0.117609
b -0.491955  1.020031 -0.409498 -0.086240  0.099735
c -0.195372 -0.409498  1.649113  0.119809 -0.760584
d  0.013336 -0.086240  0.119809  0.919401 -0.087486
e -0.117609  0.099735 -0.760584 -0.087486  1.052796


# Correlation
Correlation measures the linear relationship between any two arrays of values (series). There are multiple methods to compute the correlation: pearson (the default), spearman and kendall.

In [10]:
import pandas as pd
import numpy as np
# Ten rows of standard-normal draws in five labelled columns.
column_labels = ['a', 'b', 'c', 'd', 'e']
frame = pd.DataFrame(np.random.standard_normal(size=(10, 5)), columns=column_labels)

# Correlation of a single pair of columns, followed by the full matrix.
print(frame['a'].corr(frame['b']))
print(frame.corr())

# Note: older pandas releases silently skipped non-numeric columns in
# DataFrame.corr(); since pandas 2.0 pass numeric_only=True to leave them out.

0.02073370913987883
          a         b         c         d         e
a  1.000000  0.020734  0.083472 -0.207064 -0.319218
b  0.020734  1.000000 -0.309663 -0.248847 -0.337124
c  0.083472 -0.309663  1.000000  0.262084 -0.150408
d -0.207064 -0.248847  0.262084  1.000000  0.627536
e -0.319218 -0.337124 -0.150408  0.627536  1.000000


# Data Ranking
Data Ranking produces a rank for each element in the array of elements. In case of ties, each tied element is assigned the mean of the ranks the tied group would otherwise occupy.

In [13]:
import pandas as pd
import numpy as np

# Five standard-normal draws labelled 'a' through 'e'.
s = pd.Series(np.random.standard_normal(5), index=['a', 'b', 'c', 'd', 'e'])

# Copy b's value into d so that two entries tie and receive the same rank.
s.loc['d'] = s.loc['b']
print(s.rank())

a    5.0
b    3.5
c    1.0
d    3.5
e    2.0
dtype: float64


# Rank optionally takes a parameter `ascending`, which is `True` by default; when `False`, the data is reverse-ranked, with larger values assigned a smaller rank.

Rank supports different tie-breaking methods, specified with the method parameter −

average − average rank of tied group

min − lowest rank in the group

max − highest rank in the group

first − ranks assigned in the order they appear in the array

# Syntax: DataFrame.rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)

In [19]:
import numpy as np
import pandas as pd
# One row per animal; the last leg count (snake) is NaN.
animals = ['fox', 'Kangaroo', 'deer', 'spider', 'snake']
leg_counts = [4, 2, 4, 8, np.nan]
df = pd.DataFrame({'Animal': animals, 'Number_legs': leg_counts})
print(df)


     Animal  Number_legs
0       fox          4.0
1  Kangaroo          2.0
2      deer          4.0
3    spider          8.0
4     snake          NaN


In [20]:
# Each new column ranks 'Number_legs' with a different set of rank() options;
# the dict order fixes the order in which the columns are appended to df.
rank_options = {
    'default_rank': {},
    'max_rank': {'method': 'max'},
    'NA_bottom': {'na_option': 'bottom'},
    'pct_rank': {'pct': True},
}
for new_column, options in rank_options.items():
    df[new_column] = df['Number_legs'].rank(**options)
print(df)

     Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
0       fox          4.0           2.5       3.0        2.5     0.625
1  Kangaroo          2.0           1.0       1.0        1.0     0.250
2      deer          4.0           2.5       3.0        2.5     0.625
3    spider          8.0           4.0       4.0        4.0     1.000
4     snake          NaN           NaN       NaN        5.0       NaN


# ***DataFrame.rank() procedure images attached