In [2]:
import numpy as np
import pandas as pd

### ReIndexing

In [3]:
obj = pd.Series([1,2,3,4], index=['a','b','c','d'])
obj

a    1
b    2
c    3
d    4
dtype: int64

### Reindexing converts the values to float

In [4]:
obj1 = obj.reindex(['b','d','c','a','e'])
obj1

b    2.0
d    4.0
c    3.0
a    1.0
e    NaN
dtype: float64

### ffills() -> for forward filling the values

In [5]:
obj = pd.Series(['blue','yellow','green'], index=[0,2,4])
obj

0      blue
2    yellow
4     green
dtype: object

In [7]:
obj1 = obj.reindex(range(6), method='ffill')
obj1

0      blue
1      blue
2    yellow
3    yellow
4     green
5     green
dtype: object

### bfills does in the reverse order

In [17]:
obj1 = obj.reindex(range(6), method='bfill')
obj1

0      blue
1    yellow
2    yellow
3     green
4     green
5       NaN
dtype: object

In [8]:
df = pd.DataFrame(np.arange(9).reshape(3,3),
                 index=['a','b','c'],
                 columns=['Maha','Delhi','Kerala'])
df

Unnamed: 0,Maha,Delhi,Kerala
a,0,1,2
b,3,4,5
c,6,7,8


In [9]:
df1 = df.reindex(['a','b','d','c'])
df1

Unnamed: 0,Maha,Delhi,Kerala
a,0.0,1.0,2.0
b,3.0,4.0,5.0
d,,,
c,6.0,7.0,8.0


In [10]:
df2 = df.reindex(columns=['TN','Odisha','Bihar'])
df2

Unnamed: 0,TN,Odisha,Bihar
a,,,
b,,,
c,,,


### since we had not provided values of these, they are NaN

In [14]:
df.loc[['a','d','c','b'], ['Delhi','Kerala','Maha']]

Unnamed: 0,Delhi,Kerala,Maha
a,1.0,2.0,0.0
d,,,
c,7.0,8.0,6.0
b,4.0,5.0,3.0


In [16]:
df.loc[['a','d','c','b'], ['Delhi','Kerala','Odisha']]

Unnamed: 0,Delhi,Kerala,Odisha
a,1.0,2.0,
d,,,
c,7.0,8.0,
b,4.0,5.0,


### dropping the columns or the rows

In [19]:
obj = pd.Series(range(6), index=['a','b','c','d','e','f'])
obj

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [20]:
obj.drop('d')

a    0
b    1
c    2
e    4
f    5
dtype: int64

In [21]:
obj.drop(['a','f'])

b    1
c    2
d    3
e    4
dtype: int64

### dropping from data frame

In [22]:
df = pd.DataFrame(np.arange(16).reshape(4,4),
                 index=['a','b','c','d'],
                 columns=['India','Pakistan','China','USA'])
df

Unnamed: 0,India,Pakistan,China,USA
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [23]:
df.drop('a')

Unnamed: 0,India,Pakistan,China,USA
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [24]:
df.drop('China', axis= 'columns')

Unnamed: 0,India,Pakistan,USA
a,0,1,3
b,4,5,7
c,8,9,11
d,12,13,15


In [25]:
df.drop('Pakistan', axis=1)

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


### using inplace=True changes the data in the original calling obj

In [26]:
df

Unnamed: 0,India,Pakistan,China,USA
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [27]:
df.drop('Pakistan', axis='columns', inplace=True)

In [28]:
df

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


### Pakistan column is dropped from the data itself

In [29]:
obj

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

### slicing is inclusive of the end point or value

In [31]:
obj['b':'d']

b    1
c    2
d    3
dtype: int64

### loc vs iloc

#### loc = axis labels , iloc - Integers

In [32]:
df

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


In [33]:
obj

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [34]:
df.loc['a']

India    0
China    2
USA      3
Name: a, dtype: int32

In [35]:
df.loc['a',['India']]

India    0
Name: a, dtype: int32

In [36]:
df.iloc[0]

India    0
China    2
USA      3
Name: a, dtype: int32

In [37]:
df.iloc[0,[1]]

China    2
Name: a, dtype: int32

In [39]:
df.loc[:'d']

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


In [41]:
df.iloc[:4]

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


In [43]:
df.at['a','India']

0

In [45]:
df.iat[0,0]

0

In [53]:
df1 = pd.DataFrame(np.arange(12).reshape(3,4),
                  index=['a','b','c'],
                  columns=['Bangalore','Delhi','Kolkata','Chennai'])

In [54]:
df1

Unnamed: 0,Bangalore,Delhi,Kolkata,Chennai
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11


In [56]:
df2 = pd.DataFrame(np.arange(12).reshape(3,4),
                  index=['a','c','e'],
                  columns=['Mumbai','Chennai','Patna','Jaipur'])

In [57]:
df2

Unnamed: 0,Mumbai,Chennai,Patna,Jaipur
a,0,1,2,3
c,4,5,6,7
e,8,9,10,11


### if value is missing in one , it defaults to NaN

In [58]:
df1 + df2

Unnamed: 0,Bangalore,Chennai,Delhi,Jaipur,Kolkata,Mumbai,Patna
a,,4.0,,,,,
b,,,,,,,
c,,16.0,,,,,
e,,,,,,,


### filling default values when values are missing in one

In [59]:
df1.add(df2, fill_value=0)

Unnamed: 0,Bangalore,Chennai,Delhi,Jaipur,Kolkata,Mumbai,Patna
a,0.0,4.0,1.0,3.0,2.0,0.0,2.0
b,4.0,7.0,5.0,,6.0,,
c,8.0,16.0,9.0,7.0,10.0,4.0,6.0
e,,9.0,,11.0,,8.0,10.0


In [60]:
obj

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [63]:
obj1 = obj.reindex(['e','b','d','a','c','f'])
obj1

e    4
b    1
d    3
a    0
c    2
f    5
dtype: int64

### sort according to index

In [64]:
obj1.sort_index()

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [65]:
obj1.sort_index(ascending=False)

f    5
e    4
d    3
c    2
b    1
a    0
dtype: int64

### sorting according to values

In [66]:
obj1.sort_values()

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [69]:
obj1['c'] = np.nan
obj1['f'] = np.nan

In [70]:
obj1

e    4.0
b    1.0
d    3.0
a    0.0
c    NaN
f    NaN
dtype: float64

In [71]:
obj1.sort_values()

a    0.0
b    1.0
d    3.0
e    4.0
c    NaN
f    NaN
dtype: float64

### NaN values are added at the end

In [72]:
obj

a    0.0
b    1.0
c    NaN
d    3.0
e    4.0
f    5.0
dtype: float64

In [73]:
obj['c'] = 2.0
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
f    5.0
dtype: float64

In [74]:
obj.rank()

a    1.0
b    2.0
c    3.0
d    4.0
e    5.0
f    6.0
dtype: float64

### is_unique() method to know if the index is unique or not

In [76]:
obj.index.is_unique

True

In [77]:
obj1 = pd.Series(range(3), index=['a','a','b'])
obj1

a    0
a    1
b    2
dtype: int64

In [78]:
obj1.index.is_unique

False

In [79]:
obj1['a']

a    0
a    1
dtype: int64

### idxmax() and idxmin() for index where max and min values occur

In [80]:
obj1.idxmax()

'b'

In [81]:
obj1.idxmin()

'a'

In [82]:
obj.describe

<bound method NDFrame.describe of a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
f    5.0
dtype: float64>

In [83]:
df

Unnamed: 0,India,China,USA
a,0,2,3
b,4,6,7
c,8,10,11
d,12,14,15


In [84]:
df.describe()

Unnamed: 0,India,China,USA
count,4.0,4.0,4.0
mean,6.0,8.0,9.0
std,5.163978,5.163978,5.163978
min,0.0,2.0,3.0
25%,3.0,5.0,6.0
50%,6.0,8.0,9.0
75%,9.0,11.0,12.0
max,12.0,14.0,15.0


In [85]:
df.idxmax()

India    d
China    d
USA      d
dtype: object

In [86]:
df.idxmin()

India    a
China    a
USA      a
dtype: object

In [87]:
df.sum()

India    24
China    32
USA      36
dtype: int64

In [88]:
df.sum(axis='columns')

a     5
b    17
c    29
d    41
dtype: int64