In [52]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from numpy.random import randn

In [2]:
# A small Series whose index uses explicit string labels
ser1 = Series(data=np.arange(3), index=list('abc'))
ser1

a    0
b    1
c    2
dtype: int32

In [3]:
# To drop an entry by its index label
ser1.drop('b')

a    0
c    2
dtype: int32

In [4]:
# The same idea with a DataFrame: a 3x3 frame with labelled rows and columns
df1 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['SF', 'LA', 'NY'],
    columns=['pop', 'size', 'year'],
)
df1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [5]:
#To drop a row
df1.drop('LA')       #This returns a new DataFrame; df1 itself is not modified

Unnamed: 0,pop,size,year
SF,0,1,2
NY,6,7,8


In [6]:
df1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [8]:
# To keep the result of a drop, assign the returned DataFrame to a name
df_del = df1.drop('LA')
df_del

Unnamed: 0,pop,size,year
SF,0,1,2
NY,6,7,8


In [9]:
# To drop a column, name it through the `columns` keyword
df1.drop(columns='year')

Unnamed: 0,pop,size
SF,0,1
LA,3,4
NY,6,7


# Selecting entries

In [10]:
# Labels 'A'..'C' mapped to the doubled values 0, 2, 4
ser1 = 2 * Series(np.arange(3), index=list('ABC'))
ser1

A    0
B    2
C    4
dtype: int32

In [11]:
# Select a single value by its index label
ser1['B']

2

In [12]:
# Select a single value by integer position. Use .iloc for this: a plain
# ser1[1] on a string-labelled Series relies on a deprecated positional fallback.
ser1.iloc[1]

2

In [13]:
# Positional slice: the first two entries (the end position is exclusive)
ser1.iloc[:2]

A    0
B    2
dtype: int32

In [14]:
# Passing a list of labels returns a Series with just those entries
ser1[['A','B']]

A    0
B    2
dtype: int32

In [16]:
# To select with a boolean condition: keep only the entries where it is True
ser1[ser1>=2]

B    2
C    4
dtype: int32

In [18]:
# Assign through a boolean mask: every value greater than 3 becomes 18.
# This modifies ser1 itself.
ser1[ser1>3] = 18
ser1

A     0
B     2
C    18
dtype: int32

In [20]:
# A 5x5 frame of the integers 0..24, rows labelled by city, columns 'A'..'E'
df = DataFrame(
    np.arange(25).reshape(5, 5),
    index=['NYC', 'LA', 'SF', 'DC', 'CHI'],
    columns=list('ABCDE'),
)
df

Unnamed: 0,A,B,C,D,E
NYC,0,1,2,3,4
LA,5,6,7,8,9
SF,10,11,12,13,14
DC,15,16,17,18,19
CHI,20,21,22,23,24


In [21]:
#Selecting data from a DataFrame: a single column name returns that column as a Series
df['B']

NYC     1
LA      6
SF     11
DC     16
CHI    21
Name: B, dtype: int32

In [22]:
# A list of column names selects several columns and returns a DataFrame
df[['B','E']]

Unnamed: 0,B,E
NYC,1,4
LA,6,9
SF,11,14
DC,16,19
CHI,21,24


In [23]:
df[df['B'] > 8]        #Returns every row where column B > 8

Unnamed: 0,A,B,C,D,E
SF,10,11,12,13,14
DC,15,16,17,18,19
CHI,20,21,22,23,24


In [24]:
# Boolean DataFrame: True wherever the element is greater than 10
df > 10

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
CHI,True,True,True,True,True


In [26]:
#To access rows using labels
df.loc['LA']

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int32

In [27]:
#To access rows using integer position (0-based)
df.iloc[1]

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int32

# Data alignment

In [28]:
# First operand for the alignment demo: labels 'A'..'C'
ser1 = Series(data=[0, 1, 2], index=list('ABC'))
ser1

A    0
B    1
C    2
dtype: int64

In [29]:
# Second operand: same labels as ser1 plus an extra 'D'
ser2 = Series(data=[3, 4, 5, 6], index=list('ABCD'))
ser2

A    3
B    4
C    5
D    6
dtype: int64

In [30]:
# Addition aligns on index labels; 'D' exists only in ser2, so its result is NaN
ser1+ser2

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

In [32]:
# A 2x2 frame: rows NYC/LA, columns A/B
df1 = DataFrame(
    np.arange(4).reshape(2, 2),
    index=['NYC', 'LA'],
    columns=['A', 'B'],
)
df1

Unnamed: 0,A,B
NYC,0,1
LA,2,3


In [37]:
# A 3x3 frame that only partly overlaps df1: extra row SF, columns A/D/C
df2 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['NYC', 'SF', 'LA'],
    columns=['A', 'D', 'C'],
)
df2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [38]:
# Only row/column labels present in both frames get a value; every other cell is NaN
df1 + df2

Unnamed: 0,A,B,C,D
LA,8.0,,,
NYC,0.0,,,
SF,,,,


In [39]:
# To avoid the NaN results above, use .add() with fill_value (shown in the next cell).
# First, a reminder of what df1 holds:
df1

Unnamed: 0,A,B
NYC,0,1
LA,2,3


In [40]:
# fill_value=0 stands in for a value that is missing from only one of the frames.
# A cell missing from both (row SF, column B) still comes out as NaN.
df1.add(df2, fill_value=0)

Unnamed: 0,A,B,C,D
LA,8.0,3.0,8.0,7.0
NYC,0.0,1.0,2.0,1.0
SF,3.0,,5.0,4.0


In [41]:
df2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [42]:
# Operations between Series and DataFrames
# Take the first row of df2 as a Series; its index is df2's column labels
ser3 = df2.iloc[0]
ser3

A    0
D    1
C    2
Name: NYC, dtype: int32

In [43]:
# The Series is matched to df2's columns and subtracted from every row
df2-ser3

Unnamed: 0,A,D,C
NYC,0,0,0
SF,3,3,3
LA,6,6,6


# Rank and Sort

In [44]:
# Deliberately unsorted labels, to show the difference between the two sorts
ser1 = Series(data=range(3), index=list('CAB'))
ser1

C    0
A    1
B    2
dtype: int64

In [45]:
# Sort by index label (A, B, C); each value stays with its label
ser1.sort_index()

A    1
B    2
C    0
dtype: int64

In [47]:
# Sort by value, in ascending order
ser1.sort_values()

C    0
A    1
B    2
dtype: int64

In [53]:
# Ten random draws from numpy.random.randn, indexed 0..9.
# NOTE(review): no seed is set in the visible cells, so these values change on
# every run; consider seeding NumPy's RNG if the outputs need to be reproducible.
ser2 = Series(randn(10))
ser2

0   -1.104315
1   -0.179343
2   -0.151797
3    0.701766
4    0.649632
5   -1.591278
6    0.529890
7   -0.210444
8    1.117367
9    1.471539
dtype: float64

In [55]:
# Sort the random values in ascending order; the original index labels travel with them
ser2.sort_values()

5   -1.591278
0   -1.104315
7   -0.210444
1   -0.179343
2   -0.151797
6    0.529890
4    0.649632
3    0.701766
8    1.117367
9    1.471539
dtype: float64

In [56]:
# rank() gives each entry its 1-based position in ascending order of value
# (the smallest value gets rank 1.0), reported in the original index order
ser2.rank()

0     2.0
1     4.0
2     5.0
3     8.0
4     7.0
5     1.0
6     6.0
7     3.0
8     9.0
9    10.0
dtype: float64

In [57]:
# A second set of ten random draws; this rebinds ser3.
# NOTE(review): unseeded, so the values differ on every run.
ser3 = Series(randn(10))
ser3

0    1.104093
1    1.066092
2   -0.206875
3   -0.404884
4    0.275043
5   -0.464518
6    2.503046
7    0.110962
8    0.884403
9   -0.959176
dtype: float64

In [58]:
# Rank of each entry by value (1.0 = smallest), reported in the original index order
ser3.rank()

0     9.0
1     8.0
2     4.0
3     3.0
4     6.0
5     2.0
6    10.0
7     5.0
8     7.0
9     1.0
dtype: float64

In [60]:
# Sorting by value lists the entries in rank order, from rank 1.0 up to rank 10.0
ser3.sort_values()

9   -0.959176
5   -0.464518
3   -0.404884
2   -0.206875
7    0.110962
4    0.275043
8    0.884403
1    1.066092
0    1.104093
6    2.503046
dtype: float64