In [2]:
# Series indexing (obj[...]) works analogously to NumPy array indexing

import pandas as pd
import numpy as np

In [3]:
# Four floats (0.0 through 3.0) labelled 'a' through 'd'; displayed below.
obj = pd.Series(
    data=np.arange(4.0),
    index=list("abcd"),
)
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [4]:
# Select a single value by its index label.
obj["b"]

1.0

In [5]:
# Select a single value by integer position. Use .iloc instead of obj[1]:
# on a Series whose labels are not integers, an integer key passed to []
# relies on a positional fallback that pandas has deprecated.
obj.iloc[1]

1.0

In [6]:
# Positional slice: as with ordinary Python slicing, the end position (4) is
# excluded, so this returns positions 1-3 ('b', 'c', 'd'). It is label-based
# slices (e.g. obj['b':'c']) that include their endpoint.
obj.iloc[1:4]

b    1.0
c    2.0
d    3.0
dtype: float64

In [7]:
# Pass a list of labels to pick several values, in the order requested.
obj[["b", "a", "d"]]

b    1.0
a    0.0
d    3.0
dtype: float64

In [8]:
# Pick several values by integer position (positions 1 and 3). .iloc makes the
# positional intent explicit; integer keys passed to [] on a string-labelled
# Series depend on a deprecated positional fallback.
obj.iloc[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [9]:
# Boolean filtering: keep only the entries whose value is below 2.
obj.loc[obj < 2]

a    0.0
b    1.0
dtype: float64

In [10]:
# Assigning through a selection overwrites the corresponding section of the
# Series in place. A label slice includes its end label, so both 'b' and 'c'
# are set to 5.
obj.loc["b":"c"] = 5
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

In [11]:
# 4x4 frame holding 0-15, one row per state and columns 'one' through 'four'.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=["Ohio", "Colorado", "Utah", "New York"],
    columns=["one", "two", "three", "four"],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [12]:
# Indexing a DataFrame with a single column name returns that column as a Series.
data["two"]

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [13]:
# A list of column names returns a DataFrame with those columns, in the order given.
data[["three", "one"]]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [14]:
# Slicing: a plain slice inside [] selects rows (here the first two), not columns.
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [15]:
# Boolean row selection: keep the rows whose 'three' value is greater than 5.
data[data["three"] > 5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [16]:
# Scalar comparison: comparing the whole frame with a scalar yields a boolean
# DataFrame with the same index and columns.
data < 5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [17]:
# Assignment through a boolean DataFrame: every cell below 5 is set to 0.
# This modifies `data` in place, so the cells below see the updated values.
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [18]:
# Selection with loc and iloc
# These indexers select a subset of the rows and columns of a DataFrame with NumPy-like notation, using either axis labels (loc) or integer positions (iloc).

In [19]:
# loc selects by label: one row ('Colorado') and two of its columns.
data.loc["Colorado", ["two", "three"]]

two      5
three    6
Name: Colorado, dtype: int64

In [20]:
# iloc selects by integer position: row 2 ('Utah') and columns 3, 0 and 1,
# returned in that order.
data.iloc[2, [3, 0, 1]]

four    11
one      8
two      9
Name: Utah, dtype: int64

In [21]:
# A single integer passed to iloc returns the row at that position as a Series.
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int64

In [22]:
# Lists of integer positions on both axes: rows 1 and 2, columns 0 through 3.
data.iloc[[1, 2], [0, 1, 2, 3]]

Unnamed: 0,one,two,three,four
Colorado,0,5,6,7
Utah,8,9,10,11


In [23]:
# loc and iloc also accept slices, not just single labels or lists of labels.
# A label slice includes its end label, so the 'Utah' row is part of the result.
data.loc[:"Utah", "two"]

Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int64

In [24]:
# Selections can be combined: take the first three columns by position, then
# keep only the rows whose 'three' value is greater than 5.
data.iloc[:, :3][data["three"] > 5]

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


In [28]:
# Integer indexes: without an explicit index, the Series gets the default
# integer labels 0, 1, 2.
ser = pd.Series(np.arange(3.0))
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [32]:
# With a non-integer index there is no ambiguity about what -1 refers to, but
# an integer key passed to [] on such a Series relies on a deprecated
# positional fallback. .iloc asks for the last element by position explicitly.
ser2 = pd.Series(np.arange(3.0), index=["a", "b", "c"])
ser2.iloc[-1]

2.0

In [33]:
# To keep things consistent, if an axis index contains integers, data selection is label-oriented.
# Note the integer slice below is the exception: as the output shows, ser[:1] returns only the first row, i.e. it slices by position.
# For more precise handling, use loc for labels or iloc for integer positions.
ser[:1]

0    0.0
dtype: float64

In [34]:
# Label-based slice: the end label 1 is included, so two rows are returned.
ser.loc[:1]

0    0.0
1    1.0
dtype: float64

In [35]:
# Position-based slice: the end position 1 is excluded, so one row is returned.
ser.iloc[:1]

0    0.0
dtype: float64

In [41]:
# Arithmetic and data alignment
# When two objects are added and their indexes do not match, the index of the
# result is the union of the two indexes.
s1 = pd.Series(
    [7.3, -2.5, 3.4, 1.5],
    index=list("acde"),
)
s2 = pd.Series(
    [-2.1, 3.6, -1.5, 4, 3.1],
    index=list("acefg"),
)

In [42]:
# Display the first operand (labels a, c, d, e).
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [43]:
# Display the second operand (labels a, c, e, f, g).
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [44]:
# Adding the two Series aligns them on their labels: labels present in only
# one of them ('d', 'f', 'g') come out as NaN in the result.
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [46]:
# Arithmetic and data alignment with DataFrames
# For DataFrames, alignment is performed on both the rows and the columns.
df1 = pd.DataFrame(
    np.arange(9.0).reshape(3, 3),
    index=["Ohio", "Texas", "Colorado"],
    columns=list("bcd"),
)
df2 = pd.DataFrame(
    np.arange(12.0).reshape(4, 3),
    index=["Utah", "Ohio", "Texas", "Oregon"],
    columns=list("bde"),
)

In [47]:
# Display the first frame (columns b, c, d).
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [48]:
# Display the second frame (columns b, d, e).
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [50]:
# Adding these returns a DataFrame whose index and columns are the unions of the ones in each DataFrame.
# Since the 'c' and 'e' columns are not found in both DataFrame objects, they appear as all missing in the result. The same holds for the rows whose labels are not common to both objects.

df1 + df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [51]:
# Adding DataFrame objects that share no column labels yields a result made up
# entirely of nulls. Note that df1 and df2 are rebound to new frames here.
df1 = pd.DataFrame({"A": [1, 2]})
df2 = pd.DataFrame({"B": [3, 4]})

In [52]:
# Display the frame that has only column 'A'.
df1

Unnamed: 0,A
0,1
1,2


In [53]:
# Display the frame that has only column 'B'.
df2

Unnamed: 0,B
0,3
1,4


In [54]:
# Adding these two frames results in all nulls: the result has both columns
# 'A' and 'B', but every cell is missing.
df1 + df2

Unnamed: 0,A,B
0,,
1,,
