## Basic import

In [1]:
import numpy as np
import pandas as pd

# Build a Series from a plain list; with no index given, pandas labels the rows 0..n-1.
obj = pd.Series(data=[1, 2, 3, 4])
obj

0    1
1    2
2    3
3    4
dtype: int64

In [2]:
# The underlying data as a NumPy array.
obj.to_numpy()

array([1, 2, 3, 4], dtype=int64)

In [3]:
# Index labels of the Series; with no explicit index this is a RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

## Self-defined index/label

In [4]:
# Same values, but with explicit string labels instead of the default integer index.
obj = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
obj

A    1
B    2
C    3
D    4
dtype: int64

In [5]:
# The index now holds the custom string labels supplied at construction.
obj.index

Index(['A', 'B', 'C', 'D'], dtype='object')

## Indexing and slicing

In [6]:
# Positional access: use .iloc explicitly. Plain obj[0] on a string-labelled
# Series relies on a positional fallback that pandas has deprecated.
obj.iloc[0]

1

In [7]:
# Positional slice: positions 0, 1 and 2 (the stop position is excluded).
obj.iloc[0:3]

A    1
B    2
C    3
dtype: int64

In [8]:
# Select several positions at once. Use .iloc: a list of integers passed to
# plain [] on a string-labelled Series relies on the deprecated positional fallback.
obj.iloc[[0, 2]]

A    1
C    3
dtype: int64

In [9]:
# Look up a single value by its label.
obj.loc['A']

1

In [10]:
# Label-based slice: unlike positional slicing, the end label 'D' is included.
obj.loc['A':'D']

A    1
B    2
C    3
D    4
dtype: int64

In [11]:
# Select several labels at once with a list.
obj.loc[['A', 'C']]

A    1
C    3
dtype: int64

In [12]:
# Label-based slice from 'C' through the last label.
obj.loc['C':]

C    3
D    4
dtype: int64

In [13]:
# Position-based slice: positions 1 and 2 (the stop position is excluded).
obj.iloc[1:3]

B    2
C    3
dtype: int64

In [14]:
# Display the Series again before the filtering and arithmetic examples.
obj

A    1
B    2
C    3
D    4
dtype: int64

In [15]:
# Element-wise comparison yields a boolean Series with the same index.
obj > 2

A    False
B    False
C     True
D     True
dtype: bool

In [16]:
# Boolean mask selection: keep only the entries whose value exceeds 2.
obj.loc[obj > 2]

C    3
D    4
dtype: int64

In [17]:
# A scalar operand is applied to every element.
obj + 2

A    3
B    4
C    5
D    6
dtype: int64

In [18]:
# Element-wise remainder after division by 2.
obj % 2

A    1
B    0
C    1
D    0
dtype: int64

In [19]:
# Series-to-Series addition combines the values that share a label.
obj+obj

A    2
B    4
C    6
D    8
dtype: int64

## Series and Dict 

In [20]:
# A dict becomes a Series: keys turn into index labels, values into data.
obj = pd.Series(dict(B=2, C=1, D=3))
obj

B    2
C    1
D    3
dtype: int64

In [21]:
# An explicit index selects and orders the dict entries: 'A' has no matching
# key and becomes NaN, while 'D' is not requested and is left out.
obj2 = pd.Series(dict(B=2, C=1, D=3), index=list('ABC'))
obj2

A    NaN
B    2.0
C    1.0
dtype: float64

In [22]:
# NaN is NumPy's floating-point "not a number" value; pandas uses it to mark missing data.
np.nan

nan

In [23]:
# Add two Series with different indexes; values are matched up by label.
obj3 = obj.add(obj2)

In [24]:
# Labels that do not have a value in both operands ('A', 'D') come out as NaN.
obj3

A    NaN
B    4.0
C    2.0
D    NaN
dtype: float64

In [25]:
# Flag the missing entries.
obj3.isna()

A     True
B    False
C    False
D     True
dtype: bool

In [26]:
# Membership tests look at the index labels (like dict keys), not at the values.
'A' in obj3

True

In [27]:
# 'X' is not an index label, so this is False.
'X' in obj3

False

In [28]:
# Overwrite the value stored under the existing label 'D'.
obj3.loc['D'] = 8
obj3

A    NaN
B    4.0
C    2.0
D    8.0
dtype: float64

In [29]:
# Assigning to a label that does not exist yet appends a new entry.
obj3.loc['E'] = 10
obj3

A     NaN
B     4.0
C     2.0
D     8.0
E    10.0
dtype: float64

In [30]:
# Start again from a Series with the default integer index.
obj = pd.Series(list(range(1, 5)))
obj

0    1
1    2
2    3
3    4
dtype: int64

In [31]:
# Relabel the Series in place by assigning a whole new index of the same length.
obj.index = pd.Index(['One', 'Two', 'Three', 'Four'])
obj

One      1
Two      2
Three    3
Four     4
dtype: int64

In [32]:
# Same relabelling for the five-element Series.
obj3.index = pd.Index(['One', 'Two', 'Three', 'Four', 'Five'])
obj3

One       NaN
Two       4.0
Three     2.0
Four      8.0
Five     10.0
dtype: float64

In [33]:
# Assigning an index of the wrong length raises ValueError. Catch and print it
# so the demonstration does not halt a "Run All" of the notebook.
try:
    obj.index = ['One','Two','Three']
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Length mismatch: Expected axis has 4 elements, new values have 3 elements

## Series with different data type

In [34]:
# Mixing a list with scalars forces the generic `object` dtype.
obj4 = pd.Series(
    {
        'A': [1, 2],
        'B': 3,
        'C': 4,
    }
)
obj4

A    [1, 2]
B         3
C         4
dtype: object

In [35]:
# The underlying object array; the list is stored as a single element.
obj4.to_numpy()

array([list([1, 2]), 3, 4], dtype=object)

# DataFrame 

## Construction

In [37]:
# list of list: each inner list becomes one row; row and column labels default to 0..n-1
df = pd.DataFrame([[1,2],[3,4]])
df

Unnamed: 0,0,1
0,1,2
1,3,4


In [38]:
# 2d array: a NumPy array gives the same frame as the nested list
# (numpy is already imported earlier in the notebook, so it is not re-imported here)
df = pd.DataFrame(np.array([[1,2],[3,4]]))
df

Unnamed: 0,0,1
0,1,2
1,3,4


In [39]:
# Dict of equal-length lists: each key becomes a column, rows get a default integer index.
data = dict(
    name=['Alice', 'Tom', 'Steven'],
    grade=[70, 80, 95],
    gender=['M', 'F', 'M'],
)
df = pd.DataFrame(data)
df

Unnamed: 0,name,grade,gender
0,Alice,70,M
1,Tom,80,F
2,Steven,95,M


In [40]:
# First two rows only.
df.head(2)

Unnamed: 0,name,grade,gender
0,Alice,70,M
1,Tom,80,F


In [41]:
# Pick and order the columns explicitly; 'number' is not a key of `data`,
# so that column is created empty (all NaN), and 'gender' is left out.
df = pd.DataFrame(
    data,
    columns=['name', 'grade', 'number'],
)
df

Unnamed: 0,name,grade,number
0,Alice,70,
1,Tom,80,
2,Steven,95,


In [42]:
# Same column selection, now with custom row labels instead of 0..n-1.
df = pd.DataFrame(
    data,
    index=['one', 'two', 'three'],
    columns=['name', 'grade', 'number'],
)
df

Unnamed: 0,name,grade,number
one,Alice,70,
two,Tom,80,
three,Steven,95,


In [43]:
# Column 'name' as a Series.
df['name']

one       Alice
two         Tom
three    Steven
Name: name, dtype: object

In [44]:
# Bracket access returns the column as a Series.
df['grade']

one      70
two      80
three    95
Name: grade, dtype: int64

In [45]:
# Select a row by its index label; the row comes back as a Series keyed by column name.
df.loc['one']

name      Alice
grade        70
number      NaN
Name: one, dtype: object

In [46]:
# Select the same row by integer position.
df.iloc[0]

name      Alice
grade        70
number      NaN
Name: one, dtype: object

In [47]:
# Show the current frame before modifying its columns.
df

Unnamed: 0,name,grade,number
one,Alice,70,
two,Tom,80,
three,Steven,95,


In [48]:
# Broadcast a scalar to every row of the column. Assign through brackets:
# attribute-style assignment only works when the column already exists and
# otherwise silently sets a plain Python attribute instead of a column.
df['grade'] = 100
df

Unnamed: 0,name,grade,number
one,Alice,100,
two,Tom,100,
three,Steven,100,


In [49]:
# Fill the column from a sequence whose length matches the number of rows.
# Bracket assignment is used because attribute-style assignment is fragile
# (it would set a Python attribute if the column did not exist).
df['number'] = range(3)
df

Unnamed: 0,name,grade,number
one,Alice,100,0
two,Tom,100,1
three,Steven,100,2


In [50]:
# A Series that covers only two of the three row labels of df.
s = pd.Series({'one': 1001, 'two': 1002})
s

one    1001
two    1002
dtype: int64

In [51]:
# Assigning a Series aligns on the index: the row 'three' has no match in `s`
# and becomes NaN. Bracket assignment is used because attribute-style
# assignment is fragile (it would set a Python attribute if the column did not exist).
df['number'] = s
df

Unnamed: 0,name,grade,number
one,Alice,100,1001.0
two,Tom,100,1002.0
three,Steven,100,


In [52]:
# Dict of dicts: outer keys become columns, inner keys become the row index.
data = dict(
    name={1: 'Alice', 2: 'Tom', 3: 'Steven'},
    grade={1: 70, 2: 80, 3: 95},
)
df = pd.DataFrame(data)
df

Unnamed: 0,name,grade
1,Alice,70
2,Tom,80
3,Steven,95


In [53]:
# 3x3 frame holding 0..8 with lowercase row labels and uppercase column labels.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=['a', 'b', 'c'],
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [54]:
# Reindex the rows: 'c' is dropped and the new label 'd' gets an all-NaN row.
df.reindex(index=['a', 'b', 'd'])

Unnamed: 0,A,B,C
a,0.0,1.0,2.0
b,3.0,4.0,5.0
d,,,


In [55]:
# Reindex the columns: 'A' and 'C' are kept, the new column 'D' is filled with NaN.
df.reindex(columns=['A','C','D'])

Unnamed: 0,A,C,D
a,0,2,
b,3,5,
c,6,8,


In [56]:
# Five-element Series 0..4 labelled 'A'..'E', used for the drop examples.
ps = pd.Series(np.arange(5), index=['A', 'B', 'C', 'D', 'E'])
ps

A    0
B    1
C    2
D    3
E    4
dtype: int32

In [57]:
# drop returns a new Series without the label 'A'; ps itself is not modified.
ps.drop('A')

B    1
C    2
D    3
E    4
dtype: int32

In [58]:
# ps still contains 'A': the drop above did not change it.
ps

A    0
B    1
C    2
D    3
E    4
dtype: int32

In [59]:
# A list of labels drops several entries at once.
ps.drop(['A','B'])

C    2
D    3
E    4
dtype: int32

In [60]:
# 3x3 frame holding 0..8, rows 'a'..'c' and columns 'one'..'three'.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
a,0,1,2
b,3,4,5
c,6,7,8


In [61]:
# Drop the row labelled 'a' (with no axis given, drop works on rows).
df.drop('a')

Unnamed: 0,one,two,three
b,3,4,5
c,6,7,8


In [62]:
# Drop several rows by passing a list of row labels.
df.drop(['a','b'])

Unnamed: 0,one,two,three
c,6,7,8


In [63]:
# Drop a column instead of a row; the result is a new frame.
df.drop(columns='one')

Unnamed: 0,two,three
a,1,2
b,4,5
c,7,8


In [64]:
# df still has the column 'one': the drop above returned a new frame.
df

Unnamed: 0,one,two,three
a,0,1,2
b,3,4,5
c,6,7,8


In [65]:
# To make the removal stick, rebind the name to the frame returned by drop.
# Reassignment is preferred over inplace=True, which hides the mutation and
# cannot be chained with other calls.
df = df.drop(columns='one')
df

Unnamed: 0,two,three
a,1,2
b,4,5
c,7,8


In [66]:
# Rebuild the full 3x3 frame for the selection examples.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
a,0,1,2
b,3,4,5
c,6,7,8


In [67]:
# A single column name returns that column as a Series.
df['one']

a    0
b    3
c    6
Name: one, dtype: int32

In [68]:
# A list of column names returns a sub-DataFrame.
df[['one','two']]

Unnamed: 0,one,two
a,0,1
b,3,4
c,6,7


In [69]:
# Positional row slice: every row from position 1 onward.
df.iloc[1:]

Unnamed: 0,one,two,three
b,3,4,5
c,6,7,8


In [70]:
# Keep only the rows whose 'one' value is positive.
df.loc[df['one'] > 0]

Unnamed: 0,one,two,three
b,3,4,5
c,6,7,8


In [71]:
# Boolean-mask assignment: every element greater than 5 is overwritten with 5.
# This modifies df itself.
df[df>5] = 5
df

Unnamed: 0,one,two,three
a,0,1,2
b,3,4,5
c,5,5,5


In [72]:
# Restore the original 0..8 frame after the in-place edit above.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
a,0,1,2
b,3,4,5
c,6,7,8


In [73]:
# .loc[row label, column labels]: a single row with two columns comes back as a Series.
df.loc['a',['one','two']]

one    0
two    1
Name: a, dtype: int32

In [74]:
# Lists on both axes return a sub-DataFrame.
df.loc[['a','c'], ['one','two']]

Unnamed: 0,one,two
a,0,1
c,6,7


In [75]:
# Label slice up to and including 'c', restricted to the column 'two'.
df.loc[:'c','two']

a    1
b    4
c    7
Name: two, dtype: int32

In [76]:
# .iloc works by position: first row, first two columns.
df.iloc[0,[0,1]]

one    0
two    1
Name: a, dtype: int32

In [77]:
# First two rows, columns from position 2 onward.
df.iloc[:2,2:]

Unnamed: 0,three
a,2
b,5


In [78]:
# Within the positional sub-frame, keep values greater than 3 and blank the rest to NaN.
df.iloc[:2, 2:].where(lambda block: block > 3)

Unnamed: 0,three
a,
b,5.0


In [79]:
# Scalar lookup by row label and column label.
df.at['b','one']

3

In [80]:
# Scalar lookup by row position and column position.
df.iat[1,0]

3

## Arithmetic and Data Alignment