In [1]:
import pandas as pd
import numpy as np

In [4]:
# 3x4 frame of random integers drawn from [0, 10), labelled with columns A-D
# and rows a-c. The global NumPy generator is not seeded here, so the values
# differ from run to run.
df = pd.DataFrame(
    np.random.randint(0, 10, size=(3, 4)),
    index=list('abc'),
    columns=list('ABCD'),
)

In [5]:
df

Unnamed: 0,A,B,C,D
a,8,8,1,1
b,9,1,2,4
c,9,5,1,8


In [6]:
# Apply the exponential element-wise; the row and column labels of df are
# carried over to the result, and df itself is left unchanged.
np.exp(df)

Unnamed: 0,A,B,C,D
a,2980.957987,2980.957987,2.718282,2.718282
b,8103.083928,2.718282,7.389056,54.59815
c,8103.083928,148.413159,2.718282,2980.957987


In [7]:
df

Unnamed: 0,A,B,C,D
a,8,8,1,1
b,9,1,2,4
c,9,5,1,8


In [8]:
# Two Series keyed by state name whose indexes only partly overlap:
# 'Alaska' appears only in area and 'New York' only in population.
area = pd.Series(
    data={
        'Alaska': 1723337,
        'Texas': 695662,
        'California': 423967,
    },
    name='area',
)
population = pd.Series(
    data={
        'California': 38332521,
        'Texas': 26448193,
        'New York': 19651127,
    },
    name='population',
)

In [9]:
area

Alaska        1723337
California     423967
Texas          695662
Name: area, dtype: int64

In [10]:
population

California    38332521
New York      19651127
Texas         26448193
Name: population, dtype: int64

In [11]:
# Division aligns the two Series on their index labels: the result covers the
# union of both indexes, and labels present in only one Series ('Alaska',
# 'New York') come out as NaN.
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [13]:
# Labels common to both Series. Use the explicit set method: `&` between two
# Index objects is no longer a set intersection in current pandas (it was
# deprecated and now acts as an element-wise logical operator).
area.index.intersection(population.index)

Index([u'California', u'Texas'], dtype='object')

In [15]:
# Labels of area that are NOT shared with population. The shared labels are
# computed with Index.intersection because `&` between Index objects is no
# longer a set operation in current pandas.
area.index.difference(area.index.intersection(population.index))

Index([u'Alaska'], dtype='object')

In [16]:
# Two integer Series whose indexes overlap only on labels 1 and 2.
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])

In [17]:
A

0    2
1    4
2    6
dtype: int64

In [18]:
B

1    1
2    3
3    5
dtype: int64

In [19]:
# Addition aligns on index labels. Labels 0 and 3 exist in only one operand,
# so they become NaN and the result is upcast to float64.
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [20]:
# Same aligned addition, but an entry missing from one operand is treated as
# 0.0 instead of producing NaN (label 0 -> 2.0, label 3 -> 5.0).
A.add(B, fill_value=0.0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [21]:
A

0    2
1    4
2    6
dtype: int64

In [22]:
B

1    1
2    3
3    5
dtype: int64

In [23]:
A

0    2
1    4
2    6
dtype: int64

In [34]:
# Legacy NumPy generator with a fixed seed; the random frames built later in
# the notebook draw from this object.
rng = np.random.RandomState(seed=10)

In [36]:
# Draw a 2x2 array of integers from [0, 20). Every call advances rng's
# internal state, so re-running this cell changes what later draws return.
rng.randint(0,20, (2,2))

array([[17, 16],
       [17,  8]])

In [27]:
rng

<mtrand.RandomState at 0x8b019b0>

In [38]:
# Rebind A (previously a Series) to a 2x2 frame of random integers in [0, 20)
# with columns 'A' and 'B'.
A = pd.DataFrame(
    data=rng.randint(low=0, high=20, size=(2, 2)),
    columns=['A', 'B'],
)

In [39]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [41]:
# stack() flattens the frame into a single Series, so this is the mean over
# all cells rather than a per-column mean.
A.stack().mean()

10.75

In [42]:
# Column-wise mean: one value per column label.
A.mean()

A    10.0
B    11.5
dtype: float64

In [43]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [44]:
# Rebind B (previously a Series) to a 3x3 frame of random integers in [0, 10).
# The columns are deliberately ordered B, A, C so that alignment with A is by
# label rather than by position.
B = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 3)),
    columns=['B', 'A', 'C'],
)

In [45]:
B

Unnamed: 0,B,A,C
0,6,8,1
1,8,4,1
2,3,6,5


In [47]:
# Addition aligns on both row and column labels; cells that exist in only one
# of the frames (column 'C', row 2) become NaN in the result.
C = A + B

In [48]:
# Per-column mean. NaN cells are skipped, and the all-NaN column 'C' yields NaN.
C.mean()

A    16.0
B    18.5
C     NaN
dtype: float64

In [49]:
C

Unnamed: 0,A,B,C
0,12.0,25.0,
1,20.0,12.0,
2,,,


In [57]:
# Mean over all cells of C taken together; NaN cells do not contribute, so
# only the four non-null values are averaged.
C.stack().mean()

17.25

In [58]:
# fill_value is used only where exactly one operand is missing a cell. A has
# no column 'C' and no row 2 either, so those cells are missing on both sides
# and stay NaN.
C.add(A,fill_value=17.5)

Unnamed: 0,A,B,C
0,16.0,44.0,
1,36.0,16.0,
2,,,


In [53]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [54]:
B

Unnamed: 0,B,A,C
0,6,8,1
1,8,4,1
2,3,6,5


In [60]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [61]:
B

Unnamed: 0,B,A,C
0,6,8,1
1,8,4,1
2,3,6,5


In [62]:
# Aligned addition where a cell missing from one frame is treated as 10:
# cells present only in B (column 'C', row 2) come out as 10 + B's value.
A.add(B,fill_value=10)

Unnamed: 0,A,B,C
0,12.0,25.0,11.0
1,20.0,12.0,11.0
2,16.0,13.0,15.0


In [63]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [64]:
B

Unnamed: 0,B,A,C
0,6,8,1
1,8,4,1
2,3,6,5


In [65]:
# Element-wise power after label alignment: columns are matched by name, not
# by position (B's columns are ordered B, A, C), and cells without a partner
# in the other frame are NaN.
A ** B

Unnamed: 0,A,B,C
0,65536.0,47045881.0,
1,65536.0,65536.0,
2,,,


In [66]:
A

Unnamed: 0,A,B
0,4,19
1,16,4


In [67]:
B

Unnamed: 0,B,A,C
0,6,8,1
1,8,4,1
2,3,6,5


## Handling Missing Data

In [69]:
# Mixing None with integers gives an object-dtype array, so operations on it
# run as Python-level operations on the individual elements.
d = np.array([1, 2, None, 4])

In [71]:
# Summing the object array adds the elements as Python objects, and int + None
# is not defined. The TypeError is the point of this cell, so catch it and
# display it: an uncaught exception would stop Restart & Run All here.
try:
    d.sum()
except TypeError as err:
    print('{}: {}'.format(type(err).__name__, err))

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [73]:
# Float array that uses NaN as the missing-value marker.
s = np.array([1, 2, np.nan, 3])

In [74]:
# A single NaN propagates through the ordinary sum, so the result is NaN.
s.sum()

nan

In [75]:
# NaN-aware sum: NaN entries are ignored and only the valid values are added.
np.nansum(s)

6.0

In [77]:
# Rebind d to a pandas Series; both np.nan and None end up as NaN in a
# float64 Series.
d = pd.Series(data=[1, 2, np.nan, 3, None])

In [78]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [80]:
# Boolean mask: True where the value is missing.
d.isnull()

0    False
1    False
2     True
3    False
4     True
dtype: bool

In [81]:
# Complementary mask: True where a value is present.
d.notnull()

0     True
1     True
2    False
3     True
4    False
dtype: bool

In [82]:
# Boolean indexing with the mask keeps only the non-missing entries, under
# their original index labels.
d[d.notnull()]

0    1.0
1    2.0
3    3.0
dtype: float64

In [83]:
# Returns a new Series without the missing entries; d itself is not modified.
d.dropna()

0    1.0
1    2.0
3    3.0
dtype: float64

In [84]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [85]:
# Rebind d to the filtered result. The Series that still contained NaN is no
# longer reachable under this name from here on.
d = d.dropna()

In [86]:
d

0    1.0
1    2.0
3    3.0
dtype: float64

In [87]:
# Recreate the Series with missing entries (np.nan and None both become NaN)
# for the fill examples that follow.
d = pd.Series(data=[1, 2, np.nan, 3, None])

In [88]:
# Replace every missing entry with 0; returns a new Series.
d.fillna(0)

0    1.0
1    2.0
2    0.0
3    3.0
4    0.0
dtype: float64

In [89]:
# Forward-fill: each missing entry takes the most recent preceding valid
# value. Series.ffill() is the dedicated method for this; passing
# method='ffill' to fillna() is deprecated in current pandas.
d.ffill()

0    1.0
1    2.0
2    2.0
3    3.0
4    3.0
dtype: float64