In [2]:
import numpy as np
import pandas as pd

In [3]:
# Five-element Series with a single missing value at position 2.
data = pd.Series(data=[1, 2, np.nan, 4, 5])

In [6]:
# Boolean-mask selection: keep only the entries that are not missing.
data.loc[data.notnull()]

0    1.0
1    2.0
3    4.0
4    5.0
dtype: float64

In [7]:
# 3x3 frame: row 0 is missing its last value, row 1 its first value,
# and row 2 is complete.
data = pd.DataFrame(
    data=[
        [1, 2, np.nan],
        [np.nan, 3, 1],
        [5, 6, 7],
    ]
)

In [9]:
# Show the frame so the positions of the missing values are visible.
data

Unnamed: 0,0,1,2
0,1.0,2,
1,,3,1.0
2,5.0,6,7.0


In [10]:
# Default behaviour spelled out: drop every row containing at least one NA.
data.dropna(axis=0, how='any')

Unnamed: 0,0,1,2
2,5.0,6,7.0


In [12]:
# Drop every column (axis 1) that contains at least one NA.
data.dropna(axis=1)

Unnamed: 0,1
0,2
1,3
2,6


In [13]:
# Add a new column labelled 3 filled entirely with NaN.
# NOTE: this mutates `data` in place, so re-running earlier cells will show
# different output than what is recorded above.
data[3] = np.nan

In [14]:
# Inspect the frame now that it has the all-NaN column 3.
data

Unnamed: 0,0,1,2,3
0,1.0,2,,
1,,3,1.0,
2,5.0,6,7.0,


In [16]:
# Drop every row that has fewer than 3 non-NA values
# (`thresh=3` keeps only rows with at least 3 non-NA values).
data.dropna(thresh=3)

Unnamed: 0,0,1,2,3
2,5.0,6,7.0,


In [17]:
# Series mixing both missing-value markers: np.nan and None.
# Both are shown as NaN once stored in the float64 Series.
data = pd.Series(data=[1, 2, 3, np.nan, 4, None])

In [18]:
# Show the Series; the None entry is displayed as NaN alongside np.nan.
data

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    NaN
dtype: float64

In [19]:
# Replace every missing entry with the scalar 0; returns a new Series.
data.fillna(value=0)

0    1.0
1    2.0
2    3.0
3    0.0
4    4.0
5    0.0
dtype: float64

In [20]:
# Print the built-in documentation for Series.fillna (interactive lookup;
# the captured output below is long and can be cleared before sharing).
help(data.fillna)

Help on method fillna in module pandas.core.series:

fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs) method of pandas.core.series.Series instance
    Fill NA/NaN values using the specified method
    
    Parameters
    ----------
    value : scalar, dict, Series, or DataFrame
        Value to use to fill holes (e.g. 0), alternately a
        dict/Series/DataFrame of values specifying which value to use for
        each index (for a Series) or column (for a DataFrame). (values not
        in the dict/Series/DataFrame will not be filled). This value cannot
        be a list.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series
        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap
    axis : {0, 'index'}
    inplace : boolean, default False
        If True, fill in place. Note: t

In [21]:
# `fillna(0)` above returned a new Series (inplace defaults to False),
# so `data` itself still contains its NaN entries.
data

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    NaN
dtype: float64

In [22]:
# Rebuild the 3x3 example frame: one NaN in row 0 (last column) and one in
# row 1 (first column); row 2 is complete.
data = pd.DataFrame(
    data=[
        [1, 2, np.nan],
        [np.nan, 3, 1],
        [5, 6, 7],
    ]
)

In [23]:
# Show the freshly rebuilt frame before filling its missing values.
data

Unnamed: 0,0,1,2
0,1.0,2,
1,,3,1.0
2,5.0,6,7.0


In [26]:
# Forward-fill along each row (axis=1): a missing value takes the value from
# the column to its left. A NaN in the first column has nothing to its left
# and therefore stays NaN.
# `.ffill()` replaces `fillna(method='ffill')`, whose `method=` keyword is
# deprecated in current pandas.
data = data.ffill(axis=1)

In [29]:
# Back-fill down each column (default axis=0): a missing value takes the next
# valid value below it in the same column.
# `.bfill()` replaces `fillna(method='bfill')`, whose `method=` keyword is
# deprecated in current pandas.
data = data.bfill()

In [30]:
# Show the frame after the row-wise forward fill and column-wise back fill.
data

Unnamed: 0,0,1,2
0,1.0,2.0,2.0
1,5.0,3.0,1.0
2,5.0,6.0,7.0


In [31]:
# Reset to the original 3x3 example frame (NaN at [0, 2] and [1, 0]) so the
# chained fill can be demonstrated from a clean starting point.
data = pd.DataFrame(
    data=[
        [1, 2, np.nan],
        [np.nan, 3, 1],
        [5, 6, 7],
    ]
)

In [32]:
# Same two-step fill as a single chain, without reassigning `data`:
#   1. forward-fill along rows (axis=1),
#   2. back-fill down columns to cover whatever step 1 could not reach.
# `.ffill()` / `.bfill()` replace `fillna(method=...)`, whose `method=`
# keyword is deprecated in current pandas.
data.ffill(axis=1).bfill()

Unnamed: 0,0,1,2
0,1.0,2.0,2.0
1,5.0,3.0,1.0
2,5.0,6.0,7.0


In [33]:
# Build a Series keyed by plain (state, year) tuples: the naive way to attach
# two-part labels to one-dimensional data.
index = [
    (state, year)
    for state in ('California', 'New York', 'Texas')
    for year in (2000, 2010)
]

populations = [
    33871648, 37253956,  # California: 2000, 2010
    18976457, 19378102,  # New York:   2000, 2010
    20851820, 25145561,  # Texas:      2000, 2010
]

popl = pd.Series(data=populations, index=index)

In [35]:
# Inspect the index: a flat object-dtype Index whose labels are tuples.
popl.index

Index([('California', 2000), ('California', 2010),   ('New York', 2000),
         ('New York', 2010),      ('Texas', 2000),      ('Texas', 2010)],
      dtype='object')

In [36]:
# Look up a single entry by its full (state, year) tuple label.
popl[('California', 2000)]

33871648

In [46]:
# With tuple labels, selecting one state means filtering the labels by hand.
popl[[(state, year) for state, year in popl.index if state == 'California']]

(California, 2000)    33871648
(California, 2010)    37253956
dtype: int64

In [43]:
# List comprehension: build a list by evaluating an expression per element.
# Here: twice each integer from 0 through 9.
[2 * n for n in range(10)]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [47]:
# Build a hierarchical MultiIndex from the list of (state, year) tuples.
pd.MultiIndex.from_tuples(index)

MultiIndex(levels=[['California', 'New York', 'Texas'], [2000, 2010]],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

In [51]:
# (state, year) label tuples: every state paired with both census years.
index = [
    (state, year)
    for state in ('California', 'New York', 'Texas')
    for year in (2000, 2010)
]


In [54]:
# Keep the MultiIndex built from the tuple labels for use when reindexing.
# NOTE(review): the name looks like a typo for `multiidx`; it is left as is
# because a later cell refers to `muliidx`.
muliidx = pd.MultiIndex.from_tuples(index)

In [55]:
# Show the Series; its labels are still plain (state, year) tuples.
popl

(California, 2000)    33871648
(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
(Texas, 2010)         25145561
dtype: int64

In [57]:
# Re-label the tuple-indexed Series with the MultiIndex so the two label
# parts become separate index levels.
new_data = popl.reindex(muliidx)

In [59]:
# Partial indexing on the second level: every state's value for year 2010.
new_data.loc[:, 2010]

California    37253956
New York      19378102
Texas         25145561
dtype: int64

In [68]:
# Partial indexing on the first level: both years for New York.
new_data.loc['New York']

2000    18976457
2010    19378102
dtype: int64

In [72]:
# Pivot the innermost index level (year) into columns, giving a
# state-by-year DataFrame.
df = new_data.unstack(level=-1)

In [76]:
# Scalar lookup by row label and column label in one `.loc` call.
# This replaces the chained form `df.loc['Texas'][2010]`, which builds an
# intermediate row Series first and is the pattern pandas warns against.
df.loc['Texas', 2010]

25145561

In [75]:
# Select the 2010 column (all rows) as a Series.
df.loc[:, 2010]

California    37253956
New York      19378102
Texas         25145561
Name: 2010, dtype: int64

In [77]:
# Positional selection: the row at integer position 1, all columns.
df.iloc[1, :]

2000    18976457
2010    19378102
Name: New York, dtype: int64

In [78]:
# Show the unstacked frame: one row per state, one column per year.
df

Unnamed: 0,2000,2010
California,33871648,37253956
New York,18976457,19378102
Texas,20851820,25145561


In [79]:
# Label-based selection: the 'Texas' row, all columns.
df.loc['Texas', :]

2000    20851820
2010    25145561
Name: Texas, dtype: int64