# Basics of pandas

Let us start by importing pandas under its conventional alias `pd`.

In [3]:
import pandas as pd

Check the installed version of pandas with the `pd.__version__` attribute.

In [4]:
pd.__version__

'0.20.1'

Let us also import some supporting modules such as numpy and matplotlib.

In [5]:
import numpy as np
import matplotlib.pyplot as plt

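Creating a Series from a list of values. No index is passed, so pandas assigns the default integer index (0, 1, 2, ...); `np.nan` marks a missing value.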

In [8]:
# Build a Series from a plain list; np.nan stands in for the missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


Creating datetime index in dates variable

In [9]:
# Six consecutive days starting on 2017-12-07.
dates = pd.date_range(start='20171207', periods=6)
print(dates)

DatetimeIndex(['2017-12-07', '2017-12-08', '2017-12-09', '2017-12-10',
               '2017-12-11', '2017-12-12'],
              dtype='datetime64[ns]', freq='D')


Create a dataframe with dates as index and specified columns using numpy arrays

In [12]:
# Seed NumPy's global generator so the "random" frame is identical on every
# run; without a seed the values (and every output derived from them) change
# each time the notebook is re-executed.
np.random.seed(0)
# 6 rows (one per entry in `dates`) x 4 columns of standard-normal samples,
# with the columns labelled 'A' to 'D'.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)

                   A         B         C         D
2017-12-07 -1.590891  0.470428  0.757403 -0.782416
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913
2017-12-09 -1.531725  0.906206  0.680724  1.488736
2017-12-10 -0.827291  0.305642  0.176794 -1.116974
2017-12-11 -0.102081 -0.240566  0.949629  0.708351
2017-12-12 -1.675654  0.410754 -1.569903  0.219077


Create a dataframe from a dict: each key becomes a column name, and scalar values are repeated for every row.

In [21]:
# Column name -> column content. `dates` is the only list-like value, so it
# fixes the number of rows; the scalar values are repeated on every row.
df2 = pd.DataFrame({
    'A': 1.,
    'B': dates,
    'C': 3,
    'D': 4,
    'E': 'foo',
})
print(df2)

     A          B  C  D    E
0  1.0 2017-12-07  3  4  foo
1  1.0 2017-12-08  3  4  foo
2  1.0 2017-12-09  3  4  foo
3  1.0 2017-12-10  3  4  foo
4  1.0 2017-12-11  3  4  foo
5  1.0 2017-12-12  3  4  foo


Checking datatypes

In [22]:
df2.dtypes

A           float64
B    datetime64[ns]
C             int64
D             int64
E            object
dtype: object

Accessing a column of a dataframe as an attribute (dot notation).

In [23]:
df2.B

0   2017-12-07
1   2017-12-08
2   2017-12-09
3   2017-12-10
4   2017-12-11
5   2017-12-12
Name: B, dtype: datetime64[ns]

Accessing a column in a dataframe by indexing the dataframe.

In [24]:
df2['B']

0   2017-12-07
1   2017-12-08
2   2017-12-09
3   2017-12-10
4   2017-12-11
5   2017-12-12
Name: B, dtype: datetime64[ns]

In [25]:
print (df2)

     A          B  C  D    E
0  1.0 2017-12-07  3  4  foo
1  1.0 2017-12-08  3  4  foo
2  1.0 2017-12-09  3  4  foo
3  1.0 2017-12-10  3  4  foo
4  1.0 2017-12-11  3  4  foo
5  1.0 2017-12-12  3  4  foo


View the head of dataframe df and df2

In [26]:
df.head(),df2.head()

(                   A         B         C         D
 2017-12-07 -1.590891  0.470428  0.757403 -0.782416
 2017-12-08 -0.589836 -1.477910 -0.246581  1.495913
 2017-12-09 -1.531725  0.906206  0.680724  1.488736
 2017-12-10 -0.827291  0.305642  0.176794 -1.116974
 2017-12-11 -0.102081 -0.240566  0.949629  0.708351,
      A          B  C  D    E
 0  1.0 2017-12-07  3  4  foo
 1  1.0 2017-12-08  3  4  foo
 2  1.0 2017-12-09  3  4  foo
 3  1.0 2017-12-10  3  4  foo
 4  1.0 2017-12-11  3  4  foo)

View tail of dataframe

In [28]:
df.tail(2)

Unnamed: 0,A,B,C,D
2017-12-11,-0.102081,-0.240566,0.949629,0.708351
2017-12-12,-1.675654,0.410754,-1.569903,0.219077


Get the index,columns and values

In [30]:
df.index

DatetimeIndex(['2017-12-07', '2017-12-08', '2017-12-09', '2017-12-10',
               '2017-12-11', '2017-12-12'],
              dtype='datetime64[ns]', freq='D')

In [31]:
df.columns,df2.columns

(Index(['A', 'B', 'C', 'D'], dtype='object'),
 Index(['A', 'B', 'C', 'D', 'E'], dtype='object'))

In [32]:
df.values

array([[-1.59089129,  0.47042828,  0.75740295, -0.78241565],
       [-0.58983586, -1.4779103 , -0.24658111,  1.49591321],
       [-1.53172494,  0.90620592,  0.680724  ,  1.48873564],
       [-0.82729062,  0.30564243,  0.1767941 , -1.11697355],
       [-0.10208113, -0.24056567,  0.94962877,  0.70835149],
       [-1.67565402,  0.41075356, -1.56990339,  0.21907695]])

In [33]:
df2.values,df.index,df2.columns

(array([[1.0, Timestamp('2017-12-07 00:00:00'), 3, 4, 'foo'],
        [1.0, Timestamp('2017-12-08 00:00:00'), 3, 4, 'foo'],
        [1.0, Timestamp('2017-12-09 00:00:00'), 3, 4, 'foo'],
        [1.0, Timestamp('2017-12-10 00:00:00'), 3, 4, 'foo'],
        [1.0, Timestamp('2017-12-11 00:00:00'), 3, 4, 'foo'],
        [1.0, Timestamp('2017-12-12 00:00:00'), 3, 4, 'foo']], dtype=object),
 DatetimeIndex(['2017-12-07', '2017-12-08', '2017-12-09', '2017-12-10',
                '2017-12-11', '2017-12-12'],
               dtype='datetime64[ns]', freq='D'),
 Index(['A', 'B', 'C', 'D', 'E'], dtype='object'))

Describe shows a quick summary of data

In [34]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-1.052913,0.062426,0.124678,0.335448
std,0.644347,0.839447,0.938321,1.11252
min,-1.675654,-1.47791,-1.569903,-1.116974
25%,-1.5761,-0.104014,-0.140737,-0.532042
50%,-1.179508,0.358198,0.428759,0.463714
75%,-0.6492,0.45551,0.738233,1.29364
max,-0.102081,0.906206,0.949629,1.495913


Getting transpose

In [36]:
df.T

Unnamed: 0,2017-12-07 00:00:00,2017-12-08 00:00:00,2017-12-09 00:00:00,2017-12-10 00:00:00,2017-12-11 00:00:00,2017-12-12 00:00:00
A,-1.590891,-0.589836,-1.531725,-0.827291,-0.102081,-1.675654
B,0.470428,-1.47791,0.906206,0.305642,-0.240566,0.410754
C,0.757403,-0.246581,0.680724,0.176794,0.949629,-1.569903
D,-0.782416,1.495913,1.488736,-1.116974,0.708351,0.219077


Sorting by axis

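Note: `axis=1` sorts by the column labels instead of the row index. With `ascending=True` (the default) the result looks unchanged because the columns A, B, C, D are already in ascending order; `ascending=False` reverses them to D, C, B, A.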

In [None]:
df.sort_index(axis=1,ascending=False)

Sort by values

In [46]:
df.sort_values('B')

Unnamed: 0,A,B,C,D
2017-12-08,-0.589836,-1.47791,-0.246581,1.495913
2017-12-11,-0.102081,-0.240566,0.949629,0.708351
2017-12-10,-0.827291,0.305642,0.176794,-1.116974
2017-12-12,-1.675654,0.410754,-1.569903,0.219077
2017-12-07,-1.590891,0.470428,0.757403,-0.782416
2017-12-09,-1.531725,0.906206,0.680724,1.488736


Selection of a single column

In [47]:
df['C']

2017-12-07    0.757403
2017-12-08   -0.246581
2017-12-09    0.680724
2017-12-10    0.176794
2017-12-11    0.949629
2017-12-12   -1.569903
Freq: D, Name: C, dtype: float64

##### slicing

In [81]:
df[0:2]

Unnamed: 0,A,B,C,D
2017-12-07,0.351744,-1.245982,-1.422195,0.137432
2017-12-08,1.140735,0.285535,0.779562,1.413234


You can also slice rows by index label (here, date strings). Unlike positional slicing, both endpoints are included.

In [82]:
df['20171208':'20171211']

Unnamed: 0,A,B,C,D
2017-12-08,1.140735,0.285535,0.779562,1.413234
2017-12-09,-0.237926,0.905504,0.216921,-0.463655
2017-12-10,-1.289273,0.641844,0.006236,-1.171816
2017-12-11,0.089711,0.880766,0.298425,0.553579


In [92]:
df[5:6]

Unnamed: 0,A,B,C,D
2017-12-12,0.455151,-1.062723,1.446142,0.248972


Selection by label

In [48]:
df.loc[dates[5]]

A   -1.675654
B    0.410754
C   -1.569903
D    0.219077
Name: 2017-12-12 00:00:00, dtype: float64

Selection on multiple axes by label

In [49]:
df.loc[:,['A','C']]

Unnamed: 0,A,C
2017-12-07,-1.590891,0.757403
2017-12-08,-0.589836,-0.246581
2017-12-09,-1.531725,0.680724
2017-12-10,-0.827291,0.176794
2017-12-11,-0.102081,0.949629
2017-12-12,-1.675654,-1.569903


selection of particular indexes and columns

General form: `df.loc[row_labels, [column_labels]]`

In [51]:
df.loc[dates[1:3],['A','D']]

Unnamed: 0,A,D
2017-12-08,-0.589836,1.495913
2017-12-09,-1.531725,1.488736


Selecting particular rows and columns by writing the index labels directly (as date strings) instead of going through the `dates` variable

In [52]:
df.loc['20171208',['B','D']]

B   -1.477910
D    1.495913
Name: 2017-12-08 00:00:00, dtype: float64

In [53]:
df.loc['20171207':'20171209',['B','C']]

Unnamed: 0,B,C
2017-12-07,0.470428,0.757403
2017-12-08,-1.47791,-0.246581
2017-12-09,0.906206,0.680724


for getting a scalar value

In [55]:
df.loc[dates[1],'A']

-0.5898358613858028

In [56]:
df.loc[dates[1],['A']]

A   -0.589836
Name: 2017-12-08 00:00:00, dtype: float64

Note the difference between the two outputs above: a single column label returns a scalar, while a list of column labels returns a Series.

for getting faster access to scalar

In [57]:
df.at[dates[1],'A']

-0.5898358613858028

Please note that the above command does not work when you give the columns as a list: `at` accepts exactly one row label and one column label.

selection by position(row index)

In [60]:
df.iloc[3]

A   -0.827291
B    0.305642
C    0.176794
D   -1.116974
Name: 2017-12-10 00:00:00, dtype: float64

In [61]:
df.tail(1),df.head()

(                   A         B         C         D
 2017-12-12 -1.675654  0.410754 -1.569903  0.219077,
                    A         B         C         D
 2017-12-07 -1.590891  0.470428  0.757403 -0.782416
 2017-12-08 -0.589836 -1.477910 -0.246581  1.495913
 2017-12-09 -1.531725  0.906206  0.680724  1.488736
 2017-12-10 -0.827291  0.305642  0.176794 -1.116974
 2017-12-11 -0.102081 -0.240566  0.949629  0.708351)

integer slices act similar to numpy

In [161]:
df.iloc[1:3,2:4]

Unnamed: 0,C,D
2017-12-08,0.779562,1.413234
2017-12-09,0.216921,-0.463655



You can access the required rows and columns by listing their integer positions

In [117]:
df.iloc[[1,3,4],[1,2]]

Unnamed: 0,B,C
2017-12-08,0.285535,0.779562
2017-12-10,0.641844,0.006236
2017-12-11,0.880766,0.298425


for slicing rows explicitly

In [119]:
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2017-12-08,1.140735,0.285535,0.779562,1.413234
2017-12-09,-0.237926,0.905504,0.216921,-0.463655



for getting columns explicitly

In [120]:
df.iloc[:,1:3]

Unnamed: 0,B,C
2017-12-07,-1.245982,-1.422195
2017-12-08,0.285535,0.779562
2017-12-09,0.905504,0.216921
2017-12-10,0.641844,0.006236
2017-12-11,0.880766,0.298425
2017-12-12,-1.062723,1.446142



for getting value explicitly

In [62]:
df.iloc[1,1]

-1.4779102969631106

For quick access to a particular value by position, use `iat` (the positional counterpart of the `at` accessor used earlier)

In [123]:
df.iat[1,1]

0.28553495414554625

In [63]:
df.iat[2,2]

0.68072399831677033

Boolean indexing: select the rows where the values of a single column satisfy a condition (here, `A > -1`)

In [67]:
df[df.A>-1]

Unnamed: 0,A,B,C,D
2017-12-08,-0.589836,-1.47791,-0.246581,1.495913
2017-12-10,-0.827291,0.305642,0.176794,-1.116974
2017-12-11,-0.102081,-0.240566,0.949629,0.708351



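`df[df<0]` keeps only the values below zero and replaces every other value with NaN (not a number). It is called a "where" operation because it is equivalent to `df.where(df<0)`: values are kept where the condition holds.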

In [69]:
df[df<0]

Unnamed: 0,A,B,C,D
2017-12-07,-1.590891,,,-0.782416
2017-12-08,-0.589836,-1.47791,-0.246581,
2017-12-09,-1.531725,,,
2017-12-10,-0.827291,,,-1.116974
2017-12-11,-0.102081,-0.240566,,
2017-12-12,-1.675654,,-1.569903,


In [70]:
df2 = df.copy()

adding a new column to already existing columns

In [71]:
df2['E']=['one','two','three','four','five','two']

In [72]:
print (df2)

                   A         B         C         D      E
2017-12-07 -1.590891  0.470428  0.757403 -0.782416    one
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913    two
2017-12-09 -1.531725  0.906206  0.680724  1.488736  three
2017-12-10 -0.827291  0.305642  0.176794 -1.116974   four
2017-12-11 -0.102081 -0.240566  0.949629  0.708351   five
2017-12-12 -1.675654  0.410754 -1.569903  0.219077    two


using isin function

In [73]:
df2[df2['E'].isin(['one','two'])]

Unnamed: 0,A,B,C,D,E
2017-12-07,-1.590891,0.470428,0.757403,-0.782416,one
2017-12-08,-0.589836,-1.47791,-0.246581,1.495913,two
2017-12-12,-1.675654,0.410754,-1.569903,0.219077,two


Let us add another column

In [74]:
# Six random values, indexed by six consecutive days starting 2017-12-07.
sl = pd.Series(
    data=np.random.randn(6),
    index=pd.date_range(start='20171207', periods=6),
)
print(sl)

2017-12-07   -1.463276
2017-12-08    0.550015
2017-12-09    0.531126
2017-12-10    1.039805
2017-12-11   -0.677960
2017-12-12   -0.619496
Freq: D, dtype: float64


In [75]:
df['F']=sl

In [76]:
print (df)

                   A         B         C         D         F
2017-12-07 -1.590891  0.470428  0.757403 -0.782416 -1.463276
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913  0.550015
2017-12-09 -1.531725  0.906206  0.680724  1.488736  0.531126
2017-12-10 -0.827291  0.305642  0.176794 -1.116974  1.039805
2017-12-11 -0.102081 -0.240566  0.949629  0.708351 -0.677960
2017-12-12 -1.675654  0.410754 -1.569903  0.219077 -0.619496


In [77]:
# Set a single cell by label: row label dates[0], column label 'A'.
df.at[dates[0], 'A'] = 0

In [79]:
print (df)

                   A         B         C         D         F
2017-12-07  0.000000  0.470428  0.757403 -0.782416 -1.463276
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913  0.550015
2017-12-09 -1.531725  0.906206  0.680724  1.488736  0.531126
2017-12-10 -0.827291  0.305642  0.176794 -1.116974  1.039805
2017-12-11 -0.102081 -0.240566  0.949629  0.708351 -0.677960
2017-12-12 -1.675654  0.410754 -1.569903  0.219077 -0.619496


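This is how you drop columns or rows from a dataframe. `drop` returns a new dataframe and leaves `df` unchanged; `axis=1` drops a column, while `axis=0` (the default) drops a row.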

In [81]:
df.drop(df.columns[4],axis=1)

Unnamed: 0,A,B,C,D
2017-12-07,0.0,0.470428,0.757403,-0.782416
2017-12-08,-0.589836,-1.47791,-0.246581,1.495913
2017-12-09,-1.531725,0.906206,0.680724,1.488736
2017-12-10,-0.827291,0.305642,0.176794,-1.116974
2017-12-11,-0.102081,-0.240566,0.949629,0.708351
2017-12-12,-1.675654,0.410754,-1.569903,0.219077


This is modifying a dataframe by using position

In [82]:
df.iat[0,1] = 0

In [83]:
print (df)

                   A         B         C         D         F
2017-12-07  0.000000  0.000000  0.757403 -0.782416 -1.463276
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913  0.550015
2017-12-09 -1.531725  0.906206  0.680724  1.488736  0.531126
2017-12-10 -0.827291  0.305642  0.176794 -1.116974  1.039805
2017-12-11 -0.102081 -0.240566  0.949629  0.708351 -0.677960
2017-12-12 -1.675654  0.410754 -1.569903  0.219077 -0.619496


In [89]:
# Dropping a column. drop() returns a new dataframe and leaves df itself
# untouched; to keep the result, assign it to a name (or pass inplace=True).
df.drop(df.columns[4], axis=1)

Unnamed: 0,A,B,C,D
2017-12-07,0.0,0.0,0.757403,-0.782416
2017-12-08,-0.589836,-1.47791,-0.246581,1.495913
2017-12-09,-1.531725,0.906206,0.680724,1.488736
2017-12-10,-0.827291,0.305642,0.176794,-1.116974
2017-12-11,-0.102081,-0.240566,0.949629,0.708351
2017-12-12,-1.675654,0.410754,-1.569903,0.219077


In [90]:
print (df)

                   A         B         C         D         F
2017-12-07  0.000000  0.000000  0.757403 -0.782416 -1.463276
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913  0.550015
2017-12-09 -1.531725  0.906206  0.680724  1.488736  0.531126
2017-12-10 -0.827291  0.305642  0.176794 -1.116974  1.039805
2017-12-11 -0.102081 -0.240566  0.949629  0.708351 -0.677960
2017-12-12 -1.675654  0.410754 -1.569903  0.219077 -0.619496


In [91]:
df2 = df.copy()

In [203]:
df2

Unnamed: 0,A,B,C,D,F,"(2017-12-07 00:00:00, A)"
2017-12-07,0.0,0.0,-1.422195,0.137432,-0.204341,0
2017-12-08,1.140735,0.285535,0.779562,1.413234,-0.239184,0
2017-12-09,-0.237926,0.905504,0.216921,-0.463655,0.836868,0
2017-12-10,-1.289273,0.641844,0.006236,-1.171816,-0.778998,0
2017-12-11,0.089711,0.880766,0.298425,0.553579,-0.443587,0
2017-12-12,0.455151,-1.062723,1.446142,0.248972,0.357825,0


In [92]:
# Drop the fifth column (position 4) of df2 and keep the result. The label is
# looked up in df2's own columns rather than df's, so the cell no longer relies
# on the two frames happening to share the same column order.
df2 = df2.drop(df2.columns[4], axis=1)
print(df2)

                   A         B         C         D
2017-12-07  0.000000  0.000000  0.757403 -0.782416
2017-12-08 -0.589836 -1.477910 -0.246581  1.495913
2017-12-09 -1.531725  0.906206  0.680724  1.488736
2017-12-10 -0.827291  0.305642  0.176794 -1.116974
2017-12-11 -0.102081 -0.240566  0.949629  0.708351
2017-12-12 -1.675654  0.410754 -1.569903  0.219077


In [93]:
dates = pd.date_range('20170712',periods=6)

In [94]:
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))
print (df)

                   A         B         C         D
2017-07-12 -0.549142  1.243369 -0.609412  1.577952
2017-07-13  0.525374  1.712230 -0.719859  0.606434
2017-07-14  0.672185 -0.609259  0.719651  1.466960
2017-07-15 -1.693815  0.474479  0.298744 -0.841066
2017-07-16 -0.852702 -1.668808  0.951331  1.543137
2017-07-17 -0.991764  1.024736  1.381660  0.933204


assigning a value to a data point when the point is located with its index

In [95]:
df.at[dates[0],'A'] = 0

Assigning a value to a data point that is located by its integer position (row 0, column 1)

In [96]:
df.iat[0,1] = 0

Replacing a column using arrays

In [97]:
df.loc[:,'D']=np.array([3]*len(df))

In [98]:
print (df)

                   A         B         C  D
2017-07-12  0.000000  0.000000 -0.609412  3
2017-07-13  0.525374  1.712230 -0.719859  3
2017-07-14  0.672185 -0.609259  0.719651  3
2017-07-15 -1.693815  0.474479  0.298744  3
2017-07-16 -0.852702 -1.668808  0.951331  3
2017-07-17 -0.991764  1.024736  1.381660  3


In [99]:
df.iloc[:,3:] = np.array([4]*len(df))
print (df)

                   A         B         C  D
2017-07-12  0.000000  0.000000 -0.609412  4
2017-07-13  0.525374  1.712230 -0.719859  4
2017-07-14  0.672185 -0.609259  0.719651  4
2017-07-15 -1.693815  0.474479  0.298744  4
2017-07-16 -0.852702 -1.668808  0.951331  4
2017-07-17 -0.991764  1.024736  1.381660  4


In [100]:
df2 = df.copy()

In [101]:
df2[df2>0] = -df2

In [102]:
print (df2)

                   A         B         C  D
2017-07-12  0.000000  0.000000 -0.609412 -4
2017-07-13 -0.525374 -1.712230 -0.719859 -4
2017-07-14 -0.672185 -0.609259 -0.719651 -4
2017-07-15 -1.693815 -0.474479 -0.298744 -4
2017-07-16 -0.852702 -1.668808 -0.951331 -4
2017-07-17 -0.991764 -1.024736 -1.381660 -4


In [103]:
df2['E'] = ['anil','dharni','kumar','patel','anil','kumar']

In [104]:
print (df2)

                   A         B         C  D       E
2017-07-12  0.000000  0.000000 -0.609412 -4    anil
2017-07-13 -0.525374 -1.712230 -0.719859 -4  dharni
2017-07-14 -0.672185 -0.609259 -0.719651 -4   kumar
2017-07-15 -1.693815 -0.474479 -0.298744 -4   patel
2017-07-16 -0.852702 -1.668808 -0.951331 -4    anil
2017-07-17 -0.991764 -1.024736 -1.381660 -4   kumar


In [105]:
df['E'] = ['anil','dharni','kumar','patel','anil','kumar']

In [107]:
print (df)

                   A         B         C  D       E
2017-07-12  0.000000  0.000000 -0.609412  4    anil
2017-07-13  0.525374  1.712230 -0.719859  4  dharni
2017-07-14  0.672185 -0.609259  0.719651  4   kumar
2017-07-15 -1.693815  0.474479  0.298744  4   patel
2017-07-16 -0.852702 -1.668808  0.951331  4    anil
2017-07-17 -0.991764  1.024736  1.381660  4   kumar


##### Reindexing

In [108]:
# Reindex to the first four dates and request df's existing columns plus 'E'.
# NOTE(review): df already has an 'E' column at this point (it was added a few
# cells earlier), so the requested column list contains 'E' twice and df1 ends
# up with two identically named 'E' columns, as the printed output shows. If
# the goal is a brand-new, empty column for the missing-data examples, a label
# that is not already in df is needed -- confirm intent before changing, since
# later cells address these columns by position.
df1 = df.reindex(index=dates[0:4],columns=list(df.columns)+['E'])
print (df1)

                   A         B         C  D       E       E
2017-07-12  0.000000  0.000000 -0.609412  4    anil    anil
2017-07-13  0.525374  1.712230 -0.719859  4  dharni  dharni
2017-07-14  0.672185 -0.609259  0.719651  4   kumar   kumar
2017-07-15 -1.693815  0.474479  0.298744  4   patel   patel


In [109]:
df1.loc[dates[0]:dates[1],'E'] = 1

In [269]:
df1.iloc[[2,3],[4,5]] = np.nan

In [270]:
df1

Unnamed: 0,A,B,C,D,E,E.1
2017-07-12,0.0,0.0,1.899583,4,1.0,1.0
2017-07-13,-0.03082,-0.608745,0.556991,4,1.0,1.0
2017-07-14,0.38727,0.237818,-1.24995,4,,
2017-07-15,0.357507,0.303881,1.445231,4,,


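Open question: why does this same command fail with an error when it is run again here? (TODO: record the error message. Note that column position 5 only exists while `df1` has at least six columns.)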

In [278]:
df1.iloc[[2,3],[4,5]] = np.nan

`dropna(how='any')` drops every row that contains at least one missing value (NaN); it returns a new dataframe and leaves `df1` unchanged.

In [112]:
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,E,E.1
2017-07-12,0.0,0.0,-0.609412,4,1,1
2017-07-13,0.525374,1.71223,-0.719859,4,1,1
2017-07-14,0.672185,-0.609259,0.719651,4,kumar,kumar
2017-07-15,-1.693815,0.474479,0.298744,4,patel,patel


In [113]:
#this is to specify values in the place of NaN
df1.fillna(value=3)

Unnamed: 0,A,B,C,D,E,E.1
2017-07-12,0.0,0.0,-0.609412,4,1,1
2017-07-13,0.525374,1.71223,-0.719859,4,1,1
2017-07-14,0.672185,-0.609259,0.719651,4,kumar,kumar
2017-07-15,-1.693815,0.474479,0.298744,4,patel,patel


In [114]:
#this gives boolean response where there is NaN
pd.isnull(df1)

Unnamed: 0,A,B,C,D,E,E.1
2017-07-12,False,False,False,False,False,False
2017-07-13,False,False,False,False,False,False
2017-07-14,False,False,False,False,False,False
2017-07-15,False,False,False,False,False,False


In [115]:
# Column-wise mean. Reductions such as mean() skip missing values (NaN) by
# default. numeric_only=True restricts the calculation to the numeric columns
# explicitly: df also holds the string column 'E', which older pandas dropped
# silently but pandas >= 2.0 rejects with a TypeError.
df.mean(numeric_only=True)

A   -0.390120
B    0.155563
C    0.337019
D    4.000000
dtype: float64

In [116]:
# Row-wise mean (axis=1): one value per date, averaged across the numeric
# columns. numeric_only=True keeps the string column 'E' out of the
# calculation, which would otherwise raise a TypeError on pandas >= 2.0.
df.mean(axis=1, numeric_only=True)

2017-07-12    0.847647
2017-07-13    1.379436
2017-07-14    1.195644
2017-07-15    0.769852
2017-07-16    0.607455
2017-07-17    1.353658
Freq: D, dtype: float64

In [117]:
s=pd.Series([1,3,np.nan,np.nan,6,7],index=dates)

In [131]:
print (s)

2017-07-12    1.0
2017-07-13    3.0
2017-07-14    NaN
2017-07-15    NaN
2017-07-16    6.0
2017-07-17    7.0
Freq: D, dtype: float64


In [132]:
df

Unnamed: 0,A,B,C,D,E,s
2017-07-12,0.0,0.0,-0.609412,4,anil,1.0
2017-07-13,0.525374,1.71223,-0.719859,4,dharni,3.0
2017-07-14,0.672185,-0.609259,0.719651,4,kumar,1.0
2017-07-15,-1.693815,0.474479,0.298744,4,patel,1.0
2017-07-16,-0.852702,-1.668808,0.951331,4,anil,6.0
2017-07-17,-0.991764,1.024736,1.38166,4,kumar,7.0
