In [1]:
import pandas as pd
import numpy as np

In [16]:
# Small 4x2 sample frame with deliberately missing entries: row 'a' lacks
# 'two' and row 'c' is missing entirely.
data = [
    [1.4, np.nan],
    [7.1, -4.5],
    [np.nan, np.nan],
    [0.75, -1.3],
]
df = pd.DataFrame(
    data,
    index=list('abcd'),
    columns=['one', 'two'],
)

In [5]:
# Display the freshly built frame, including its missing entries.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [6]:
# Column totals (reduce down the index); missing values are skipped.
df.sum(axis='index')

one    9.25
two   -5.80
dtype: float64

In [7]:
# Row totals (reduce across the columns); missing values are skipped.
# min_count=1 requires at least one valid value per row, so the all-missing
# row 'c' yields NaN instead of 0.0.
df.sum(axis=1, min_count=1)

a    1.40
b    2.60
c     NaN
d   -0.55
dtype: float64

In [8]:
# Column means, computed over the non-missing entries only.
df.mean(axis='index')

one    3.083333
two   -2.900000
dtype: float64

In [11]:
# Row means without skipping NaN: any row containing a missing value
# (here 'a' and 'c') yields NaN.
df.mean(axis='columns', skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [9]:
# Total of a single column, selected by label.
df.loc[:, 'one'].sum()

9.25

In [10]:
# Total across a single row, selected by its index label.
df.loc['b', :].sum()

2.5999999999999996

In [17]:
# Show the frame again before the missing values are filled in.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [18]:
# Fill values for the imputation step: the mean of 'one' and the minimum of
# 'two', each computed over the non-missing entries of that column.
one_mean = df['one'].mean()
two_min = df['two'].min()

In [19]:
# Impute per column: missing 'one' values take the column mean, missing
# 'two' values take the column minimum. Both columns are written back into df.
df[['one', 'two']] = df[['one', 'two']].fillna(
    value={'one': one_mean, 'two': two_min}
)

In [20]:
# Confirm that no missing values remain after the fill.
df

Unnamed: 0,one,two
a,1.4,-4.5
b,7.1,-4.5
c,3.083333,-4.5
d,0.75,-1.3


In [21]:
# Six consecutive days x four columns of standard-normal noise.
# A locally seeded RandomState (fixed seed 42) makes the frame identical on
# every Restart & Run All without touching NumPy's global random state.
df2 = pd.DataFrame(
    np.random.RandomState(42).randn(6, 4),
    columns=['A', 'B', 'C', 'D'],
    index=pd.date_range('20170701', periods=6),
)

In [22]:
# Inspect the random frame indexed by date.
df2

Unnamed: 0,A,B,C,D
2017-07-01,-2.286333,-0.052644,0.704624,0.42203
2017-07-02,-0.336621,0.521646,0.243727,0.572031
2017-07-03,-0.512866,-1.402907,-0.992022,0.263058
2017-07-04,-0.507892,-0.871233,0.330722,0.698941
2017-07-05,1.540193,1.331042,0.651229,1.683189
2017-07-06,1.670852,-1.09176,0.784282,-0.978019


In [23]:
# Correlation between two individual columns.
df2.loc[:, 'A'].corr(df2.loc[:, 'B'])

0.15604871509601279

In [24]:
# Covariance between two individual columns.
df2.loc[:, 'B'].cov(df2.loc[:, 'D'])

0.66017335053404569

In [25]:
# Full pairwise correlation matrix of the columns; the A/B entry matches the
# single-pair df2['A'].corr(df2['B']) result.
df2.corr()

Unnamed: 0,A,B,C,D
A,1.0,0.156049,0.220997,-0.078239
B,0.156049,1.0,0.473611,0.730693
C,0.220997,0.473611,1.0,-0.009842
D,-0.078239,0.730693,-0.009842,1.0


In [26]:
# Full pairwise covariance matrix of the columns; the B/D entry matches the
# single-pair df2['B'].cov(df2['D']) result.
df2.cov()

Unnamed: 0,A,B,C,D
A,2.198872,0.243948,0.217082,-0.099428
B,0.243948,1.111408,0.330747,0.660173
C,0.217082,0.330747,0.438809,-0.005587
D,-0.099428,0.660173,-0.005587,0.734467


In [27]:
# Shuffle the row order and rearrange the columns so the sorting examples
# that follow have something to sort.
dates = df2.index
# Seeded local RandomState: the shuffle is the same on every run and NumPy's
# global random state is left alone.
random_dates = np.random.RandomState(0).permutation(dates)
df2 = df2.reindex(index=random_dates, columns=['D', 'B', 'C', 'A'])

In [28]:
# Inspect the frame after the rows were shuffled and the columns reordered.
df2

Unnamed: 0,D,B,C,A
2017-07-05,1.683189,1.331042,0.651229,1.540193
2017-07-06,-0.978019,-1.09176,0.784282,1.670852
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-01,0.42203,-0.052644,0.704624,-2.286333


In [32]:
# Sort rows by their index labels (dates), ascending. Returns a new frame.
df2.sort_index(axis='index')

Unnamed: 0,D,B,C,A
2017-07-01,0.42203,-0.052644,0.704624,-2.286333
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-05,1.683189,1.331042,0.651229,1.540193
2017-07-06,-0.978019,-1.09176,0.784282,1.670852


In [33]:
# Sort columns by their labels; the row order is left untouched.
df2.sort_index(axis='columns')

Unnamed: 0,A,B,C,D
2017-07-05,1.540193,1.331042,0.651229,1.683189
2017-07-06,1.670852,-1.09176,0.784282,-0.978019
2017-07-04,-0.507892,-0.871233,0.330722,0.698941
2017-07-02,-0.336621,0.521646,0.243727,0.572031
2017-07-03,-0.512866,-1.402907,-0.992022,0.263058
2017-07-01,-2.286333,-0.052644,0.704624,0.42203


In [34]:
# Sort rows by index label, newest date first.
df2.sort_index(axis='index', ascending=False)

Unnamed: 0,D,B,C,A
2017-07-06,-0.978019,-1.09176,0.784282,1.670852
2017-07-05,1.683189,1.331042,0.651229,1.540193
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-01,0.42203,-0.052644,0.704624,-2.286333


In [35]:
# The sort_index calls above returned new frames; df2 itself still has the
# shuffled row order.
df2

Unnamed: 0,D,B,C,A
2017-07-05,1.683189,1.331042,0.651229,1.540193
2017-07-06,-0.978019,-1.09176,0.784282,1.670852
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-01,0.42203,-0.052644,0.704624,-2.286333


In [36]:
# Order rows by the values in column 'D', ascending.
df2.sort_values('D')

Unnamed: 0,D,B,C,A
2017-07-06,-0.978019,-1.09176,0.784282,1.670852
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-01,0.42203,-0.052644,0.704624,-2.286333
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-05,1.683189,1.331042,0.651229,1.540193


In [37]:
# Order rows by the values in column 'B', ascending.
df2.sort_values('B')

Unnamed: 0,D,B,C,A
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866
2017-07-06,-0.978019,-1.09176,0.784282,1.670852
2017-07-04,0.698941,-0.871233,0.330722,-0.507892
2017-07-01,0.42203,-0.052644,0.704624,-2.286333
2017-07-02,0.572031,0.521646,0.243727,-0.336621
2017-07-05,1.683189,1.331042,0.651229,1.540193


In [38]:
# Add an integer column 'E' (random values in [0, 6)) and a categorical-style
# string column 'F' for the multi-key sort and membership examples.
# The seeded local RandomState keeps 'E' identical on every run; the size is
# taken from the frame rather than hard-coded.
df2['E'] = np.random.RandomState(1).randint(0, 6, size=len(df2))
df2['F'] = ['alpha', 'beta', 'gamma', 'gamma', 'alpha', 'gamma']

In [39]:
# Inspect the frame with the new 'E' and 'F' columns appended.
df2

Unnamed: 0,D,B,C,A,E,F
2017-07-05,1.683189,1.331042,0.651229,1.540193,3,alpha
2017-07-06,-0.978019,-1.09176,0.784282,1.670852,1,beta
2017-07-04,0.698941,-0.871233,0.330722,-0.507892,1,gamma
2017-07-02,0.572031,0.521646,0.243727,-0.336621,0,gamma
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866,2,alpha
2017-07-01,0.42203,-0.052644,0.704624,-2.286333,4,gamma


In [40]:
# Multi-key sort: order by 'E' first, and break ties in 'E' using 'F'.
df2.sort_values(['E', 'F'])

Unnamed: 0,D,B,C,A,E,F
2017-07-02,0.572031,0.521646,0.243727,-0.336621,0,gamma
2017-07-06,-0.978019,-1.09176,0.784282,1.670852,1,beta
2017-07-04,0.698941,-0.871233,0.330722,-0.507892,1,gamma
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866,2,alpha
2017-07-05,1.683189,1.331042,0.651229,1.540193,3,alpha
2017-07-01,0.42203,-0.052644,0.704624,-2.286333,4,gamma


In [41]:
# Distinct values of 'F', in order of first appearance.
pd.unique(df2['F'])

array(['alpha', 'beta', 'gamma'], dtype=object)

In [42]:
# How often each distinct 'F' value occurs, most frequent first.
df2.loc[:, 'F'].value_counts()

gamma    3
alpha    2
beta     1
Name: F, dtype: int64

In [43]:
# Boolean mask: True where 'F' is one of the listed values.
df2.loc[:, 'F'].isin(['alpha', 'beta'])

2017-07-05     True
2017-07-06     True
2017-07-04    False
2017-07-02    False
2017-07-03     True
2017-07-01    False
Name: F, dtype: bool

In [44]:
# Keep only the rows whose 'F' value is 'alpha' or 'beta' (all columns).
df2[df2['F'].isin(['alpha', 'beta'])]

Unnamed: 0,D,B,C,A,E,F
2017-07-05,1.683189,1.331042,0.651229,1.540193,3,alpha
2017-07-06,-0.978019,-1.09176,0.784282,1.670852,1,beta
2017-07-03,0.263058,-1.402907,-0.992022,-0.512866,2,alpha


In [45]:
# 4x3 frame of standard-normal noise used for the apply() examples.
# A locally seeded RandomState (fixed seed 2) makes the values identical on
# every run without touching NumPy's global random state.
df3 = pd.DataFrame(
    np.random.RandomState(2).randn(4, 3),
    columns=['b', 'd', 'e'],
)

In [46]:
# Inspect the frame that the apply() examples operate on.
df3

Unnamed: 0,b,d,e
0,1.409004,1.972384,-0.753602
1,-0.458595,2.199069,0.185819
2,-0.216609,-0.193846,-0.346463
3,0.646344,0.4084,-0.798355


In [47]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` is any object exposing ``max()`` and ``min()`` methods; in this
    notebook it receives one column or one row at a time from
    ``DataFrame.apply``.
    """
    return x.max() - x.min()

In [48]:
# Apply func to each column: one max-minus-min spread per column.
df3.apply(func, axis='index')

b    1.867599
d    2.392915
e    0.984174
dtype: float64

In [49]:
# Apply func to each row: one max-minus-min spread per row.
df3.apply(func, axis='columns')

0    2.725986
1    2.657664
2    0.152617
3    1.444700
dtype: float64