In [53]:
import pandas as pd
import numpy as np

In [54]:
# Small 4x2 frame with deliberately missing values, used by the NaN-handling
# examples that follow: 'a' lacks 'two', and row 'c' is missing entirely.
data = [
    [1.4, np.nan],
    [7.1, -4.5],
    [np.nan, np.nan],
    [0.75, -1.3],
]
df = pd.DataFrame(
    data=data,
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)

In [55]:
# Display the frame: 'a' has no value for 'two' and row 'c' is entirely NaN.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [56]:
# Sum down the rows (axis=0): one total per column; NaN entries are skipped.
df.sum(axis=0)

one    9.25
two   -5.80
dtype: float64

In [57]:
# Sum across the columns (axis=1): one total per row.
# NaN entries are skipped, so the all-NaN row 'c' comes out as 0.
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [58]:
# Sum of a single column, selected by its label.

df['one'].sum()

9.25

In [59]:
# Sum of a single row, selected by its index label with .loc.
# The result shows as 2.5999999999999996 rather than 2.6 because of ordinary
# binary floating-point rounding of 7.1 + (-4.5).

df.loc['b'].sum()

2.5999999999999996

In [60]:
# Row-wise mean without skipping NaN (skipna=False):
# any row that contains a NaN yields NaN instead of ignoring the gap.

df.mean(axis=1, skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [61]:
# Re-display df: the aggregations above returned new objects and left df unchanged.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [62]:
# Replace NaN with other values.
# First fill value: the mean of column 'one' (NaN entries are skipped).
# The mean is taken from the column itself rather than from df.mean(axis=0),
# so no other column is reduced and then thrown away, and the result does not
# depend on every column of df supporting a mean.

one_mean = df['one'].mean()

In [63]:
# Second fill value: the minimum of column 'two' (NaN entries are skipped).
# Computed on the column directly instead of reducing the whole frame first.
two_min = df['two'].min()

In [64]:
# Column 'one' before filling: row 'c' is still NaN.
df['one']

a    1.40
b    7.10
c     NaN
d    0.75
Name: one, dtype: float64

In [65]:
# Fill each column's gaps with its own statistic:
# 'two' gets the column minimum, 'one' gets the column mean.
df['two'] = df['two'].fillna(two_min)
df['one'] = df['one'].fillna(one_mean)

In [66]:
# After the fill: the NaN in 'one' became the column mean (3.083333)
# and the NaNs in 'two' became the column minimum (-4.5).
df

Unnamed: 0,one,two
a,1.4,-4.5
b,7.1,-4.5
c,3.083333,-4.5
d,0.75,-1.3


In [67]:
# 6x4 frame of random normal draws, indexed by six consecutive days
# starting 2019-02-17.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every output derived from them) change on each run.
df2 = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range("20190217", periods=6),
    columns=['A', 'B', 'C', 'D'],
)

In [68]:
# Inspect the freshly generated frame.
df2

Unnamed: 0,A,B,C,D
2019-02-17,-0.669385,-0.172566,-1.294716,2.884159
2019-02-18,-1.575448,-0.552403,-1.141842,0.221753
2019-02-19,0.032383,0.257757,1.177436,0.292356
2019-02-20,-0.376364,1.23046,-0.278866,0.324695
2019-02-21,-0.835133,-1.122096,-0.02048,-0.011589
2019-02-22,-1.2371,-0.032284,0.516336,2.082969


In [70]:
# Correlation between columns A and B (a single scalar).
df2['A'].corr(df2['B'])

0.5235105283270653

In [72]:
# Covariance between columns B and C (a single scalar).
df2['B'].cov(df2['C'])

0.15058322911414757

In [73]:
# Pairwise correlation matrix over all columns; symmetric, with 1 on the diagonal.
df2.corr()

Unnamed: 0,A,B,C,D
A,1.0,0.523511,0.499326,-0.130945
B,0.523511,1.0,0.19931,0.037148
C,0.499326,0.19931,1.0,-0.287155
D,-0.130945,0.037148,-0.287155,1.0


In [74]:
# Pairwise covariance matrix over all columns; the diagonal holds each column's variance.
df2.cov()

Unnamed: 0,A,B,C,D
A,0.335956,0.240846,0.275487,-0.091718
B,0.240846,0.630009,0.150583,0.035632
C,0.275487,0.150583,0.906046,-0.330306
D,-0.091718,0.035632,-0.330306,1.460321


In [76]:
# Shuffle the row labels and rearrange the columns so that df2 is no longer
# in sorted order (presumably to set up the sorting cells that follow).
dates = df2.index
random_dates = np.random.permutation(dates)
df2 = (
    df2
    .reindex(index=random_dates)
    .reindex(columns=['D', 'B', 'C', 'A'])
)

In [77]:
# df2 after the shuffle: rows in random date order, columns ordered D, B, C, A.
df2

Unnamed: 0,D,B,C,A
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-22,2.082969,-0.032284,0.516336,-1.2371


In [78]:
# Sort the rows by index label (axis=0), ascending. Returns a new frame; df2 is not modified.
df2.sort_index(axis=0)

Unnamed: 0,D,B,C,A
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-22,2.082969,-0.032284,0.516336,-1.2371


In [79]:
# Sort the columns by name (axis=1); the row order is left as it is.
df2.sort_index(axis=1)

Unnamed: 0,A,B,C,D
2019-02-20,-0.376364,1.23046,-0.278866,0.324695
2019-02-17,-0.669385,-0.172566,-1.294716,2.884159
2019-02-19,0.032383,0.257757,1.177436,0.292356
2019-02-21,-0.835133,-1.122096,-0.02048,-0.011589
2019-02-18,-1.575448,-0.552403,-1.141842,0.221753
2019-02-22,-1.2371,-0.032284,0.516336,2.082969


In [80]:
# With no arguments, sort_index sorts the row index ascending (same result as axis=0).
df2.sort_index()

Unnamed: 0,D,B,C,A
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-22,2.082969,-0.032284,0.516336,-1.2371


In [81]:
# Sort the rows by index label in descending order.

df2.sort_index(axis=0, ascending=False)

Unnamed: 0,D,B,C,A
2019-02-22,2.082969,-0.032284,0.516336,-1.2371
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385


In [83]:
# df2 itself is still in shuffled order: the sort_index calls returned new frames.
df2

Unnamed: 0,D,B,C,A
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-22,2.082969,-0.032284,0.516336,-1.2371


In [84]:
# Sort the rows by the values in column D (ascending).

df2.sort_values(by='D')

Unnamed: 0,D,B,C,A
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-20,0.324695,1.23046,-0.278866,-0.376364
2019-02-22,2.082969,-0.032284,0.516336,-1.2371
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385


In [87]:
# Sort the rows by the values in column B (ascending).
df2.sort_values(by='B')

Unnamed: 0,D,B,C,A
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385
2019-02-22,2.082969,-0.032284,0.516336,-1.2371
2019-02-19,0.292356,0.257757,1.177436,0.032383
2019-02-20,0.324695,1.23046,-0.278866,-0.376364


In [88]:
# Add two columns to df2 in place:
#   E - six random integers drawn from 0..5 (the upper bound 6 is exclusive)
#   F - a fixed string label per row
df2['E'] = np.random.randint(low=0, high=6, size=6)
df2['F'] = [
    'alpha', 'beta', 'gamma',
    'gamma', 'alpha', 'gamma',
]

In [89]:
# Show df2 with the two new columns E and F.
df2

Unnamed: 0,D,B,C,A,E,F
2019-02-20,0.324695,1.23046,-0.278866,-0.376364,2,alpha
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385,3,beta
2019-02-19,0.292356,0.257757,1.177436,0.032383,5,gamma
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133,1,gamma
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448,3,alpha
2019-02-22,2.082969,-0.032284,0.516336,-1.2371,5,gamma


In [90]:
# Sort by E first, then break ties on F (the two E=3 rows come out alpha, then beta).
df2.sort_values(by=['E', 'F'])

Unnamed: 0,D,B,C,A,E,F
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133,1,gamma
2019-02-20,0.324695,1.23046,-0.278866,-0.376364,2,alpha
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448,3,alpha
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385,3,beta
2019-02-19,0.292356,0.257757,1.177436,0.032383,5,gamma
2019-02-22,2.082969,-0.032284,0.516336,-1.2371,5,gamma


In [91]:
# Sort by E only; rows tied on E are not ordered by F (here beta precedes alpha for E=3).
df2.sort_values(by='E')

Unnamed: 0,D,B,C,A,E,F
2019-02-21,-0.011589,-1.122096,-0.02048,-0.835133,1,gamma
2019-02-20,0.324695,1.23046,-0.278866,-0.376364,2,alpha
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385,3,beta
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448,3,alpha
2019-02-19,0.292356,0.257757,1.177436,0.032383,5,gamma
2019-02-22,2.082969,-0.032284,0.516336,-1.2371,5,gamma


In [92]:
# Distinct values in column F, returned as a NumPy array.
df2['F'].unique()

array(['alpha', 'beta', 'gamma'], dtype=object)

In [93]:
# The same distinct values as a built-in Python set (unordered).
set(df2['F'])

{'alpha', 'beta', 'gamma'}

In [95]:
# Distinct values as a sorted list.
sorted(set(df2['F']))

['alpha', 'beta', 'gamma']

In [98]:
# A single column selected from a DataFrame is a pandas Series.
type(df2['F'])

pandas.core.series.Series

In [99]:
# Number of occurrences of each value in F, most frequent first.
df2['F'].value_counts()

gamma    3
alpha    2
beta     1
Name: F, dtype: int64

In [100]:
# Boolean mask: True where F is 'alpha' or 'beta'.
df2['F'].isin(['alpha', 'beta'])

2019-02-20     True
2019-02-17     True
2019-02-19    False
2019-02-21    False
2019-02-18     True
2019-02-22    False
Name: F, dtype: bool

In [102]:
# Use the mask with .loc to keep only the matching rows (':' keeps every column).
df2.loc[df2['F'].isin(['alpha', 'beta']), :]

Unnamed: 0,D,B,C,A,E,F
2019-02-20,0.324695,1.23046,-0.278866,-0.376364,2,alpha
2019-02-17,2.884159,-0.172566,-1.294716,-0.669385,3,beta
2019-02-18,0.221753,-0.552403,-1.141842,-1.575448,3,alpha


In [103]:
# 4x3 frame of random normal draws, one row per city.
# NOTE(review): no random seed is set in the visible cells, so the values
# change on each run.
df3 = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=['Seoul', 'Incheon', 'Busan', 'Daegu'],
    columns=['b', 'd', 'e'],
)

In [104]:
# Inspect the city frame.
df3

Unnamed: 0,b,d,e
Seoul,-0.781532,-0.562662,1.313161
Incheon,-1.348596,-1.65899,-0.575645
Busan,-1.920736,0.675095,0.99552
Daegu,-0.330073,0.153807,0.835627


In [105]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    Written as a named function instead of a lambda bound to a name, so it
    has a real ``__name__`` in tracebacks and can carry this docstring.

    Parameters
    ----------
    x : any object exposing ``.max()`` and ``.min()``
        For example a pandas Series or a NumPy array. ``DataFrame.apply``
        passes one column or one row at a time.

    Returns
    -------
    The result of ``x.max() - x.min()``.
    """
    return x.max() - x.min()

In [106]:
# Apply func to each column (axis=0): max minus min of every column.
df3.apply(func, axis=0)

b    1.590663
d    2.334085
e    1.888806
dtype: float64

In [107]:
# Apply func to each row (axis=1): max minus min within every city's row.
df3.apply(func, axis=1)

Seoul      2.094693
Incheon    1.083345
Busan      2.916255
Daegu      1.165700
dtype: float64