In [1]:
import numpy as np
import pandas as pd

In [2]:
# Four rows of (first, second) values; two cells are deliberately missing (NaN).
data = [
    [9.9, 8.8],
    [np.nan, 6.6],
    [7.7, np.nan],
    [0.99, 9.8],
]
df = pd.DataFrame(data, index=list("abcd"), columns=["first", "second"])

In [3]:
# Display the frame to see where the NaN cells sit.
df

Unnamed: 0,first,second
a,9.9,8.8
b,,6.6
c,7.7,
d,0.99,9.8


In [4]:
# Column totals: reduce down the index, giving one value per column.
df.sum(axis="index")

first     18.59
second    25.20
dtype: float64

In [5]:
# Row totals: reduce across the columns, giving one value per index label.
df.sum(axis="columns")

a    18.70
b     6.60
c     7.70
d    10.79
dtype: float64

In [6]:
# Note on DataFrame.sum: NaN entries are skipped by default (skipna=True),
# which is why the column and row totals above ignore the missing cells.
# Pass skipna=False to propagate NaN instead.

In [7]:
# Total of a single column, selected by label.
df.loc[:, "first"].sum()

18.59

In [8]:
# Total of a single row, selected by index label.
df.loc["a", :].sum()

18.700000000000003

In [9]:
# Re-display df for reference; the cells since it was built only read from it.
df

Unnamed: 0,first,second
a,9.9,8.8
b,,6.6
c,7.7,
d,0.99,9.8


In [10]:
# Row means with NaN propagation: any row containing a missing cell yields NaN.
df.mean(axis="columns", skipna=False)

a    9.350
b      NaN
c      NaN
d    5.395
dtype: float64

In [11]:
# Re-display df (still containing NaN) before the fill values are computed.
df

Unnamed: 0,first,second
a,9.9,8.8
b,,6.6
c,7.7,
d,0.99,9.8


In [12]:
# Mean of the "first" column (NaN skipped); used below as its fill value.
first_mean = df.mean(axis="index").at["first"]

In [13]:
# Minimum of the "second" column (NaN skipped); used below as its fill value.
second_min = df.min(axis="index").at["second"]

In [14]:
# Replace the missing cells of "first" with the column mean.
df = df.fillna({"first": first_mean})

In [15]:
# Replace the missing cells of "second" with the column minimum.
df = df.fillna({"second": second_min})

In [16]:
# Display df after the fillna steps to confirm no NaN cells remain.
df

Unnamed: 0,first,second
a,9.9,8.8
b,6.196667,6.6
c,7.7,6.6
d,0.99,9.8


In [17]:
# Seed NumPy's global RNG so this frame -- and the later np.random calls in
# this notebook -- produce the same values on every Restart & Run All.
# The value 42 is arbitrary; any fixed integer works.
np.random.seed(42)

# 6 daily rows starting 2018-02-20, 4 columns of standard-normal draws.
df2 = pd.DataFrame(
    np.random.randn(6, 4),
    columns=["A", "B", "C", "D"],
    index=pd.date_range("20180220", periods=6),
)

In [18]:
# Display the random frame that the correlation/covariance cells operate on.
df2

Unnamed: 0,A,B,C,D
2018-02-20,1.568697,0.343947,0.585285,0.497546
2018-02-21,0.256069,-0.145049,-0.126036,-0.183935
2018-02-22,0.106011,0.498611,-0.312269,1.316774
2018-02-23,-0.684392,1.279201,-0.288131,2.480668
2018-02-24,0.269967,0.029419,-0.061232,-0.337866
2018-02-25,1.170311,-0.415883,-0.501058,-1.709563


In [19]:
# Correlation between columns A and B (Pearson is the default method).
df2["A"].corr(df2["B"], method="pearson")

-0.62927294102649201

In [20]:
# Covariance between columns A and B.
df2["A"].cov(other=df2["B"])

-0.30221113062294386

In [21]:
# Correlation between columns A and C (Pearson is the default method).
df2["A"].corr(df2["C"], method="pearson")

0.50152356867535974

In [22]:
# Correlation between columns A and D (Pearson is the default method).
df2["A"].corr(df2["D"], method="pearson")

-0.65805464649371026

In [23]:
# Full pairwise correlation matrix; the off-diagonal entries match the
# per-pair Series.corr results computed above.
df2.corr(method="pearson")

Unnamed: 0,A,B,C,D
A,1.0,-0.629273,0.501524,-0.658055
B,-0.629273,1.0,0.078575,0.968375
C,0.501524,0.078575,1.0,0.112598
D,-0.658055,0.968375,0.112598,1.0


In [24]:
# Full pairwise covariance matrix of df2's columns.
df2.cov()

Unnamed: 0,A,B,C,D
A,0.649099,-0.302211,0.152402,-0.768147
B,-0.302211,0.35533,0.017666,0.836346
C,0.152402,0.017666,0.142262,0.061532
D,-0.768147,0.836346,0.061532,2.099198


In [25]:
# np.random.permutation returns a randomly reordered copy of the row labels.
dates = df2.index
random_dates = np.random.permutation(dates)

# Rebuild df2 with rows in the shuffled order and columns in a scrambled
# order, so both axes are out of order for the sorting cells that follow.
df2 = df2.reindex(index=random_dates, columns=list("DBCA"))

In [26]:
# Display df2 after the rows were shuffled and the columns reordered.
df2

Unnamed: 0,D,B,C,A
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311


In [27]:
# Sort rows by index label (ascending); returns a new frame.
df2.sort_index(axis="index")

Unnamed: 0,D,B,C,A
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311


In [28]:
# Sort columns by column label (ascending); row order is left as is.
df2.sort_index(axis="columns")

Unnamed: 0,A,B,C,D
2018-02-20,1.568697,0.343947,0.585285,0.497546
2018-02-24,0.269967,0.029419,-0.061232,-0.337866
2018-02-21,0.256069,-0.145049,-0.126036,-0.183935
2018-02-23,-0.684392,1.279201,-0.288131,2.480668
2018-02-22,0.106011,0.498611,-0.312269,1.316774
2018-02-25,1.170311,-0.415883,-0.501058,-1.709563


In [29]:
# Sort rows by index label, newest date first.
df2.sort_index(axis="index", ascending=False)

Unnamed: 0,D,B,C,A
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-20,0.497546,0.343947,0.585285,1.568697


In [30]:
# Sort columns by column label in reverse alphabetical order.
df2.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2018-02-20,0.497546,0.585285,0.343947,1.568697
2018-02-24,-0.337866,-0.061232,0.029419,0.269967
2018-02-21,-0.183935,-0.126036,-0.145049,0.256069
2018-02-23,2.480668,-0.288131,1.279201,-0.684392
2018-02-22,1.316774,-0.312269,0.498611,0.106011
2018-02-25,-1.709563,-0.501058,-0.415883,1.170311


In [31]:
# Re-display df2: the sort_index calls above returned new frames, so df2
# itself still has the shuffled order.
df2

Unnamed: 0,D,B,C,A
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311


In [32]:
# Sort rows by the values in column D (ascending).
df2.sort_values("D")

Unnamed: 0,D,B,C,A
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-23,2.480668,1.279201,-0.288131,-0.684392


In [34]:
# Sort rows by the values in column D, largest first.
df2.sort_values("D", ascending=False)

Unnamed: 0,D,B,C,A
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311


In [35]:
# Sort rows by the values in column B, largest first.
df2.sort_values("B", ascending=False)

Unnamed: 0,D,B,C,A
2018-02-23,2.480668,1.279201,-0.288131,-0.684392
2018-02-22,1.316774,0.498611,-0.312269,0.106011
2018-02-20,0.497546,0.343947,0.585285,1.568697
2018-02-24,-0.337866,0.029419,-0.061232,0.269967
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311


In [36]:
# E: random integers drawn from 0..5 (the upper bound 6 is exclusive).
df2["E"] = np.random.randint(low=0, high=6, size=6)
# F: a categorical-like text column with repeated labels.
df2["F"] = [
    "first", "second", "first",
    "third", "first", "second",
]

In [37]:
# Display df2 with the newly added E and F columns.
df2

Unnamed: 0,D,B,C,A,E,F
2018-02-20,0.497546,0.343947,0.585285,1.568697,3,first
2018-02-24,-0.337866,0.029419,-0.061232,0.269967,3,second
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069,5,first
2018-02-23,2.480668,1.279201,-0.288131,-0.684392,1,third
2018-02-22,1.316774,0.498611,-0.312269,0.106011,4,first
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311,1,second


In [38]:
# Sort by E first, then break ties within equal E values using F.
df2.sort_values(["E", "F"])

Unnamed: 0,D,B,C,A,E,F
2018-02-25,-1.709563,-0.415883,-0.501058,1.170311,1,second
2018-02-23,2.480668,1.279201,-0.288131,-0.684392,1,third
2018-02-20,0.497546,0.343947,0.585285,1.568697,3,first
2018-02-24,-0.337866,0.029419,-0.061232,0.269967,3,second
2018-02-22,1.316774,0.498611,-0.312269,0.106011,4,first
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069,5,first


In [39]:
# Distinct labels in column F, in order of first appearance.
pd.unique(df2["F"])

array(['first', 'second', 'third'], dtype=object)

In [40]:
# How many rows carry each label in column F, most frequent first.
df2.loc[:, "F"].value_counts()

first     3
second    2
third     1
Name: F, dtype: int64

In [41]:
# Boolean mask: True where F is one of the listed labels.
df2["F"].isin(["first", "second"])

2018-02-20     True
2018-02-24     True
2018-02-21     True
2018-02-23    False
2018-02-22     True
2018-02-25     True
Name: F, dtype: bool

In [42]:
# Keep only the rows whose F label is "first" or "third" (all columns).
df2[df2["F"].isin(["first", "third"])]

Unnamed: 0,D,B,C,A,E,F
2018-02-20,0.497546,0.343947,0.585285,1.568697,3,first
2018-02-21,-0.183935,-0.145049,-0.126036,0.256069,5,first
2018-02-23,2.480668,1.279201,-0.288131,-0.684392,1,third
2018-02-22,1.316774,0.498611,-0.312269,0.106011,4,first


In [43]:
# One row per city, three columns of standard-normal draws.
cities = ["Seoul", "Incheon", "Busan", "Daegu"]
df3 = pd.DataFrame(
    np.random.randn(len(cities), 3),
    index=cities,
    columns=list("bde"),
)

In [44]:
# Display the random city frame used by the apply examples.
df3

Unnamed: 0,b,d,e
Seoul,-1.652508,-0.290852,0.541412
Incheon,0.247694,1.090744,0.918833
Busan,-0.432274,0.844922,0.955502
Daegu,1.447265,1.179997,0.656185


In [45]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    Parameters
    ----------
    x : any object exposing ``max()`` and ``min()`` methods, such as a pandas
        Series or a NumPy array. ``DataFrame.apply`` passes one column or one
        row at a time.

    Returns
    -------
    The value of ``x.max() - x.min()``.
    """
    return x.max() - x.min()

In [46]:
# Apply func to each column: one max-minus-min value per column.
df3.apply(func, axis="index")

b    3.099774
d    1.470848
e    0.414089
dtype: float64

In [47]:
# Apply func to each row: one max-minus-min value per city.
df3.apply(func, axis="columns")

Seoul      2.193921
Incheon    0.843050
Busan      1.387775
Daegu      0.791081
dtype: float64