In [1]:
import numpy as np
import pandas as pd

In [2]:
# Four rows by two columns; rows "b" and "c" each contain one missing value (NaN)
# so the later cells can show how reductions and fillna treat missing data.
data = [
    [9.9, 8.8],
    [np.nan, 6.6],
    [7.7, np.nan],
    [0.99, 9.8],
]
df = pd.DataFrame(data, index=list("abcd"), columns=["first", "second"])

In [3]:
# Display the frame; the NaN cells render as empty entries.
df

Unnamed: 0,first,second
a,9.9,8.8
b,,6.6
c,7.7,
d,0.99,9.8


In [4]:
# Column-wise totals (axis=0). NaN cells are ignored: first = 9.9 + 7.7 + 0.99.
df.sum(axis=0)

first     18.59
second    25.20
dtype: float64

In [5]:
# Row-wise totals (axis=1); rows "b" and "c" sum only their single non-NaN value.
df.sum(axis=1)

a    18.70
b     6.60
c     7.70
d    10.79
dtype: float64

In [6]:
# Total of one column; equals the "first" entry of df.sum(axis=0).
df['first'].sum() 

18.59

In [7]:
# Total of row "a" selected by label; the trailing digits in the result are
# floating-point rounding of 9.9 + 8.8.
df.loc["a"].sum() 

18.700000000000003

In [8]:
# IPython help shortcut: shows the docstring of df.mean. This is IPython/Jupyter
# syntax (not plain Python) and is a development aid rather than part of the analysis.
df.mean?

In [9]:
# Row means with skipna=False: a row containing any NaN yields NaN instead of
# the mean of its remaining values (rows "b" and "c").
df.mean(axis=1, skipna=False)

a    9.350
b      NaN
c      NaN
d    5.395
dtype: float64

In [10]:
# Look at df again before its missing values are filled in.
df

Unnamed: 0,first,second
a,9.9,8.8
b,,6.6
c,7.7,
d,0.99,9.8


In [11]:
# Mean of the "first" column only; used below to fill that column's missing value.
first_mean = df["first"].mean()

In [12]:
# Minimum of the "second" column only; used below to fill that column's missing value.
second_min = df["second"].min()

In [13]:
# Replace the NaN in "first" with the column mean computed earlier and assign the
# filled column back onto df (df is modified from this cell onward).
df["first"] = df["first"].fillna(value=first_mean)

In [14]:
# Replace the NaN in "second" with the column minimum computed earlier and assign
# the filled column back onto df.
df["second"] = df["second"].fillna(value=second_min)

In [15]:
# Confirm both gaps are filled: b/first holds the mean, c/second holds the minimum.
df

Unnamed: 0,first,second
a,9.9,8.8
b,6.196667,6.6
c,7.7,6.6
d,0.99,9.8


In [16]:
# Seed NumPy's global RNG so this frame, and the later np.random calls that run
# after it in a top-to-bottom execution, produce the same values on every run.
# Outputs stored from an unseeded run will differ until the notebook is re-executed.
np.random.seed(0)

# Six daily rows starting 2018-02-20 with four columns of standard-normal draws.
df2 = pd.DataFrame(np.random.randn(6, 4),
                   columns=["A", "B", "C", "D"],
                   index=pd.date_range("20180220", periods=6))

In [17]:
# Display the generated frame.
df2

Unnamed: 0,A,B,C,D
2018-02-20,-1.505131,1.010963,-0.289064,-1.41619
2018-02-21,-0.731963,-1.520187,1.084567,-0.286412
2018-02-22,-1.127835,0.888428,1.470728,-0.17661
2018-02-23,0.658658,2.506299,1.483251,0.350645
2018-02-24,1.35903,0.275115,0.079112,0.595414
2018-02-25,-0.141115,0.226454,-2.508144,-0.196274


In [18]:
# Correlation coefficient between columns A and B.
df2["A"].corr(df2["B"])

0.1976668886760603

In [19]:
# Correlation coefficient between columns A and C.
df2["A"].corr(df2["C"])

-0.030104159257117246

In [20]:
# Correlation coefficient between columns A and D.
df2["A"].corr(df2["D"])

0.87264720302364707

In [21]:
# Correlation matrix over every pair of columns: the diagonal is 1 and the
# off-diagonal entries match the pairwise Series.corr results.
df2.corr() 

Unnamed: 0,A,B,C,D
A,1.0,0.197667,-0.030104,0.872647
B,0.197667,1.0,0.178944,0.105609
C,-0.030104,0.178944,1.0,0.216118
D,0.872647,0.105609,0.216118,1.0


In [22]:
# Covariance between columns A and D.
df2["A"].cov(df2["D"])

0.66561088876195007

In [23]:
# Covariance matrix over every pair of columns; the A/D entry equals the
# pairwise Series.cov result.
df2.cov() 

Unnamed: 0,A,B,C,D
A,1.200941,0.284402,-0.050313,0.665611
B,0.284402,1.723756,0.358301,0.096507
C,-0.050313,0.358301,2.325873,0.229406
D,0.665611,0.096507,0.229406,0.484442


In [24]:
# Show df2 in its original order (dates ascending, columns A-D) before shuffling.
df2

Unnamed: 0,A,B,C,D
2018-02-20,-1.505131,1.010963,-0.289064,-1.41619
2018-02-21,-0.731963,-1.520187,1.084567,-0.286412
2018-02-22,-1.127835,0.888428,1.470728,-0.17661
2018-02-23,0.658658,2.506299,1.483251,0.350645
2018-02-24,1.35903,0.275115,0.079112,0.595414
2018-02-25,-0.141115,0.226454,-2.508144,-0.196274


In [25]:
# Take the current row labels of df2.
dates = df2.index
# np.random.permutation hands the labels back in a random order.
random_dates = np.random.permutation(dates)
# Reindex both axes in one call: rows follow the shuffled dates and columns follow
# an explicit out-of-order list, so neither axis is sorted afterwards.
shuffled_columns = ["D", "B", "C", "A"]
df2 = df2.reindex(index=random_dates, columns=shuffled_columns)

In [26]:
# Display the shuffled frame.
df2

Unnamed: 0,D,B,C,A
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-23,0.350645,2.506299,1.483251,0.658658


In [27]:
# Sort rows by index label (axis=0), ascending. The result is only displayed;
# df2 itself keeps its shuffled order.
df2.sort_index(axis=0)

Unnamed: 0,D,B,C,A
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115


In [28]:
# Sort columns by name (axis=1), ascending; the row order is left as is.
df2.sort_index(axis=1)

Unnamed: 0,A,B,C,D
2018-02-25,-0.141115,0.226454,-2.508144,-0.196274
2018-02-20,-1.505131,1.010963,-0.289064,-1.41619
2018-02-24,1.35903,0.275115,0.079112,0.595414
2018-02-21,-0.731963,-1.520187,1.084567,-0.286412
2018-02-22,-1.127835,0.888428,1.470728,-0.17661
2018-02-23,0.658658,2.506299,1.483251,0.350645


In [29]:
# Sort rows by index label in descending order (latest date first).
df2.sort_index(axis=0, ascending=False)

Unnamed: 0,D,B,C,A
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131


In [30]:
# Sort columns by name in descending order (D, C, B, A).
df2.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2018-02-25,-0.196274,-2.508144,0.226454,-0.141115
2018-02-20,-1.41619,-0.289064,1.010963,-1.505131
2018-02-24,0.595414,0.079112,0.275115,1.35903
2018-02-21,-0.286412,1.084567,-1.520187,-0.731963
2018-02-22,-0.17661,1.470728,0.888428,-1.127835
2018-02-23,0.350645,1.483251,2.506299,0.658658


In [31]:
# df2 is still in shuffled order: none of the sort_index results were assigned back.
df2

Unnamed: 0,D,B,C,A
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-23,0.350645,2.506299,1.483251,0.658658


In [32]:
# Sort rows by the values in column D, smallest first.
df2.sort_values(by="D")

Unnamed: 0,D,B,C,A
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-24,0.595414,0.275115,0.079112,1.35903


In [33]:
# Sort rows by the values in column D, largest first.
df2.sort_values(by="D", ascending=False)

Unnamed: 0,D,B,C,A
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131


In [34]:
# Sort rows by the values in column A, largest first.
df2.sort_values(by="A", ascending=False)

Unnamed: 0,D,B,C,A
2018-02-24,0.595414,0.275115,0.079112,1.35903
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131


In [35]:
# Sort rows by the values in column A, smallest first.
df2.sort_values(by="A")

Unnamed: 0,D,B,C,A
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131
2018-02-22,-0.17661,0.888428,1.470728,-1.127835
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115
2018-02-23,0.350645,2.506299,1.483251,0.658658
2018-02-24,0.595414,0.275115,0.079112,1.35903


In [36]:
# Add column E: six integers drawn with np.random.randint(0, 6, size=6), one per row.
df2["E"] = np.random.randint(0, 6, size=6)
# Add column F: string labels with repeats, used below for multi-key sorting,
# unique/value_counts and membership filtering.
df2["F"] = ["first", "second", "first", "third", "first", "second"]

In [37]:
# Display df2 with the two new columns E and F.
df2

Unnamed: 0,D,B,C,A,E,F
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115,5,first
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131,0,second
2018-02-24,0.595414,0.275115,0.079112,1.35903,1,first
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963,3,third
2018-02-22,-0.17661,0.888428,1.470728,-1.127835,0,first
2018-02-23,0.350645,2.506299,1.483251,0.658658,3,second


In [38]:
# Sort by E first, then by F to order rows that share the same E value
# (see the two E == 0 rows and the two E == 3 rows in the result).
df2.sort_values(by=["E","F"]) 

Unnamed: 0,D,B,C,A,E,F
2018-02-22,-0.17661,0.888428,1.470728,-1.127835,0,first
2018-02-20,-1.41619,1.010963,-0.289064,-1.505131,0,second
2018-02-24,0.595414,0.275115,0.079112,1.35903,1,first
2018-02-23,0.350645,2.506299,1.483251,0.658658,3,second
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963,3,third
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115,5,first


In [39]:
# Distinct values present in column F.
df2["F"].unique()

array(['first', 'second', 'third'], dtype=object)

In [40]:
# Number of rows carrying each distinct F value.
df2["F"].value_counts() 

first     3
second    2
third     1
Name: F, dtype: int64

In [41]:
# Boolean mask, one entry per row: True where F is 'first' or 'third'.
df2["F"].isin(['first','third']) 

2018-02-25     True
2018-02-20    False
2018-02-24     True
2018-02-21     True
2018-02-22     True
2018-02-23    False
Name: F, dtype: bool

In [42]:
# Keep only the rows whose F value is 'first' or 'third'; all columns are retained.
df2.loc[df2["F"].isin(["first", "third"]), :]

Unnamed: 0,D,B,C,A,E,F
2018-02-25,-0.196274,0.226454,-2.508144,-0.141115,5,first
2018-02-24,0.595414,0.275115,0.079112,1.35903,1,first
2018-02-21,-0.286412,-1.520187,1.084567,-0.731963,3,third
2018-02-22,-0.17661,0.888428,1.470728,-1.127835,0,first


In [43]:
# 4x3 frame of standard-normal draws: one row per city, columns "b", "d" and "e".
cities = ["Seoul", "Incheon", "Busan", "Daegu"]
df3 = pd.DataFrame(
    np.random.randn(len(cities), 3),
    index=cities,
    columns=list("bde"),
)

In [44]:
# Display the city-indexed frame.
df3

Unnamed: 0,b,d,e
Seoul,1.127198,2.189873,-0.598209
Incheon,-0.461433,-1.199689,0.044034
Busan,-0.983571,1.654711,0.528438
Daegu,-0.235316,-1.400332,0.158999


In [45]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` may be any object exposing ``.max()`` and ``.min()``. In this notebook it
    receives one column or one row of ``df3`` at a time through ``DataFrame.apply``.
    """
    return x.max() - x.min()

In [47]:
# Call func once per column (axis=0): one max-minus-min spread for each of b, d, e.
df3.apply(func, axis=0)

b    2.110769
d    3.590204
e    1.126648
dtype: float64

In [48]:
# Call func once per row (axis=1): one max-minus-min spread for each city.
df3.apply(func, axis=1)

Seoul      2.788082
Incheon    1.243723
Busan      2.638282
Daegu      1.559330
dtype: float64

In [49]:
# IPython help shortcut: shows the docstring of df3.apply. This is IPython/Jupyter
# syntax (not plain Python) and is a development aid rather than part of the analysis.
df3.apply?

In [50]:
# IPython source shortcut: the double question mark shows the source of df3.apply.
# This is IPython/Jupyter syntax (not plain Python) and is a development aid.
df3.apply??