In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {"names": ["Kilho", "Kilho", "Kilho", "Charles", "Charles"],
           "year": [2014, 2015, 2016, 2015, 2016],
           "points": [1.5, 1.7, 3.6, 2.4, 2.9]}
df = pd.DataFrame(data, columns=["year", "names", "points", "penalty"],
                          index=["one", "two", "three", "four", "five"])

In [3]:
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,
two,2015,Kilho,1.7,
three,2016,Kilho,3.6,
four,2015,Charles,2.4,
five,2016,Charles,2.9,


In [4]:
df["year"]

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [6]:
df.year 
#위와 같음

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [9]:
df[["year", "points"]]
#column 2개 indexing

Unnamed: 0,year,points
one,2014,1.5
two,2015,1.7
three,2016,3.6
four,2015,2.4
five,2016,2.9


In [10]:
df["penalty"] = 0.5

In [12]:
df["penalty"] = [0.1, 0.2, 0.3, 0.4, 0.5]

In [14]:
#열 추가하기
df["zeros"] = np.arange(5)

In [15]:
df

Unnamed: 0,year,names,points,penalty,zeros
one,2014,Kilho,1.5,0.1,0
two,2015,Kilho,1.7,0.2,1
three,2016,Kilho,3.6,0.3,2
four,2015,Charles,2.4,0.4,3
five,2016,Charles,2.9,0.5,4


In [31]:
df["net_points"] = df["points"] - df["penalty"]
#새로운 열을 만들 때 기존의 열로 연산 가능

In [32]:
df["high_points"] = df["net_points"] > 2.0

In [33]:
df

Unnamed: 0,year,names,points,penalty,net_points,high_points
one,2014,Kilho,1.5,0.1,1.4,False
two,2015,Kilho,1.7,0.2,1.5,False
three,2016,Kilho,3.6,0.3,3.3,True
four,2015,Charles,2.4,0.4,2.0,False
five,2016,Charles,2.9,0.5,2.4,True


In [34]:
#열 삭제
del df["high_points"]

In [35]:
del df["net_points"]

In [36]:
del df["zeros"]

KeyError: 'zeros'

In [37]:
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,0.1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [39]:
df.columns

Index(['year', 'names', 'points', 'penalty'], dtype='object')

In [40]:
df.index.name = "Order"

In [41]:
df.columns.name = "Info"

In [42]:
df

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,2014,Kilho,1.5,0.1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [45]:
df[0:3]
#1행~3행 가져옴

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,2014,Kilho,1.5,0.1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3


In [47]:
df["two":"four"]
#이런식으로도 가능

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4


In [48]:
# .loc .iloc 이걸로 인덱싱하는 것이 제일 바람직
# location 약자인듯
df.loc["two"]

Info
year        2015
names      Kilho
points       1.7
penalty      0.2
Name: two, dtype: object

In [49]:
df.loc["two":"four"]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
two,2015,Kilho,1.7,0.2
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4


In [51]:
df.loc["two":"four", "points"]
#[행, 열] 이렇게 써서 인덱싱

Order
two      1.7
three    3.6
four     2.4
Name: points, dtype: float64

In [53]:
df.loc[:, "year"]
#loc를 쓰면 정확하게 표현됨

Order
one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [54]:
df.loc["three":"five", "year":"penalty"]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
three,2016,Kilho,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [57]:
df.loc["six", :] = [2013, "Hayoung", 4, 0.1]

In [58]:
df

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,2014.0,Kilho,1.5,0.1
two,2015.0,Kilho,1.7,0.2
three,2016.0,Kilho,3.6,0.3
four,2015.0,Charles,2.4,0.4
five,2016.0,Charles,2.9,0.5
six,2013.0,Hayoung,4.0,0.1


In [59]:
# .loc 숫자로 표현해서 indexing

In [62]:
df.iloc[1,2]

1.7

In [63]:
df.iloc[[0,2], [1,2]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Kilho,1.5
three,Kilho,3.6


In [65]:
#불리언 인덱싱
df

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,2014.0,Kilho,1.5,0.1
two,2015.0,Kilho,1.7,0.2
three,2016.0,Kilho,3.6,0.3
four,2015.0,Charles,2.4,0.4
five,2016.0,Charles,2.9,0.5
six,2013.0,Hayoung,4.0,0.1


In [66]:
df["year"] > 2014
#이런걸 마스크라고함

Order
one      False
two       True
three     True
four      True
five      True
six      False
Name: year, dtype: bool

In [67]:
df.loc[df["year"] > 2014, :]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
two,2015.0,Kilho,1.7,0.2
three,2016.0,Kilho,3.6,0.3
four,2015.0,Charles,2.4,0.4
five,2016.0,Charles,2.9,0.5


In [69]:
df.loc[df["names"] == "Kilho", ["names", "points"]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Kilho,1.5
two,Kilho,1.7
three,Kilho,3.6


In [72]:
df.loc[(df["points"] > 2) & (df["points"] < 3), :]
#points값이 2와 3 사이인 것만 뽑기

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
four,2015.0,Charles,2.4,0.4
five,2016.0,Charles,2.9,0.5


In [73]:
df.loc[(df["points"]>3) ]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
three,2016.0,Kilho,3.6,0.3
six,2013.0,Hayoung,4.0,0.1
