In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a small five-row example frame. "penalty" is requested in `columns`
# but has no matching key in `data`, so that column starts out empty (NaN).
data = {
    "names": list("abcde"),
    "year": list(range(2014, 2019)),
    "points": [1.2, 3.2, 3.3, 4.0, 4.1],
}
df = pd.DataFrame(
    data,
    index=["one", "two", "three", "four", "five"],
    columns=["year", "names", "points", "penalty"],
)

In [3]:
# Display the frame; "penalty" is empty because `data` has no such key.
df

Unnamed: 0,year,names,points,penalty
one,2014,a,1.2,
two,2015,b,3.2,
three,2016,c,3.3,
four,2017,d,4.0,
five,2018,e,4.1,


In [4]:
# Select a single column by its label; the result is a Series.

df["year"]

one      2014
two      2015
three    2016
four     2017
five     2018
Name: year, dtype: int64

In [5]:
# Attribute-style access returns the same column as df["year"].
df.year

one      2014
two      2015
three    2016
four     2017
five     2018
Name: year, dtype: int64

In [6]:
# Select several columns at once by passing a list of labels.

df[['year', 'points']]

Unnamed: 0,year,points
one,2014,1.2
two,2015,3.2
three,2016,3.3
four,2017,4.0
five,2018,4.1


In [7]:
# Change a column's values: a scalar is written to every row.

df['penalty'] = 0.5

In [8]:
# Inspect the frame: "penalty" now holds 0.5 in every row.
df

Unnamed: 0,year,names,points,penalty
one,2014,a,1.2,0.5
two,2015,b,3.2,0.5
three,2016,c,3.3,0.5
four,2017,d,4.0,0.5
five,2018,e,4.1,0.5


In [9]:
# Assign one value per row from a list (five values for the five rows).
df['penalty'] = [0.1, 0.2, 0.3, 0.4, 0.5]

In [10]:
# Inspect the frame: "penalty" now differs per row.
df

Unnamed: 0,year,names,points,penalty
one,2014,a,1.2,0.1
two,2015,b,3.2,0.2
three,2016,c,3.3,0.3
four,2017,d,4.0,0.4
five,2018,e,4.1,0.5


In [13]:
# Add new columns by assigning to labels that do not exist yet.
# The "zeros" column appears in the outputs that follow and is removed later
# with `del df["zeros"]`, but the cell that created it is no longer in the
# notebook. It is recreated here so a fresh-kernel run does not hit a KeyError.

df["zeros"] = np.zeros(5)
df["arange"] = np.arange(5)

In [14]:
# Inspect the frame with the newly added column(s).
df

Unnamed: 0,year,names,points,penalty,zeros,arange
one,2014,a,1.2,0.1,0.0,0
two,2015,b,3.2,0.2,0.0,1
three,2016,c,3.3,0.3,0.0,2
four,2017,d,4.0,0.4,0.0,3
five,2018,e,4.1,0.5,0.0,4


In [16]:
# A Series holding values for only three of the row labels used by `df`.
val = pd.Series({"two": -1.2, "four": -1.5, "five": -1.7})

In [17]:
# Assigning a Series matches values by index label; rows with no matching
# label ("one", "three") are left empty (NaN) in the new column.
df["debt"] = val

In [18]:
# Inspect the frame: "debt" is filled only for rows two, four and five.
df

Unnamed: 0,year,names,points,penalty,zeros,arange,debt
one,2014,a,1.2,0.1,0.0,0,
two,2015,b,3.2,0.2,0.0,1,-1.2
three,2016,c,3.3,0.3,0.0,2,
four,2017,d,4.0,0.4,0.0,3,-1.5
five,2018,e,4.1,0.5,0.0,4,-1.7


In [20]:
# Element-wise arithmetic between two columns, stored as a new column.
df["net_points"] = df["points"] - df["penalty"]

In [21]:
# Element-wise comparison produces a boolean column.
df["high_points"] = df["net_points"] > 2.0

In [22]:
# Inspect the frame with the derived net_points and high_points columns.
df

Unnamed: 0,year,names,points,penalty,zeros,arange,debt,net_points,high_points
one,2014,a,1.2,0.1,0.0,0,,1.1,False
two,2015,b,3.2,0.2,0.0,1,-1.2,3.0,True
three,2016,c,3.3,0.3,0.0,2,,3.0,True
four,2017,d,4.0,0.4,0.0,3,-1.5,3.6,True
five,2018,e,4.1,0.5,0.0,4,-1.7,3.6,True


In [23]:
# Delete a column in place with `del`.

del df["high_points"]

In [25]:
# Remove the derived net_points column as well.
del df["net_points"]

In [26]:
# Remove the zeros column too.
del df["zeros"]

In [27]:
# Confirm that the three deleted columns are gone.
df

Unnamed: 0,year,names,points,penalty,arange,debt
one,2014,a,1.2,0.1,0,
two,2015,b,3.2,0.2,1,-1.2
three,2016,c,3.3,0.3,2,
four,2017,d,4.0,0.4,3,-1.5
five,2018,e,4.1,0.5,4,-1.7


In [28]:
# The column labels are held in an Index object.
df.columns

Index(['year', 'names', 'points', 'penalty', 'arange', 'debt'], dtype='object')

In [31]:
# Give the row index and the column index a name.

df.index.name = "Index"
df.columns.name = "Info"

In [32]:
# The axis names ("Index", "Info") now appear in the displayed header.
df

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0,
two,2015,b,3.2,0.2,1,-1.2
three,2016,c,3.3,0.3,2,
four,2017,d,4.0,0.4,3,-1.5
five,2018,e,4.1,0.5,4,-1.7


In [33]:
# An integer slice inside df[...] selects rows by position (end excluded).
df[0:3]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0,
two,2015,b,3.2,0.2,1,-1.2
three,2016,c,3.3,0.3,2,


In [34]:
# A label slice inside df[...] selects rows by label; the end label is included.
df["one":"three"]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0,
two,2015,b,3.2,0.2,1,-1.2
three,2016,c,3.3,0.3,2,


In [35]:
# Explore label-based (.loc) and position-based (.iloc) indexing.

df.loc["two"]

Info
year       2015
names         b
points      3.2
penalty     0.2
arange        1
debt       -1.2
Name: two, dtype: object

In [37]:
# Label slice of rows; both end labels are included.
df.loc["two":"four"]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
two,2015,b,3.2,0.2,1,-1.2
three,2016,c,3.3,0.3,2,
four,2017,d,4.0,0.4,3,-1.5


In [39]:
# A row label slice plus a single column label returns a Series.
df.loc["two":"four", "points"]

Index
two      3.2
three    3.3
four     4.0
Name: points, dtype: float64

In [40]:
# ":" selects every row; here, all rows of the "year" column.
df.loc[:, "year"]

Index
one      2014
two      2015
three    2016
four     2017
five     2018
Name: year, dtype: int64

In [41]:
# All rows with a list of column labels returns a DataFrame.
df.loc[:, ["year", "names"]]

Info,year,names
Index,Unnamed: 1_level_1,Unnamed: 2_level_1
one,2014,a
two,2015,b
three,2016,c
four,2017,d
five,2018,e


In [44]:
# Label slices on both axes; the column slice includes "penalty" itself.
df.loc["one":"three", "year": "penalty"]

Info,year,names,points,penalty
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,2014,a,1.2,0.1
two,2015,b,3.2,0.2
three,2016,c,3.3,0.3


In [49]:
# Append a new row by assigning to a row label that does not exist yet.
# The values are given in column order: year, names, points, penalty, arange, debt.
# np.nan is used instead of np.NaN because that alias was removed in NumPy 2.0.
# Inserting 1.5 into "arange" is why that column is displayed as floats afterwards.
df.loc["six"] = [2017, "k", 2.4, 1.2, 1.5, np.nan]

In [50]:
# The frame now has a sixth row, and "arange" is shown as floats.
df

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0.0,
two,2015,b,3.2,0.2,1.0,-1.2
three,2016,c,3.3,0.3,2.0,
four,2017,d,4.0,0.4,3.0,-1.5
five,2018,e,4.1,0.5,4.0,-1.7
six,2017,k,2.4,1.2,1.5,


In [51]:
# Row at integer position 3 (the row labelled "four"), returned as a Series.
df.iloc[3]

Info
year       2017
names         d
points        4
penalty     0.4
arange        3
debt       -1.5
Name: four, dtype: object

In [52]:
# Rows at positions 3-4 (end excluded) of the column at position 3 ("penalty").
df.iloc[3:5, 3]

Index
four    0.4
five    0.5
Name: penalty, dtype: float64

In [53]:
# Lists of positions: rows 0-2 and columns 1-2 ("names", "points").
df.iloc[[0,1,2], [1,2]]

Info,names,points
Index,Unnamed: 1_level_1,Unnamed: 2_level_1
one,a,1.2
two,b,3.2
three,c,3.3


In [57]:
# Show the full frame again for reference.
df

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0.0,
two,2015,b,3.2,0.2,1.0,-1.2
three,2016,c,3.3,0.3,2.0,
four,2017,d,4.0,0.4,3.0,-1.5
five,2018,e,4.1,0.5,4.0,-1.7
six,2017,k,2.4,1.2,1.5,


In [54]:
# A single value: row position 1, column position 1.
df.iloc[1,1]

'b'

In [58]:
# Boolean indexing: comparing a column to a scalar gives one True/False per row.

df["year"] > 2014

Index
one      False
two       True
three     True
four      True
five      True
six       True
Name: year, dtype: bool

In [59]:
# Pass the boolean mask to .loc to keep only the rows where it is True;
# ":" keeps every column.
df.loc[df["year"] > 2014, :]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
two,2015,b,3.2,0.2,1.0,-1.2
three,2016,c,3.3,0.3,2.0,
four,2017,d,4.0,0.4,3.0,-1.5
five,2018,e,4.1,0.5,4.0,-1.7
six,2017,k,2.4,1.2,1.5,


In [60]:
# Same pattern with an equality test on the "names" column.
df.loc[df["names"] == "a", :]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0.0,


In [61]:
# Select the rows where points is greater than 2 and less than 3.
# Each comparison is parenthesised because & binds tighter than > and <.

df.loc[ (df["points"] > 2)  & (df["points"] < 3), :]

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
six,2017,k,2.4,1.2,1.5,


In [62]:
# Set penalty to 0 for the rows where points is greater than 3.
# A single .loc[mask, column] assignment writes into `df` itself.

df.loc[df["points"]>3, "penalty"] = 0

In [63]:
# Rows two to five (points > 3) now have penalty 0; rows one and six are unchanged.
df

Info,year,names,points,penalty,arange,debt
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014,a,1.2,0.1,0.0,
two,2015,b,3.2,0.0,1.0,-1.2
three,2016,c,3.3,0.0,2.0,
four,2017,d,4.0,0.0,3.0,-1.5
five,2018,e,4.1,0.0,4.0,-1.7
six,2017,k,2.4,1.2,1.5,
