In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
# Exam scores: one list per subject, positionally aligned with the 'name' list.
exam_data = {
    'name': ['John', 'Snow', 'Sun'],
    'math': [90, 85, 70],
    'english': [98, 88, 95],
    'music': [85, 79, 78],
    'PE': [80, 79, 75],
}

# Build the frame and promote the student names to the row index in one chain.
df = pd.DataFrame(exam_data).set_index('name')
df

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,90,98,85,80
Snow,85,88,79,79
Sun,70,95,78,75


In [3]:
# (number of rows, number of columns) of the exam-score frame.
df.shape

(3, 4)

In [5]:
# Read, then overwrite, John's PE score (row 0, column 3) with a single
# positional indexer. The chained form df.iloc[0][3] = ... assigns through an
# intermediate Series, which pandas does not guarantee to write back into df
# (SettingWithCopyWarning; a no-op under Copy-on-Write).
print(df.iloc[0, 3]) # John, PE
df.iloc[0, 3] = 90
df

80


Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,90,98,85,90
Snow,85,88,79,79
Sun,70,95,78,75


In [6]:
# Set Snow's PE score with one label-based indexer (row label, column label).
# The chained form df.loc['Snow']['PE'] = ... assigns to a temporary Series and
# is not guaranteed to modify df itself.
df.loc['Snow', 'PE'] = 99
df

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,90,98,85,90
Snow,85,88,79,99
Sun,70,95,78,75


In [7]:
# Overwrite two of John's scores in one assignment; the values pair up with
# the column labels in order (PE -> 120, music -> 130).
df.loc['John', ['PE', 'music']] = [120, 130]
df

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,90,98,130,120
Snow,85,88,79,99
Sun,70,95,78,75


In [8]:
# Transpose via the .T accessor: subjects become rows, students become columns.
df_transpose = df.T

In [9]:
# Transpose again, this time spelled as the explicit method call.
df_transpose2 = df.transpose()

In [12]:
# Confirm that .T and .transpose() produced the same frame.
# DataFrame.equals checks that both frames have the same shape and elements and
# treats NaNs in the same location as equal, whereas an element-wise ==
# reduced with .all().all() reports False as soon as a NaN is present.
df_transpose.equals(df_transpose2)

True

In [13]:
# Display the transposed frame (subjects as rows, students as columns).
df_transpose

name,John,Snow,Sun
math,90,85,70
english,98,88,95
music,130,79,78
PE,120,99,75


In [15]:
# set_index returns a new frame indexed by the 'music' column; the previous
# 'name' index is dropped from the result rather than kept as a column.
ndf1 = df.set_index('music')
ndf1

Unnamed: 0_level_0,math,english,PE
music,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
130,90,98,120
79,85,88,99
78,70,95,75


In [16]:
# Passing a list of columns builds a two-level (math, music) index.
ndf2 = df.set_index(['math', 'music'])
ndf2

Unnamed: 0_level_0,Unnamed: 1_level_0,english,PE
math,music,Unnamed: 2_level_1,Unnamed: 3_level_1
90,130,98,120
85,79,88,99
70,78,95,75


In [22]:
# Positional lookup: first row, first remaining data column ('english').
ndf2.iloc[0, 0]

98

In [23]:
# Reset the index.
# The existing index levels (math, music) move back into regular columns and are replaced by a default integer index.

ndf3 = ndf2.reset_index()
ndf3

Unnamed: 0,math,music,english,PE
0,90,130,98,120
1,85,79,88,99
2,70,78,95,75


In [25]:
# What happens if we reset again?
# The current integer index is added as a new column named 'index'.
ndf4 = ndf3.reset_index()
ndf4

Unnamed: 0,index,math,music,english,PE
0,0,90,130,98,120
1,1,85,79,88,99
2,2,70,78,95,75


In [26]:
# Resetting yet again creates another new column, this time named 'level_0'.
# This hints at how reset_index() works: each call turns the current index into a column.
# Calling it repeatedly without need can cause problems (redundant index columns pile up).

ndf5 = ndf4.reset_index()
ndf5

Unnamed: 0,level_0,index,math,music,english,PE
0,0,0,90,130,98,120
1,1,1,85,79,88,99
2,2,2,70,78,95,75


In [27]:
# Reindex with labels that do not exist in df's current index.
# The columns are kept, the rows follow new_index (five rows here), and every value is NaN because no new label matches an existing row.
new_index = ['billy', 'irish', 'a', 'b', 'c']
ndf = df.reindex(new_index)
ndf

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
billy,,,,
irish,,,,
a,,,,
b,,,,
c,,,,


In [30]:
# Sort the rows by index label using the default (ascending) order.
ndf_sorted_by_index = ndf.sort_index()
ndf_sorted_by_index

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a,,,,
b,,,,
billy,,,,
c,,,,
irish,,,,


In [31]:
# Sort the rows by index label in descending order.
ndf_sorted_by_index_descending = ndf.sort_index(ascending = False)
ndf_sorted_by_index_descending

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
irish,,,,
c,,,,
billy,,,,
b,,,,
a,,,,


In [32]:
# Sort the rows by the 'math' column, lowest score first (default ascending order).
df_sorted_by_math = df.sort_values(by='math')
df_sorted_by_math

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Sun,70,95,78,75
Snow,85,88,79,99
John,90,98,130,120


In [33]:
# Sort the rows by the 'math' column, highest score first.
df_sorted_by_math_descending = df.sort_values(by='math', ascending=False)
df_sorted_by_math_descending

Unnamed: 0_level_0,math,english,music,PE
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
John,90,98,130,120
Snow,85,88,79,99
Sun,70,95,78,75


## 타이타닉 데이터

In [35]:
# Load the 'titanic' dataset through seaborn; the printed type shows that it
# comes back as a pandas DataFrame.
titanic = sns.load_dataset('titanic')
print(type(titanic))

# Preview the first five rows.
titanic.head()

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [38]:
# Keep every row but only the 'age' and 'fare' columns.
# NOTE(review): this rebinds `df`, which previously held the exam-score frame;
# re-running earlier cells after this one would operate on the Titanic subset.
df = titanic.loc[:, ['age', 'fare']]
df.head()

Unnamed: 0,age,fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05
