# Pandas
## Series

In [2]:
import pandas as pd

In [3]:
# Build a Series from a dict: the keys become index labels, the values the data.
dict_data = dict(a=1, b=2, c=3)
sr = pd.Series(dict_data)
# Show the object's type on one line, then its contents.
print(type(sr), sr, sep='\n')

<class 'pandas.core.series.Series'>
a    1
b    2
c    3
dtype: int64


In [4]:
# Print the type and contents of the two parts of a Series:
#   sr.index  -> a pandas Index holding the labels (dtype 'object' here)
#   sr.values -> the underlying data as a NumPy ndarray
for part in (sr.index, sr.values):
    print(type(part), part)

<class 'pandas.core.indexes.base.Index'> Index(['a', 'b', 'c'], dtype='object')
<class 'numpy.ndarray'> [1 2 3]


In [5]:
print(sr['a'] + sr['b'])  # look up values by their explicitly assigned index labels

3


In [6]:
# Position-based access: .iloc[0] returns the first element regardless of the
# index labels. A bare sr[0] on a label-indexed Series depends on a deprecated
# positional fallback (integer keys are meant to be treated as labels).
print(sr.iloc[0])

1


In [7]:
# A list of mixed-type values. No index is supplied, so the Series gets
# integer labels 0..4 and an 'object' dtype (see the printed output).
list_data = [
    '2021-1-5',
    3.14,
    'ABC',
    100,
    True,
]
sr = pd.Series(data=list_data)
print(sr)

0    2021-1-5
1        3.14
2         ABC
3         100
4        True
dtype: object


In [8]:
# Build a Series from a tuple and attach explicit (string) index labels.
tup_data = ('2021-1-5', 'SKY', 200, False)
tup_index = ['가입일', '이름', '잔액', '할인쿠폰']
sr = pd.Series(tup_data, index = tup_index)
print(sr)
# Read the same element twice: by position (.iloc) and by label ([]).
# A bare sr[0] is avoided because integer keys on a label-indexed Series
# depend on a deprecated positional fallback.
print(sr.iloc[0], sr['가입일'])

가입일     2021-1-5
이름           SKY
잔액           200
할인쿠폰       False
dtype: object
2021-1-5 2021-1-5


## DataFrame

In [36]:
# Each dict key becomes a column name; each list holds that column's values.
dict_data = {
    'c0': [1, 2, 3],
    'c1': [4, 5, 6],
    'c2': [7, 8, 9],
    'c3': [10, 11, 12],
    'c4': [13, 14, 15],
}
df = pd.DataFrame(data=dict_data)

print(type(df))
df

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,c0,c1,c2,c3,c4
0,1,4,7,10,13
1,2,5,8,11,14
2,3,6,9,12,15


In [37]:
# Build a frame from a list of rows, naming both the rows (index)
# and the columns explicitly.
df = pd.DataFrame(
    data=[
        [15, 'male', 'yMiddleSchool'],
        [17, 'female', 'sMiddleSchool'],
    ],
    index=['seojun', 'doyeon'],
    columns=['age', 'sex', 'school'],
)

df

Unnamed: 0,age,sex,school
seojun,15,male,yMiddleSchool
doyeon,17,female,sMiddleSchool


In [38]:
# A slice with no bounds selects every row of the frame.
df[::]

Unnamed: 0,age,sex,school
seojun,15,male,yMiddleSchool
doyeon,17,female,sMiddleSchool


In [39]:
# Slice the first two rows by position (here that is the whole two-row frame).
df[:2]

Unnamed: 0,age,sex,school
seojun,15,male,yMiddleSchool
doyeon,17,female,sMiddleSchool


In [40]:
# Select the 'age' column by its name.
df['age']

seojun    15
doyeon    17
Name: age, dtype: int64

In [41]:
# Replace all row labels at once by assigning a new list to df.index.
df.index = ['student1', 'student2']
df

Unnamed: 0,age,sex,school
student1,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


In [42]:
# An Index does not allow assigning to a single position: the attempt raises
# TypeError. The error is caught and printed so the point is demonstrated
# without aborting a full "Run All" of the notebook.
try:
    df.index[1] = 'studentSeojun'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [43]:
# rename() returns a new frame with the row label changed; the result is only
# displayed here, not assigned, so df itself keeps its labels.
df.rename(index={'student1' : 'studentSeojun'})

Unnamed: 0,age,sex,school
studentSeojun,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


In [44]:
# Same idea for column labels: the renamed frame is displayed but not kept.
df.rename(columns={'school':'organization'})

Unnamed: 0,age,sex,organization
student1,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


In [45]:
# Display df again: it still carries the original row and column labels,
# because the earlier rename() results were never assigned back.
df

Unnamed: 0,age,sex,school
student1,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


In [46]:
# Rename one row label and one column label in a single call, and keep the
# result by rebinding the name df to the returned frame.
df = df.rename(index={'student1':'studentSeojun'}, columns={'school':'organization'})
df

Unnamed: 0,age,sex,organization
studentSeojun,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


In [47]:
# Work on a copy so that df is left untouched.
df2 = df.copy()
# With inplace=True, rename() modifies df2 directly and returns None,
# so df3 is bound to None (see the printed value), not to a DataFrame.
df3 = df2.rename(index={'studentSeojun':'Seojun'}, columns={'organization':'place'}, inplace=True)

print(df3)
df2

None


Unnamed: 0,age,sex,place
Seojun,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


### DataFrame의 인덱싱

In [54]:
# Pull out a single column once, then show its type and its contents.
age_column = df2['age']
print(type(age_column))
age_column

<class 'pandas.core.series.Series'>


Seojun      15
student2    17
Name: age, dtype: int64

In [55]:
# Pull out a single row by label once, then show its type and its contents.
seojun_row = df2.loc['Seojun']
print(type(seojun_row))
seojun_row

<class 'pandas.core.series.Series'>


age                 15
sex               male
place    yMiddleSchool
Name: Seojun, dtype: object

In [56]:
# .loc with a full slice on both axes selects all rows and all columns.
df2.loc[:, :]

Unnamed: 0,age,sex,place
Seojun,15,male,yMiddleSchool
student2,17,female,sMiddleSchool


### DataFrame 데이터 수정, 삭제, 추가

In [63]:
# Recreate the two-student frame from scratch as the starting point
# for the modify / delete / add examples that follow.
df = pd.DataFrame(
    data=[
        [15, 'male', 'yMiddleSchool'],
        [17, 'female', 'sMiddleSchool'],
    ],
    index=['seojun', 'doyeon'],
    columns=['age', 'sex', 'school'],
)

df

Unnamed: 0,age,sex,school
seojun,15,male,yMiddleSchool
doyeon,17,female,sMiddleSchool


In [66]:
# Add one to every age using vectorized Series arithmetic; no per-element
# Python lambda is needed for simple element-wise math.
# Note: re-running this cell increments the ages again.
df['age'] = df['age'] + 1
df

Unnamed: 0,age,sex,school
seojun,16,male,yMiddleSchool
doyeon,18,female,sMiddleSchool


In [67]:
# Overwrite the whole 'seojun' row. The age is given as an int (not the
# string '17') so the 'age' column does not end up mixing int and str values.
df.loc['seojun'] = [17, 'male', 'kMiddleSchool']
df

Unnamed: 0,age,sex,school
seojun,17,male,kMiddleSchool
doyeon,18,female,sMiddleSchool


In [70]:
# Add a new row labelled 'jisoo': assigning to a label that does not exist yet
# appends it. The age is an int (not the string '25') so the 'age' column
# does not end up mixing int and str values.
df.loc['jisoo'] = [25, 'female', 'eMiddleSchool']
df

Unnamed: 0,age,sex,school
seojun,17,male,kMiddleSchool
doyeon,18,female,sMiddleSchool
jisoo,25,female,eMiddleSchool


In [71]:
# Drop rows by label. drop() returns a new frame; df itself is not modified.
df_dropped = df.drop(index=['doyeon', 'jisoo'])
df_dropped

Unnamed: 0,age,sex,school
seojun,17,male,kMiddleSchool


In [72]:
# Add a column: assigning a list to a new column name creates the column,
# with one value per existing row.
df['height'] = [175, 156, 185]
df

Unnamed: 0,age,sex,school,height
seojun,17,male,kMiddleSchool,175
doyeon,18,female,sMiddleSchool,156
jisoo,25,female,eMiddleSchool,185


In [74]:
# Add a second new column the same way, one value per existing row.
df['weight'] = [65, 42, 59]
df

Unnamed: 0,age,sex,school,height,weight
seojun,17,male,kMiddleSchool,175,65
doyeon,18,female,sMiddleSchool,156,42
jisoo,25,female,eMiddleSchool,185,59


In [75]:
# Drop a column by name. drop() returns a new frame; df keeps 'weight'.
df_dropped = df.drop(columns=['weight'])
df_dropped

Unnamed: 0,age,sex,school,height
seojun,17,male,kMiddleSchool,175
doyeon,18,female,sMiddleSchool,156
jisoo,25,female,eMiddleSchool,185


### DataFrame 인덱싱 / 슬라이싱

In [86]:
# Attribute-style access to the 'height' column; the result is a Series.
df_height_series = df.height
print(type(df_height_series))
df_height_series

<class 'pandas.core.series.Series'>


seojun    175
doyeon    156
jisoo     185
Name: height, dtype: int64

In [87]:
# Bracket access with a single column name; the result is also a Series.
df_height_series2 = df['height']
print(type(df_height_series2))
df_height_series2

<class 'pandas.core.series.Series'>


seojun    175
doyeon    156
jisoo     185
Name: height, dtype: int64

In [88]:
# Passing a list of column names (double brackets) yields a DataFrame,
# even when the list contains only one column.
df_height_dataframe = df[['height']]
print(type(df_height_dataframe))
df_height_dataframe

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,height
seojun,175
doyeon,156
jisoo,185


In [85]:
# Select several columns at once by passing a list of column names.
df[['height', 'weight']]

Unnamed: 0,height,weight
seojun,175,65
doyeon,156,42
jisoo,185,59


In [77]:
# Select multiple rows by label: .loc with a list of row labels.
label = df.loc[['seojun', 'doyeon']]
label

Unnamed: 0,age,sex,school,height,weight
seojun,17,male,kMiddleSchool,175,65
doyeon,18,female,sMiddleSchool,156,42


In [83]:
# Position-based slicing with .iloc: rows before position 1, i.e. only the first row.
label2 = df.iloc[:1]
label2

Unnamed: 0,age,sex,school,height,weight
seojun,17,male,kMiddleSchool,175,65


In [84]:
# .iloc with a row slice and a column slice: first row, first three columns.
label3 = df.iloc[:1, :3]
label3

Unnamed: 0,age,sex,school
seojun,17,male,kMiddleSchool


In [80]:
# Label-based slicing with .loc includes the end label: the 'jisoo' row
# is part of the result.
label4 = df.loc[:'jisoo']
label4

Unnamed: 0,age,sex,school,height,weight
seojun,17,male,kMiddleSchool,175,65
doyeon,18,female,sMiddleSchool,156,42
jisoo,25,female,eMiddleSchool,185,59


In [89]:
# Boolean-mask filtering: keep only the rows whose height is below 160.
df[df['height'] < 160]

Unnamed: 0,age,sex,school,height,weight
doyeon,18,female,sMiddleSchool,156,42


In [92]:
# Build the boolean mask once, then look at it three ways:
# its type, its label -> bool mapping, and its normal display.
is_under_160 = df['height'] < 160
print(type(is_under_160))
print(dict(is_under_160))
is_under_160

<class 'pandas.core.series.Series'>
{'seojun': False, 'doyeon': True, 'jisoo': False}


seojun    False
doyeon     True
jisoo     False
Name: height, dtype: bool

In [97]:
# A single row label combined with a list of column labels returns a Series
# for that row, indexed by the selected column names.
df.loc['doyeon', ['height']]

height    156
Name: doyeon, dtype: object

In [96]:
# Lists for both rows and columns return a DataFrame, with rows and columns
# in the order requested.
df.loc[['doyeon', 'seojun'], ['weight', 'height']]

Unnamed: 0,weight,height
doyeon,42,156
seojun,65,175
