# Series

- pandas에서 사용하는 1차원 배열
- index를 사용할 수 있습니다.

In [1]:
import pandas as pd
import numpy as np

In [7]:
# Ten consecutive integers, 100 through 109, as a NumPy array.
arr = 100 + np.arange(10)
print(arr)

[100 101 102 103 104 105 106 107 108 109]


In [8]:
# Wrap the array in a Series; no index is given, so pandas numbers the rows.
s = pd.Series(data=arr)
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int64


In [10]:
# Same data, but force the element type to 32-bit integers.
s = pd.Series(data=arr, dtype='int32')
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int32


In [11]:
# A Series can hold strings as well as numbers.
s = pd.Series(data=['kim', 'lee', 'park'])
print(s)

0     kim
1     lee
2    park
dtype: object


In [16]:
# Mixing integers and strings: every element keeps its own Python type,
# so the Series falls back to the generic object dtype.
s = pd.Series(data=[1, 2, 3, '1', '2', 'hello'])
print(s)

0        1
1        2
2        3
3        1
4        2
5    hello
dtype: object


In [17]:
# Look up the element whose index label is 5 (the last one here).
s[5]

'hello'

In [19]:
# Negative indexing is not supported here: with the default integer index,
# s[-1] is looked up as the label -1 and raises KeyError.
# Use s.iloc[-1] for position-based access from the end.
s[-1]

KeyError: -1

In [20]:
# The index object of the Series; by default a RangeIndex starting at 0.
s.index

RangeIndex(start=0, stop=6, step=1)

In [25]:
# Attach custom string labels 'a', 'b', 'c' instead of the default 0..n-1.
names = pd.Series(data=['kim', 'lee', 'park'], index=list('abc'))

In [29]:
# Inspect the labels, then fetch the first element by position.
# `names` is labelled with strings, so positional access must go through
# .iloc; names[0] relied on the deprecated positional fallback of [] and
# was removed for that reason.
names.index
names.iloc[0]

  names[0]


'kim'

In [31]:
# The underlying data, without the index, as a NumPy array.
names.values

array(['kim', 'lee', 'park'], dtype=object)

In [32]:
# Number of dimensions of the data; a Series is one-dimensional.
# (The attribute is `ndim`; the earlier spelling `ndin` raised AttributeError.)
names.ndim

1

## NaN (Not a Number)

In [35]:
# NaN is a float value, so a single NaN turns the integers into float64.
s = pd.Series(data=[1, 2, 3, np.nan])
print(s)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64


In [36]:
## fancy indexing

In [38]:
# Fruit names plus one missing value, labelled 'a' through 'd'.
f = ['banana', 'apple', 'grape', np.nan]
s = pd.Series(data=f, index=['a', 'b', 'c', 'd'])
print(s)

a    banana
b     apple
c     grape
d       NaN
dtype: object


In [41]:
# Fancy indexing: pass a list of labels to pick several elements at once,
# returned in the order requested.
s[['d', 'a']]

d       NaN
a    banana
dtype: object

In [42]:
# Fancy indexing by position. The Series is labelled 'a'..'d', so integer
# positions must go through .iloc; passing them to plain [] relied on the
# deprecated positional fallback.
s.iloc[[3, 1]]

  s[[3, 1]]


d      NaN
b    apple
dtype: object

## bool indexing

In [43]:
# Show the current Series before filtering it.
s

a    banana
b     apple
c     grape
d       NaN
dtype: object

In [44]:
# Boolean indexing: a list of booleans keeps the elements marked True.
s[[True, False, True, False]]

a    banana
c     grape
dtype: object

In [45]:
# An element-wise comparison yields a boolean Series (a mask).
s == 'banana'

a     True
b    False
c    False
d    False
dtype: bool

In [46]:
# Use the mask as an indexer to keep only the matching elements.
s[s == 'banana']

a    banana
dtype: object

In [49]:
# Boolean indexing on numbers: keep only the values greater than 3.
s = pd.Series(range(1, 7))
s.loc[s > 3]

3    4
4    5
5    6
dtype: int64

## 결측치(NaN) 처리

In [50]:
# Sample data with two missing values (positions 2 and 5).
s = pd.Series(data=[1, 3, np.nan, 10, 11, np.nan])
print(s)

0     1.0
1     3.0
2     NaN
3    10.0
4    11.0
5     NaN
dtype: float64


In [51]:
# Boolean mask that is True where the value is missing (NaN).
s.isnull()

0    False
1    False
2     True
3    False
4    False
5     True
dtype: bool

In [52]:
# Keep only the missing entries.
s[s.isnull()]
# Alternative spelling: s[s.isna()]

2   NaN
5   NaN
dtype: float64

In [54]:
# Keep only the non-missing entries.
# Alternative spelling: s[s.notna()]
s[s.notnull()]

0     1.0
1     3.0
3    10.0
4    11.0
dtype: float64

## slicing

In [55]:
# Integer slice: starts at 1 and stops before 3, so two elements come back.
s[1:3]

1    3.0
2    NaN
dtype: float64

In [57]:
# Slicing a Series that has string labels.
s = pd.Series(data=[1, 2, 3], index=['a', 'b', 'c'])
# Integer slice (result not displayed; only the last expression is shown).
s[1:2]
# Label slice: unlike an integer slice, the end label 'b' is included.
s['a':'b']

a    1
b    2
dtype: int64

# DataFrame
- 2차원 데이터 구조(Excel 시트와 유사)
- 행(row), 열(column) 구조


In [59]:
# Build a DataFrame from a nested list: each inner list becomes one row,
# and rows and columns both get default integer labels.
d = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]])
d

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [66]:
# Same 3x3 data, this time with explicit column names.
d = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=['가', '나', '다'],
)
d

Unnamed: 0,가,나,다
0,1,2,3
1,4,5,6
2,7,8,9


In [61]:
# Column-oriented data: each key is a column name, each list its values.
info = dict(
    name=['kim', 'lee', 'park'],
    age=[10, 20, 30],
)

In [62]:
# Build a DataFrame from the dict: keys become the column names.
pd.DataFrame(info)
# All the value lists must have the same length.

Unnamed: 0,name,age
0,kim,10
1,lee,20
2,park,30


In [67]:
# Keep the DataFrame in a variable for the following cells.
info_df = pd.DataFrame(data=info)

In [69]:
# Row labels of the DataFrame.
info_df.index
# Its range also tells you the number of rows.

RangeIndex(start=0, stop=3, step=1)

In [73]:
# Column labels of the DataFrame.
info_df.columns

Index(['name', 'age'], dtype='object')

In [74]:
# The cell data only, without row or column labels, as a NumPy array.
info_df.values

array([['kim', 10],
       ['lee', 20],
       ['park', 30]], dtype=object)

In [75]:
# Data type of each column.
info_df.dtypes

name    object
age      int64
dtype: object

In [77]:
# Show the DataFrame before transposing it.
info_df

Unnamed: 0,name,age
0,kim,10
1,lee,20
2,park,30


In [78]:
# Transpose: rows become columns and columns become rows.
info_df.T

Unnamed: 0,0,1,2
name,kim,lee,park
age,10,20,30


## index 지정

In [80]:
# Current row labels: still the default RangeIndex.
info_df.index

RangeIndex(start=0, stop=3, step=1)

In [81]:
# Display the DataFrame; the leftmost values are the row labels.
info_df

Unnamed: 0,name,age
0,kim,10
1,lee,20
2,park,30


## column 다루기

In [84]:
# Current column labels.
info_df.columns

Index(['name', 'age'], dtype='object')

In [85]:
# Selecting one column with a single pair of brackets returns a Series.
print(info_df['name'])
print(type(info_df['name']))

0     kim
1     lee
2    park
Name: name, dtype: object
<class 'pandas.core.series.Series'>


In [87]:
# Double brackets (a list of column names) is fancy indexing:
# the columns come back in the order listed.
info_df[['age', 'name']]

Unnamed: 0,age,name
0,10,kim
1,20,lee
2,30,park


In [93]:
# rename() is typically used to change row/column labels, e.g. between
# Korean and English names. It does not modify the original DataFrame,
# so the result of this first call is simply discarded.
info_df.rename(columns={'name':'이름'})

# To keep the change, assign the result back to the variable ...
info_df = info_df.rename(columns={'name':'이름'})
# ... or rename in place instead:
# info_df.rename(columns={'이름': 'last_name'}, inplace=True)

In [94]:
# Confirm that the 'name' column is now called '이름'.
info_df

Unnamed: 0,이름,age
0,kim,10
1,lee,20
2,park,30


In [95]:
# inplace=True changes info_df itself, so no reassignment is needed.
info_df.rename(columns={'이름': 'last_name'}, inplace=True)

In [96]:
# Confirm that the column is now called 'last_name'.
info_df

Unnamed: 0,last_name,age
0,kim,10
1,lee,20
2,park,30
