# Series

- pandas에서 사용하는 1차원 배열.
- index를 사용 할 수 있습니다.

In [1]:
import pandas as pd
import numpy as np

In [19]:
arr = np.arange(100, 110)
print(arr)

[100 101 102 103 104 105 106 107 108 109]


In [20]:
s = pd.Series(arr)
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int64


In [26]:
s = pd.Series(arr, dtype= 'float32')
print(s)

0    100.0
1    101.0
2    102.0
3    103.0
4    104.0
5    105.0
6    106.0
7    107.0
8    108.0
9    109.0
dtype: float32


In [29]:
s = pd.Series(['kim','lee', 'park','hello'])
print(s)

0      kim
1      lee
2     park
3    hello
dtype: object


In [31]:
s[3]

'hello'

In [33]:
#음수 인덱싱은 지원하지 않음
#s[-1]

In [34]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [44]:
names = pd.Series(['kim','lee', 'park'], index =['a', 'b', 'c'])

In [48]:
#names[0]
names.iloc[0] #숫자형태로 인덱스에 접근 하고 싶을 때

'kim'

In [43]:
names['a'] # names[0] 

'kim'

In [42]:
names.index

Index(['a', 'b', 'c'], dtype='object')

In [49]:
names.values

array(['kim', 'lee', 'park'], dtype=object)

In [51]:
names.ndim #몇차원인지

1

In [52]:
names.shape

(3,)

## NaN(Not a Number)

In [54]:
s = pd.Series([1, 2, 3, np.nan])
print(s)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64


## fancy indexing

In [56]:
f = ['banana' , 'apple', 'grape', np.nan]
s = pd.Series(f, index=list('abcd'))
print(s)

a    banana
b     apple
c     grape
d       NaN
dtype: object


In [57]:
s['a']

'banana'

In [59]:
s[['d', 'a']]

d       NaN
a    banana
dtype: object

In [60]:
# s[[3,1]]
s.iloc[[3,1]]

d      NaN
b    apple
dtype: object

## bool indexing

In [61]:
s

a    banana
b     apple
c     grape
d       NaN
dtype: object

In [63]:
s[[True, False, True, False]]

a    banana
c     grape
dtype: object

In [64]:
s == 'banana'

a     True
b    False
c    False
d    False
dtype: bool

In [65]:
s[s == 'banana']

a    banana
dtype: object

In [73]:
s = pd.Series([1, 2, 3, 4, 5, 6])
s > 3

0    False
1    False
2    False
3     True
4     True
5     True
dtype: bool

In [74]:
s = pd.Series([1, 2, 3, 4, 5, 6])
s[s >3]

3    4
4    5
5    6
dtype: int64

## 결측치(NaN)처리

In [75]:
s = pd.Series([1,3, np.nan, 10, 11, np.nan])

In [77]:
s.isnull

<bound method Series.isnull of 0     1.0
1     3.0
2     NaN
3    10.0
4    11.0
5     NaN
dtype: float64>

In [79]:
s[s.isnull()]

2   NaN
5   NaN
dtype: float64

In [80]:
#s[s.isnul()]
s[s.isna()]

2   NaN
5   NaN
dtype: float64

In [84]:
s[s.notnull()]

0     1.0
1     3.0
3    10.0
4    11.0
dtype: float64

In [87]:
s = pd.Series([1, 2, 3]), index=list('abc'))
s[1:2]

SyntaxError: unmatched ')' (1890276119.py, line 1)

# Dataframe
- 2차원 데이터 구조(excel, sheet와 유사)
- 행(row), 열(column)구조

In [89]:
d = pd.DataFrame([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
d

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [91]:
d = pd.DataFrame([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
], columns=['가', '나', '다'])
d

Unnamed: 0,가,나,다
0,1,2,3
1,4,5,6
2,7,8,9


In [92]:
info = {
    'name' : ['kim', 'lee', 'park'],
    'age' : [10, 20, 30]
}

In [94]:
info_df = pd.DataFrame(info)

In [95]:
info_df.index

RangeIndex(start=0, stop=3, step=1)

In [96]:
info_df.values

array([['kim', 10],
       ['lee', 20],
       ['park', 30]], dtype=object)

In [98]:
info_df.columns

Index(['name', 'age'], dtype='object')

In [99]:
info_df.dtypes

name    object
age      int64
dtype: object

In [102]:
info_df.T

Unnamed: 0,0,1,2
name,kim,lee,park
age,10,20,30


# index 지정

In [105]:
info_df.index = list('abc')

In [106]:
info_df

Unnamed: 0,name,age
a,kim,10
b,lee,20
c,park,30


# column 다루기

In [109]:
info_df.columns

Index(['name', 'age'], dtype='object')

In [112]:
print(info_df['name'])
print(type(info_df['name']))

a     kim
b     lee
c    park
Name: name, dtype: object
<class 'pandas.core.series.Series'>


In [113]:
info_df[['age', 'name']]

Unnamed: 0,age,name
a,10,kim
b,20,lee
c,30,park


In [114]:
info_df.rename(columns = {'name' : '이름'})

Unnamed: 0,이름,age
a,kim,10
b,lee,20
c,park,30


In [115]:
info_df

Unnamed: 0,name,age
a,kim,10
b,lee,20
c,park,30


In [116]:
info_df = info_df.rename(columns = {'name' : '이름'}) #재할당을 해줘야 저장

In [117]:
info_df

Unnamed: 0,이름,age
a,kim,10
b,lee,20
c,park,30


In [118]:
info_df.rename(columns={'이름':'last_name'}, inplace = True) #따로 할당 안해도 바로 저장해서 출력

In [119]:
info_df

Unnamed: 0,last_name,age
a,kim,10
b,lee,20
c,park,30
