# Series 데이터

In [2]:
import pandas as pd
import numpy as np

In [3]:
# np.nan marks a missing entry; the displayed dtype for this Series is float64.
s = pd.Series(data=[1, 2, 3, np.nan])

In [4]:
s

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

## index를 사용한다.

In [5]:
# A scalar value is repeated for every label supplied in `index`.
s = pd.Series(1, index=[0, 1, 2, 3], dtype='float32')

In [6]:
s

0    1.0
1    1.0
2    1.0
3    1.0
dtype: float32

In [8]:
# Attach explicit string labels instead of the default 0..n-1 positions.
s = pd.Series(data=[1, 2, 3], index=['a', 'b', 'c'])
s

a    1
b    2
c    3
dtype: int64

In [9]:
s.index

Index(['a', 'b', 'c'], dtype='object')

In [11]:
# Define the dict in this cell so it runs on a fresh kernel (previously `d`
# was only created in a later cell, so Restart & Run All raised NameError).
d = {'b': 1, 'a': 0, 'c': 2}
# Passing `index` selects and orders the dict entries by the given labels.
pd.Series(d, index=['a', 'b', 'c'])

a    0
b    1
c    2
dtype: int64

## ndarray와 비슷하게 동작한다.

In [12]:
# Five random draws labelled 'a'..'e'; no seed is set, so the values differ on every run.
s = pd.Series(data=np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [13]:
s

a   -1.096483
b    0.068500
c    0.471814
d    1.240316
e    1.133816
dtype: float64

In [14]:
# Positional access: use .iloc, because `s` is labelled with strings and
# integer keys passed to [] on such a Series are deprecated as positions.
s.iloc[0]

-1.0964825667082199

In [15]:
s[:3]

a   -1.096483
b    0.068500
c    0.471814
dtype: float64

In [16]:
s[s>s.median()]

d    1.240316
e    1.133816
dtype: float64

In [17]:
# Pick several rows by position (in the given order) with .iloc; plain []
# with integers on a string-labelled Series is deprecated.
s.iloc[[4, 3, 1]]

e    1.133816
d    1.240316
b    0.068500
dtype: float64

In [18]:
np.exp(s)

a    0.334044
b    1.070900
c    1.602899
d    3.456705
e    3.107492
dtype: float64

## numpy와의 관련성

In [19]:
s.dtype

dtype('float64')

In [20]:
s.array

<PandasArray>
[-1.0964825667082199, 0.06849987951327849, 0.47181358498358517,
   1.240315942077287,   1.133815926760271]
Length: 5, dtype: float64

In [21]:
s.to_numpy()

array([-1.09648257,  0.06849988,  0.47181358,  1.24031594,  1.13381593])

## dict와의 관련성

In [25]:
# Build the Series straight from a dict: keys become the index labels,
# kept in insertion order ('b', 'a', 'c').
d = pd.Series({'b': 1, 'a': 0, 'c': 2})

In [26]:
d['a']

0

In [27]:
d['a']=100

In [28]:
d

b      1
a    100
c      2
dtype: int64

In [30]:
'c' in d

True

In [37]:
# Looking up a label that is not in the index with [] raises KeyError,
# which is caught here so the notebook keeps running and reports the miss.
try:
    d['f']
except KeyError:
    print("f is not there")

f is not there


In [40]:
d.get('f') is None

True

In [39]:
d.get('a')

100

## Series의 벡터 연산, index가 있기 때문에 생기는 차이

In [41]:
s

a   -1.096483
b    0.068500
c    0.471814
d    1.240316
e    1.133816
dtype: float64

In [42]:
s+2

a    0.903517
b    2.068500
c    2.471814
d    3.240316
e    3.133816
dtype: float64

In [43]:
s*2

a   -2.192965
b    0.137000
c    0.943627
d    2.480632
e    2.267632
dtype: float64

In [44]:
np.exp(s)

a    0.334044
b    1.070900
c    1.602899
d    3.456705
e    3.107492
dtype: float64

In [45]:
s[1:]

b    0.068500
c    0.471814
d    1.240316
e    1.133816
dtype: float64

In [46]:
s[:-1]

a   -1.096483
b    0.068500
c    0.471814
d    1.240316
dtype: float64

In [47]:
s[1:]+s[:-1]

a         NaN
b    0.137000
c    0.943627
d    2.480632
e         NaN
dtype: float64

## Name, rename

In [48]:
# A Series can carry a name; with no index given the labels default to 0..4.
# No seed is set, so the values differ on every run.
s = pd.Series(data=np.random.randn(5), name='my name')

In [49]:
s

0    0.472033
1    0.056099
2   -0.228854
3    1.279182
4    0.858202
Name: my name, dtype: float64

In [50]:
s.name

'my name'

In [51]:
s1=s.rename("different")
s1

0    0.472033
1    0.056099
2   -0.228854
3    1.279182
4    0.858202
Name: different, dtype: float64

In [57]:
# Check that rename() returned a different object from `s`.
# Compare the objects themselves: `id(s) is not id(s1)` compared two freshly
# created int objects and therefore said nothing about `s` and `s1`.
s is not s1

True

In [59]:
s.head(n=2)

0    0.472033
1    0.056099
Name: my name, dtype: float64

In [60]:
s.tail(n=2)

3    1.279182
4    0.858202
Name: my name, dtype: float64

In [61]:
# Two consecutive timestamps starting at the beginning of the year 2000.
ser = pd.Series(pd.date_range(start='2000', periods=2))

In [62]:
ser

0   2000-01-01
1   2000-01-02
dtype: datetime64[ns]

In [63]:
ser.to_numpy(dtype=object)

array([Timestamp('2000-01-01 00:00:00'), Timestamp('2000-01-02 00:00:00')],
      dtype=object)

In [64]:
ser.to_numpy(dtype='datetime64[ns]')

array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000'],
      dtype='datetime64[ns]')

## mapping

In [65]:
# String values to be translated to numbers by the mapping further down.
s = pd.Series(
    data=['six', 'seven', 'six', 'seven', 'six'],
    index=['a', 'b', 'c', 'd', 'e'],
)

In [66]:
# Lookup table: the index holds the words, the values hold their numbers.
t = pd.Series([6, 7], index=['six', 'seven'])

In [67]:
s

a      six
b    seven
c      six
d    seven
e      six
dtype: object

In [68]:
t

six      6
seven    7
dtype: int64

In [69]:
s.map(t)

a    6
b    7
c    6
d    7
e    6
dtype: int64