In [1]:
import pandas as pd
pd.__version__

'2.2.3'

In [3]:
data = ["시가", "고가"]
s = pd.Series(data)
s

0    시가
1    고가
dtype: object

In [5]:
data = [80000, "90000"]
s = pd.Series(data)
s

0    80000
1    90000
dtype: object

In [8]:
data = [1000, 2000, 3000]
s = pd.Series(data)
print(s.index, type(s.index))

RangeIndex(start=0, stop=3, step=1) <class 'pandas.core.indexes.range.RangeIndex'>


In [9]:
list(s.index)

[0, 1, 2]

In [10]:
s.index.to_list()

[0, 1, 2]

In [14]:
# help(s.index) # 클래스에는 항상 속성과 메서드 존재

In [29]:
# 시리즈 생성하면서 인덱스도 같이 생성
data = [1000, 2000, 3000]
index = ["A", "B", "C"]

s1 = pd.Series(data, index)
s1

A    1000
B    2000
C    3000
dtype: int64

In [30]:
s.values, type(s.values)

(array([1000, 2000, 3000]), numpy.ndarray)

# 시리즈 인덱싱

In [33]:
print(s1.iloc[2])

3000


In [34]:
print(s1.loc["B"])

2000


In [36]:
# 시리즈 생성하면서 인덱스도 같이 생성
data = [1000, 2000, 3000]
# index가 없음
s2 = pd.Series(data)
print

0    1000
1    2000
2    3000
dtype: int64

In [39]:
print(s2.iloc[0]) # iloc에서의 0은 위치 0을 의미
print(s2.loc[0]) # loc에서의 0은 라벨 0을 의미

1000
1000


In [40]:
s1

A    1000
B    2000
C    3000
dtype: int64

In [41]:
s2

0    1000
1    2000
2    3000
dtype: int64

In [47]:
s1.iloc[0:2]

A    1000
B    2000
dtype: int64

In [48]:
s1.loc["A":"B"]

A    1000
B    2000
dtype: int64

In [49]:
s1["B"] = 500
s1

A    1000
B     500
C    3000
dtype: int64

In [52]:
s1.iloc[0] = 500
s

0     500
1    2000
2    3000
dtype: int64

# 시리즈 연산

In [56]:
date = ["6/1", "6/2", "6/3", "6/4", "6/5"]
high = pd.Series([42800, 42700, 42050, 42950, 43000], index=date)
low = pd.Series([42150, 42150, 41300, 42150, 42350] , index=date)

diff = high - low
print(diff)

6/1    650
6/2    550
6/3    750
6/4    800
6/5    650
dtype: int64


In [59]:
diff.idxmax(), diff[diff.idxmax()]

('6/4', np.int64(800))

In [62]:
diff.idxmin(), diff[diff.idxmin()]

('6/2', np.int64(550))

## 시리즈아 Map

In [63]:
text = '1,234'
text.replace(',','')

'1234'

In [64]:
int(text.replace(',',''))

1234

In [67]:
num_str = ['1,234', '5,678', '12,345']
results = []
for num in num_str:
    temp = int(num.replace(',',''))
    results.append(temp)

results

[1234, 5678, 12345]

In [68]:
def rm_comma(x):
    return int(x.replace(',',''))

In [69]:
s = pd.Series(['1,234', '5,678', '12,345'])
result = s.map(rm_comma)
result

0     1234
1     5678
2    12345
dtype: int64

### 문제
- 기준점이 13이상
-  + 크다 or 작다

In [76]:
s = pd.Series([5, 10, 15, 20])

def measure(x):
    if x>=13:
        return "크다"
    else:
        return "작다"

s.map(measure)

0    작다
1    작다
2    크다
3    크다
dtype: object

In [72]:
def is_greater_13(x): 

    return_text = None
    if x >= 13:
        return "크다"
    else:
        return "작다" 

s = pd.Series([10, 15, 7, 20, 13])
s.map(is_greater_13)

0    작다
1    크다
2    작다
3    크다
4    크다
dtype: object

In [73]:
result = s.map(lambda x: '크다' if x >=13 else '작다')
print(result)

0    작다
1    크다
2    작다
3    크다
4    크다
dtype: object


In [74]:
result = s.apply(lambda x: '크다' if x >=13 else '작다')
print(result)

0    작다
1    크다
2    작다
3    크다
4    크다
dtype: object


## 필터링

In [79]:
data = [42500, 42550, 41800, 42550, 42650]
index = ['2019-05-31', '2019-05-30', '2019-05-29', '2019-05-28', '2019-05-27']
s = pd.Series(data=data, index=index)
s

2019-05-31    42500
2019-05-30    42550
2019-05-29    41800
2019-05-28    42550
2019-05-27    42650
dtype: int64

In [80]:
cond = s > 42000
print(cond)

2019-05-31     True
2019-05-30     True
2019-05-29    False
2019-05-28     True
2019-05-27     True
dtype: bool


In [81]:
s[cond]

2019-05-31    42500
2019-05-30    42550
2019-05-28    42550
2019-05-27    42650
dtype: int64

In [92]:
close = [42500, 42550, 41800, 42550, 42650]
open = [42600, 42200, 41850, 42550, 42500]
index = ['2019-05-31', '2019-05-30', '2019-05-29', '2019-05-28', '2019-05-27']

open = pd.Series(data=open, index=index)
close = pd.Series(data=close, index=index)

open, close

(2019-05-31    42600
 2019-05-30    42200
 2019-05-29    41850
 2019-05-28    42550
 2019-05-27    42500
 dtype: int64,
 2019-05-31    42500
 2019-05-30    42550
 2019-05-29    41800
 2019-05-28    42550
 2019-05-27    42650
 dtype: int64)

In [94]:
close[close > open]

2019-05-30    42550
2019-05-27    42650
dtype: int64