# 시리즈 만들기

In [7]:
import pandas as pd
# Build a Series from a plain list; pandas assigns the default integer
# index (0, 1, ...). Mixing a string and an int yields dtype `object`.
s = pd.Series(data=['banana', 42])
print(s)

0    banana
1        42
dtype: object


In [8]:
# Build a Series with explicit string labels instead of the default
# integer index: `index` supplies one label per value, in order.
s = pd.Series(
    data=['Wes McKinney', 'Creator of Pandas'],
    index=['Person', 'Who'],
)
print(s)

Person         Wes McKinney
Who       Creator of Pandas
dtype: object


# 데이터프레임 만들기

In [15]:
# Build a DataFrame from a dict: each key becomes a column name and each
# list holds that column's values. Rows get the default integer index.
scientist_columns = {
    'Name': ['Rosaline Franklin', 'William Gosset'],
    'Occupation': ['Chemist', 'Statistician'],
    'Born': ['1920-07-25', '1876-06-13'],
    'Died': ['1958-04-16', '1937-10-16'],
    'Age': [37, 61],
}
scientists = pd.DataFrame(data=scientist_columns)
print(scientists)

                Name    Occupation        Born        Died  Age
0  Rosaline Franklin       Chemist  1920-07-25  1958-04-16   37
1     William Gosset  Statistician  1876-06-13  1937-10-16   61


In [14]:
# Build a DataFrame with explicit row labels and an explicit column order.
# `columns` both orders and selects: 'Name' is in the data but not in
# `columns`, so it does not appear in the resulting frame.
scientist_data = {
    'Name': ['Rosaline Franklin', 'William Gosset'],
    'Occupation': ['Chemist', 'Statistician'],
    'Born': ['1920-07-25', '1876-06-13'],
    'Died': ['1958-04-16', '1937-10-16'],
    'Age': [37, 61],
}
row_labels = ['Rosaline Franklin', 'William Gosset']
column_order = ['Occupation', 'Born', 'Age', 'Died']

scientists = pd.DataFrame(
    data=scientist_data,
    index=row_labels,
    columns=column_order,
)
print(scientists)

                     Occupation        Born  Age        Died
Rosaline Franklin       Chemist  1920-07-25   37  1958-04-16
William Gosset     Statistician  1876-06-13   61  1937-10-16


In [16]:
from collections import OrderedDict

# Build the same frame from an OrderedDict, filling it key by key so the
# insertion order (and therefore the column order) is explicit.
ordered_columns = OrderedDict()
ordered_columns['Name'] = ['Rosaline Franklin', 'William Gosset']
ordered_columns['Occupation'] = ['Chemist', 'Statistician']
ordered_columns['Born'] = ['1920-07-25', '1876-06-13']
ordered_columns['Died'] = ['1958-04-16', '1937-10-16']
ordered_columns['Age'] = [37, 61]

scientists = pd.DataFrame(ordered_columns)
print(scientists)

                Name    Occupation        Born        Died  Age
0  Rosaline Franklin       Chemist  1920-07-25  1958-04-16   37
1     William Gosset  Statistician  1876-06-13  1937-10-16   61


# 데이터프레임에서 시리즈 선택하기

In [36]:
# Rebuild the frame with the scientists' names as row labels so that a
# single row can be pulled out by label with `.loc`.
scientist_data = {
    'Occupation': ['Chemist', 'Statistician'],
    'Born': ['1920-07-25', '1876-06-13'],
    'Died': ['1958-04-16', '1937-10-16'],
    'Age': [37, 61],
}
scientists = pd.DataFrame(
    data=scientist_data,
    index=['Rosalin Franklin', 'Willian Gosset'],
    columns=['Occupation', 'Born', 'Died', 'Age'],
)

# Selecting one row label with `.loc` returns a Series whose index is the
# frame's column names.
# NOTE: despite its name, `first_row` holds the row labelled
# 'Willian Gosset', which is the second row of the frame.
first_row = scientists.loc['Willian Gosset']

# The whole frame is printed here, not the selected row.
print(scientists)

                    Occupation        Born        Died  Age
Rosalin Franklin       Chemist  1920-07-25  1958-04-16   37
Willian Gosset    Statistician  1876-06-13  1937-10-16   61


# index, values, keys 사용하기

In [31]:
# keys() returns the labels of the Series; index into it to get the first.
row_labels = first_row.keys()
row_labels[0]

'Occupation'

# 시리즈의 mean, min, max, std 메서드 사용하기

In [39]:
# Pull the 'Age' column out as a Series and compute its standard deviation.
ages = scientists.loc[:, 'Age']
ages.std()

16.97056274847714

# 시리즈와 불린 추출 사용하기

In [48]:
# Load the full scientists dataset from disk (path is relative to the
# notebook's working directory).
scientists = pd.read_csv('../data/scientists.csv')
ages = scientists['Age']

# Comparing a Series with a scalar gives a boolean Series; indexing with it
# keeps only the rows where the mask is True. The result is still a Series.
above_average = ages > ages.mean()
type(ages[above_average])

pandas.core.series.Series

# 시리즈와 브로드캐스팅

In [53]:
# Arithmetic between two Series aligns on index labels, not on position:
# adding the index-reversed copy still pairs each label with itself.
rev_ages = ages.sort_index(ascending=False)
ages + rev_ages

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

# 데이터프레임과 불린 추출

# 데이터프레임과 브로드캐스팅

                Name        Born        Died  Age    Occupation
0  Rosaline Franklin  1920-07-25  1958-04-16   37       Chemist
1     William Gosset  1876-06-13  1937-10-16   61  Statistician
3        Marie Curie  1867-11-07  1934-07-04   66       Chemist


# 열의 자료형 바꾸기와 새로운 열 추가하기

In [68]:
# Parse the 'Born' and 'Died' string columns into datetime values.
date_format = '%Y-%m-%d'
born_datetime = pd.to_datetime(scientists['Born'], format=date_format)
died_datetime = pd.to_datetime(scientists['Died'], format=date_format)

# Store the parsed values as new columns next to the original strings.
scientists['born_dt'] = born_datetime
scientists['died_dt'] = died_datetime

# Subtracting the two datetime columns gives the elapsed time per row.
scientists['age_days_dt'] = scientists['died_dt'] - scientists['born_dt']
print(scientists)

                   Name        Born        Died  Age          Occupation   
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist  \
1        William Gosset  1876-06-13  1937-10-16   61        Statistician   
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse   
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist   
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist   
5             John Snow  1813-03-15  1858-06-16   45           Physician   
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist   
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician   

     born_dt    died_dt age_days_dt  
0 1920-07-25 1958-04-16  13779 days  
1 1876-06-13 1937-10-16  22404 days  
2 1820-05-12 1910-08-13  32964 days  
3 1867-11-07 1934-07-04  24345 days  
4 1907-05-27 1964-04-14  20777 days  
5 1813-03-15 1858-06-16  16529 days  
6 1912-06-23 1954-06-07  15324 days  
7 1777-04-30 1855-02-23  28422 days  

# 시리즈, 데이터프레임의 데이터 섞어보기

In [None]:
import random

# Shuffle the 'Age' column reproducibly.
#
# random.shuffle() swaps elements in place through obj[i] / obj[j]. Calling
# it directly on scientists['Age'] mutates a column selection, which pandas
# does not guarantee is written back to the frame (with Copy-on-Write the
# frame is left unchanged), and scalar integer indexing on a Series is
# label-based, so it only works with the default 0..n-1 index.
# Shuffling a plain list and assigning it back avoids both problems; with
# the same seed, random.shuffle performs the same swaps, so the resulting
# order is the same as shuffling a default-indexed Series in place.
random.seed(42)
shuffled_ages = scientists['Age'].tolist()
random.shuffle(shuffled_ages)
scientists['Age'] = shuffled_ages
print(scientists)

# 데이터프레임의 열 삭제하기

In [84]:
# drop() returns a new frame without the listed columns; `scientists`
# itself is left untouched.
columns_to_remove = ['Died']
scientists_dropped = scientists.drop(columns=columns_to_remove)
print(scientists_dropped)

                   Name        Born  Age          Occupation    born_dt   
0     Rosaline Franklin  1920-07-25   77             Chemist 1920-07-25  \
1        William Gosset  1876-06-13   90        Statistician 1876-06-13   
2  Florence Nightingale  1820-05-12   37               Nurse 1820-05-12   
3           Marie Curie  1867-11-07   61             Chemist 1867-11-07   
4         Rachel Carson  1907-05-27   41           Biologist 1907-05-27   
5             John Snow  1813-03-15   45           Physician 1813-03-15   
6           Alan Turing  1912-06-23   66  Computer Scientist 1912-06-23   
7          Johann Gauss  1777-04-30   56       Mathematician 1777-04-30   

     died_dt age_days_dt  
0 1958-04-16  13779 days  
1 1937-10-16  22404 days  
2 1910-08-13  32964 days  
3 1934-07-04  24345 days  
4 1964-04-14  20777 days  
5 1858-06-16  16529 days  
6 1954-06-07  15324 days  
7 1855-02-23  28422 days  


# 피클 형식으로 저장하기

# CSV 불러오기