In [2]:
import numpy as np
import pandas as pd

In [6]:
# Float Series with the default 0..4 integer index; NaN and inf are stored
# as ordinary float64 values alongside the finite numbers.
s1 = pd.Series(
    data=[1.0, 3.0, np.nan, np.inf, -1.0],
)
s1

0    1.0
1    3.0
2    NaN
3    inf
4   -1.0
dtype: float64

In [8]:
# Same five values, this time addressed by explicit string labels instead of
# the default integer positions.
s1 = pd.Series(
    data=[1.0, 3.0, np.nan, np.inf, -1.0],
    index=['a', 'b', 'c', 'd', 'e'],
)
s1

a    1.0
b    3.0
c    NaN
d    inf
e   -1.0
dtype: float64

In [9]:
# Inspect the row labels of s1; they come back as a pandas Index object.
s1.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [10]:
# Inspect the data of s1 as a plain NumPy array (labels are dropped).
# NOTE(review): the pandas docs recommend Series.to_numpy() over .values in new code.
s1.values

array([ 1.,  3., nan, inf, -1.])

In [11]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data, in insertion order.
dict_data = {'국어': 100, '영어': 85, '수학': 95}
s2 = pd.Series(data=dict_data)
s2

국어    100
영어     85
수학     95
dtype: int64

## Series 연산

In [15]:
# Ten floats 0.0 .. 9.0 on the default integer index (labels 0..9).
s1 = pd.Series(np.arange(10, dtype=float))
s1

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
9    9.0
dtype: float64

In [16]:
# Nine evenly spaced floats 0.1 .. 0.9 on the default index (labels 0..8).
# linspace states the element count explicitly; arange with a fractional step
# derives its length from a floating-point division, so rounding can add or
# drop the last element.
s2 = pd.Series(np.linspace(0.1, 0.9, 9))
s2

0    0.1
1    0.2
2    0.3
3    0.4
4    0.5
5    0.6
6    0.7
7    0.8
8    0.9
dtype: float64

In [18]:
# Element-wise sum and product. Arithmetic between Series aligns on index
# labels, so a label present in only one operand yields NaN in the result.
print(s1 + s2, s1 * s2, sep='\n')

0    0.1
1    1.2
2    2.3
3    3.4
4    4.5
5    5.6
6    6.7
7    7.8
8    8.9
9    NaN
dtype: float64
0    0.0
1    0.2
2    0.6
3    1.2
4    2.0
5    3.0
6    4.2
7    5.6
8    7.2
9    NaN
dtype: float64


### date_range

In [19]:
# Every calendar day from start to end, both endpoints included
# (daily frequency is the default when no freq is given).
pd.date_range(start='2018-03-05', end='2020-03-05')

DatetimeIndex(['2018-03-05', '2018-03-06', '2018-03-07', '2018-03-08',
               '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12',
               '2018-03-13', '2018-03-14',
               ...
               '2020-02-25', '2020-02-26', '2020-02-27', '2020-02-28',
               '2020-02-29', '2020-03-01', '2020-03-02', '2020-03-03',
               '2020-03-04', '2020-03-05'],
              dtype='datetime64[ns]', length=732, freq='D')

In [21]:
# Month-end dates that fall inside the range. The offset object is used
# instead of the string alias 'M' because pandas 2.2 deprecated that alias in
# favour of 'ME', while older releases reject 'ME'; MonthEnd() is accepted by
# both.
pd.date_range('2018-03-05', '2020-03-05', freq=pd.offsets.MonthEnd())

DatetimeIndex(['2018-03-31', '2018-04-30', '2018-05-31', '2018-06-30',
               '2018-07-31', '2018-08-31', '2018-09-30', '2018-10-31',
               '2018-11-30', '2018-12-31', '2019-01-31', '2019-02-28',
               '2019-03-31', '2019-04-30', '2019-05-31', '2019-06-30',
               '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',
               '2019-11-30', '2019-12-31', '2020-01-31', '2020-02-29'],
              dtype='datetime64[ns]', freq='M')

## DataFrame

In [22]:
# Two 1-D sample arrays of length 10: integers 0..9 and floats 0.0..0.9.
# NOTE(review): despite the names, both are NumPy arrays rather than Python lists.
list1 = np.arange(10)
# linspace pins the element count; a fractional arange step leaves the length
# at the mercy of floating-point rounding.
list2 = np.linspace(0.0, 0.9, 10)

In [26]:
# Integers 0..14 laid out row by row as a 5x3 matrix.
list1 = np.arange(15).reshape((5, 3))
list1

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [28]:
# Wrap the 2-D array in a DataFrame; with no labels given, rows and columns
# both get default integer labels.
df1 = pd.DataFrame(data=list1)
df1

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


In [32]:
# Same data, now with explicit row labels 1..5 and column labels a/b/c.
df1 = pd.DataFrame(
    data=list1,
    index=[1, 2, 3, 4, 5],
    columns=['a', 'b', 'c'],
)
df1

Unnamed: 0,a,b,c
1,0,1,2
2,3,4,5
3,6,7,8
4,9,10,11
5,12,13,14


In [None]:
# Unassigned literal: a list of three identical two-key dicts. The cell was
# never executed and the value is not bound to any name.
# NOTE(review): presumably a sketch of the list-of-records layout that
# pd.DataFrame accepts (one dict per row, keys as column names) - confirm, or
# remove this scratch cell.
[
    {
        'key': 'value',
        'key2': 'value2',
    },
    {
        'key': 'value',
        'key2': 'value2',
    },
    {
        'key': 'value',
        'key2': 'value2',
    },
]

In [33]:
# Random sample scores for five students, one array per subject.
# A locally seeded generator makes the table identical on every
# Restart & Run All and leaves NumPy's global random state untouched.
SCORE_SEED = 42
N_STUDENTS = 5
score_rng = np.random.default_rng(SCORE_SEED)

# Generator.integers excludes the upper bound, like the legacy randint:
# english is drawn from 70..99, kor from 50..99, math from 0..99.
english_score = score_rng.integers(70, 100, N_STUDENTS)
kor_score = score_rng.integers(50, 100, N_STUDENTS)
math_score = score_rng.integers(0, 100, N_STUDENTS)

# Dict keys become the column labels; rows get the default integer index.
score_df = pd.DataFrame({
    'english': english_score,
    'kor': kor_score,
    'math_score': math_score
})

In [35]:
# Display the full score table (one row per student, one column per subject).
score_df

Unnamed: 0,english,kor,math_score
0,85,63,87
1,90,51,75
2,78,91,32
3,92,77,93
4,87,69,70


In [36]:
# Inspect the row labels; no index was supplied when score_df was built, so
# this is the default RangeIndex.
score_df.index

RangeIndex(start=0, stop=5, step=1)

In [37]:
# Inspect the column labels of score_df.
score_df.columns

Index(['english', 'kor', 'math_score'], dtype='object')

In [39]:
# Relabel the columns in place by position: the i-th name replaces the i-th
# existing label, so the list must have exactly one entry per column (pandas
# raises ValueError on a length mismatch). The data itself is untouched.
score_df.columns = ['eng', 'kor', 'math']
score_df

Unnamed: 0,eng,kor,math
0,85,63,87
1,90,51,75
2,78,91,32
3,92,77,93
4,87,69,70


## indexing, slicing

In [41]:
# Row labels: the strings '2011' .. '2017', one per entry in each value list.
index_list = [str(year) for year in range(2011, 2018)]

# One list of seven figures per KTX line; the last line has no value for the
# first four positions, which are stored as NaN.
# NOTE(review): presumably yearly figures per line - the unit is not stated here.
KTX_data = {
    '경부선 KTX': [39060, 39896, 42005, 43621, 41702, 41266, 32427],
    '호남선 KTX': [7313, 6967, 6873, 6626, 8675, 10622, 9228],
    '경전선 KTX': [3627, 4168, 4088, 4424, 4606, 4984, 5570],
    '전라선 KTX': [309, 1771, 1954, 2244, 3146, 3945, 5766],
    '동해선 KTX': [np.nan] * 4 + [2395, 3786, 6667],
}

# Column labels in the same order as the dict keys above.
col_list = list(KTX_data)