In [1]:
import numpy as np
import pandas as pd

### Series

In [8]:
# Build an integer Series; no index is given, so pandas supplies a default one.
s = pd.Series([1, -3, -5, 6, 8])

# The Series itself and its Python type, followed by a blank separator line.
print(s, type(s), '', sep='\n')

# The two parts of a Series: its index labels and its values as a NumPy array.
print(s.index)
print(s.values)

# Element-wise absolute value; the built-in abs() delegates to Series.abs()
# and returns a new Series, leaving `s` unchanged.
print(abs(s))

0    1
1   -3
2   -5
3    6
4    8
dtype: int64
<class 'pandas.core.series.Series'>

RangeIndex(start=0, stop=5, step=1)
[ 1 -3 -5  6  8]
0    1
1    3
2    5
3    6
4    8
dtype: int64


### DataFrame

In [19]:
# Without explicit labels, both rows and columns get default integer labels.
a = pd.DataFrame([[1, 2], [3, 4]])
print(a, type(a), '', sep='\n')

# Same data, now with named columns.
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
print(df, end='\n\n')

# Assigning a scalar to a new column label fills every row with that value.
df['C'] = 0
print(df, end='\n\n')

# Row labels, column labels, and the values as a 2-D NumPy array,
# followed by a blank separator line.
print(df.index, df.columns, df.values, '', sep='\n')

# A new column derived from an existing one via element-wise arithmetic.
df['D'] = df['B'] * 3
print(df)

   0  1
0  1  2
1  3  4
<class 'pandas.core.frame.DataFrame'>

   A  B
0  1  2
1  3  4

   A  B  C
0  1  2  0
1  3  4  0

RangeIndex(start=0, stop=2, step=1)
Index(['A', 'B', 'C'], dtype='object')
[[1 2 0]
 [3 4 0]]

   A  B  C   D
0  1  2  0   6
1  3  4  0  12


### With numpy

In [40]:
# Draw an 8x4 table of standard-normal samples from a *seeded* generator so
# that Restart & Run All reproduces the same numbers. The previous unseeded
# np.random.randn call produced a different table on every run.
# NOTE: outputs recorded before the seed was added show other values;
# re-run the notebook to refresh them.
data = np.random.default_rng(42).standard_normal((8, 4))
print(data)
print()
# With no labels supplied, rows and columns get default integer labels.
print(pd.DataFrame(data))
print()
# Eight consecutive daily timestamps starting at 2022-03-01, used as row labels.
dates = pd.date_range('20220301', periods=8)
print(dates)
# The column labels 'B', 'C', 'A', 'D' are not in alphabetical order, so the
# sort_index(axis=1) call further down visibly reorders them.
df = pd.DataFrame(data, index=dates, columns=list('BCAD'))
print()
print(df)
print(df.index)
print(df.columns)
print(df.values)
print(df.head())  # first 5 rows (default n=5)
print(df.tail())  # last 5 rows (default n=5)
print(df.describe())  # per-column summary statistics
print(df.T)  # transpose: rows become columns and vice versa
print(df.sort_index(ascending=False))  # sort by row index, newest date first
print(df.sort_index(axis=1))  # sort by column labels (A, B, C, D)
print(df.sort_values(by='B'))  # sort rows by the values in column B

[[-0.25832366  1.11380805 -0.78245087 -0.75494614]
 [-0.630572   -0.23736486 -1.06216117 -1.99723712]
 [ 1.32763581  0.48188804 -0.53408177  0.7663672 ]
 [-0.50314519 -0.07059023 -0.54740849 -0.22400312]
 [ 0.28691825  2.50760575  0.30113976 -1.27890107]
 [ 0.22029821  1.34965851  1.76927196  1.19227656]
 [-0.07368139  1.46701628  0.85278373 -0.36552037]
 [ 0.83665458  0.27329678 -1.18248811  1.52273956]]

          0         1         2         3
0 -0.258324  1.113808 -0.782451 -0.754946
1 -0.630572 -0.237365 -1.062161 -1.997237
2  1.327636  0.481888 -0.534082  0.766367
3 -0.503145 -0.070590 -0.547408 -0.224003
4  0.286918  2.507606  0.301140 -1.278901
5  0.220298  1.349659  1.769272  1.192277
6 -0.073681  1.467016  0.852784 -0.365520
7  0.836655  0.273297 -1.182488  1.522740

DatetimeIndex(['2022-03-01', '2022-03-02', '2022-03-03', '2022-03-04',
               '2022-03-05', '2022-03-06', '2022-03-07', '2022-03-08'],
              dtype='datetime64[ns]', freq='D')

                   

### Access

In [47]:
# Re-display the frame and its date index so the selections in the following
# cells can be checked against them. A blank line separates the two.
print(df, end='\n\n')
print(dates)

                   B         C         A         D
2022-03-01 -0.258324  1.113808 -0.782451 -0.754946
2022-03-02 -0.630572 -0.237365 -1.062161 -1.997237
2022-03-03  1.327636  0.481888 -0.534082  0.766367
2022-03-04 -0.503145 -0.070590 -0.547408 -0.224003
2022-03-05  0.286918  2.507606  0.301140 -1.278901
2022-03-06  0.220298  1.349659  1.769272  1.192277
2022-03-07 -0.073681  1.467016  0.852784 -0.365520
2022-03-08  0.836655  0.273297 -1.182488  1.522740

DatetimeIndex(['2022-03-01', '2022-03-02', '2022-03-03', '2022-03-04',
               '2022-03-05', '2022-03-06', '2022-03-07', '2022-03-08'],
              dtype='datetime64[ns]', freq='D')


In [53]:
# Positional row slice: the first three rows.
print(df[0:3])
# Label-based row slice on the date index; both endpoints are included,
# unlike the positional slice above.
print(df['20220302':'20220305'])
print()
# One row selected by its index label, returned as a Series keyed by column name.
print(df.loc[dates[0]])
print()
# The same row restricted to columns A and B.
print(df.loc[dates[0], ['A', 'B']])
# Every row, columns A and B only.
print(df.loc[:, ['A', 'B']])
# A single cell (row label + column label) yields a scalar. The recorded
# output of this cell ends with that scalar, but no statement printed it, so
# the lookup is restored here.
# NOTE(review): the original accessor may have been df.at[...] - confirm.
print(df.loc[dates[0], 'A'])

                   B         C         A         D
2022-03-01 -0.258324  1.113808 -0.782451 -0.754946
2022-03-02 -0.630572 -0.237365 -1.062161 -1.997237
2022-03-03  1.327636  0.481888 -0.534082  0.766367
                   B         C         A         D
2022-03-02 -0.630572 -0.237365 -1.062161 -1.997237
2022-03-03  1.327636  0.481888 -0.534082  0.766367
2022-03-04 -0.503145 -0.070590 -0.547408 -0.224003
2022-03-05  0.286918  2.507606  0.301140 -1.278901

B   -0.258324
C    1.113808
A   -0.782451
D   -0.754946
Name: 2022-03-01 00:00:00, dtype: float64

A   -0.782451
B   -0.258324
Name: 2022-03-01 00:00:00, dtype: float64
                   A         B
2022-03-01 -0.782451 -0.258324
2022-03-02 -1.062161 -0.630572
2022-03-03 -0.534082  1.327636
2022-03-04 -0.547408 -0.503145
2022-03-05  0.301140  0.286918
2022-03-06  1.769272  0.220298
2022-03-07  0.852784 -0.073681
2022-03-08 -1.182488  0.836655
-0.7824508696045633


In [61]:
# Fourth row by position, returned as a Series; a blank line follows it.
print(df.iloc[3], end='\n\n')

# Rows at positions 3-4 and the first two columns (slice stop is excluded).
print(df.iloc[3:5, :2])

# Explicit position lists pick arbitrary rows and columns.
print(df.iloc[[1, 2, 4], [0, 2]])

# Rows at positions 1-2 with every column.
print(df.iloc[1:3])

# Boolean mask: keep only the rows whose value in column A is positive.
print(df.loc[df['A'] > 0])

B   -0.503145
C   -0.070590
A   -0.547408
D   -0.224003
Name: 2022-03-04 00:00:00, dtype: float64

                   B         C
2022-03-04 -0.503145 -0.070590
2022-03-05  0.286918  2.507606
                   B         A
2022-03-02 -0.630572 -1.062161
2022-03-03  1.327636 -0.534082
2022-03-05  0.286918  0.301140
                   B         C         A         D
2022-03-02 -0.630572 -0.237365 -1.062161 -1.997237
2022-03-03  1.327636  0.481888 -0.534082  0.766367
                   B         C         A         D
2022-03-05  0.286918  2.507606  0.301140 -1.278901
2022-03-06  0.220298  1.349659  1.769272  1.192277
2022-03-07 -0.073681  1.467016  0.852784 -0.365520


### 시각화 그래프

In [64]:
import seaborn as sns