In [1]:
import pandas as pd
import numpy as np

In [2]:
dates = pd.date_range('20200625', periods=6)
dates

DatetimeIndex(['2020-06-25', '2020-06-26', '2020-06-27', '2020-06-28',
               '2020-06-29', '2020-06-30'],
              dtype='datetime64[ns]', freq='D')

In [3]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
2020-06-25,0.599921,-0.024623,0.745362,0.797986
2020-06-26,-0.843993,-1.394009,1.216228,-0.899932
2020-06-27,-0.306923,-0.779037,0.891013,0.457907
2020-06-28,0.345191,-0.306771,0.017366,0.883648
2020-06-29,-0.52719,0.339434,-2.388556,0.399834
2020-06-30,-0.417893,0.305258,-1.272104,0.046227


In [4]:
df2 = df.copy()
df2
# df는 기본적으로 행, 열의 위치값을 가지고 있음
# '기본적으로 가지고 있는 위치값(index)'를 이용하여 조작하는 경우 iloc[]를 사용!
# 일부 그래프나 분석 시 특정항목을 index항목으로 수동으로 설정해야하는 경우가 있음
# 특정 항목을 index항목으로 설정가능한 경우는 unique/not null인 값을 가지는 항목만 설정 가능!
# user_id, bbs_id pk조건과 일치!

Unnamed: 0,A,B,C,D
2020-06-25,0.599921,-0.024623,0.745362,0.797986
2020-06-26,-0.843993,-1.394009,1.216228,-0.899932
2020-06-27,-0.306923,-0.779037,0.891013,0.457907
2020-06-28,0.345191,-0.306771,0.017366,0.883648
2020-06-29,-0.52719,0.339434,-2.388556,0.399834
2020-06-30,-0.417893,0.305258,-1.272104,0.046227


In [5]:
df2.reset_index(inplace=True)
df2

Unnamed: 0,index,A,B,C,D
0,2020-06-25,0.599921,-0.024623,0.745362,0.797986
1,2020-06-26,-0.843993,-1.394009,1.216228,-0.899932
2,2020-06-27,-0.306923,-0.779037,0.891013,0.457907
3,2020-06-28,0.345191,-0.306771,0.017366,0.883648
4,2020-06-29,-0.52719,0.339434,-2.388556,0.399834
5,2020-06-30,-0.417893,0.305258,-1.272104,0.046227


In [6]:
df2.set_index('index', inplace=True)
df2

Unnamed: 0_level_0,A,B,C,D
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-06-25,0.599921,-0.024623,0.745362,0.797986
2020-06-26,-0.843993,-1.394009,1.216228,-0.899932
2020-06-27,-0.306923,-0.779037,0.891013,0.457907
2020-06-28,0.345191,-0.306771,0.017366,0.883648
2020-06-29,-0.52719,0.339434,-2.388556,0.399834
2020-06-30,-0.417893,0.305258,-1.272104,0.046227


In [7]:
df2['E'] = df2['A'] + 1 # 브로드캐스팅

In [8]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [9]:
df2['E']

index
2020-06-25    1.599921
2020-06-26    0.156007
2020-06-27    0.693077
2020-06-28    1.345191
2020-06-29    0.472810
2020-06-30    0.582107
Name: E, dtype: float64

In [10]:
df2['F'] = 0
df2['F']

index
2020-06-25    0
2020-06-26    0
2020-06-27    0
2020-06-28    0
2020-06-29    0
2020-06-30    0
Name: F, dtype: int64

In [11]:
df2['G'] = range(0, 6)
df2['G']

index
2020-06-25    0
2020-06-26    1
2020-06-27    2
2020-06-28    3
2020-06-29    4
2020-06-30    5
Name: G, dtype: int32

In [12]:
# 인덱스를 변경하고 싶은 경우, 기존의 인덱스를 살리고 싶으면
# 먼저 reset_index()하고 나서 해야함
# reset을 안하는 경우 기존 index는 사라짐
df2.set_index('G', inplace=True)

In [13]:
df2

Unnamed: 0_level_0,A,B,C,D,E,F
G,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.599921,-0.024623,0.745362,0.797986,1.599921,0
1,-0.843993,-1.394009,1.216228,-0.899932,0.156007,0
2,-0.306923,-0.779037,0.891013,0.457907,0.693077,0
3,0.345191,-0.306771,0.017366,0.883648,1.345191,0
4,-0.52719,0.339434,-2.388556,0.399834,0.47281,0
5,-0.417893,0.305258,-1.272104,0.046227,0.582107,0


In [14]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [15]:
df3 = df.copy()
df3

Unnamed: 0,A,B,C,D
2020-06-25,0.599921,-0.024623,0.745362,0.797986
2020-06-26,-0.843993,-1.394009,1.216228,-0.899932
2020-06-27,-0.306923,-0.779037,0.891013,0.457907
2020-06-28,0.345191,-0.306771,0.017366,0.883648
2020-06-29,-0.52719,0.339434,-2.388556,0.399834
2020-06-30,-0.417893,0.305258,-1.272104,0.046227


In [16]:
# E항목을 파생변수로 만드세요. 1~10 범위 값
# F항목을 파생변수로 만드세요. A열과 C열의 합
# H항목을 파생변수로 만드세요. 1로 설정
# Z항목을 파생변수로 만드세요. 1~9까지 랜덤한 값으로 설정

In [17]:
df3['E'] = [1, 3, 5, 7, 9, 4]
df3['E']

2020-06-25    1
2020-06-26    3
2020-06-27    5
2020-06-28    7
2020-06-29    9
2020-06-30    4
Freq: D, Name: E, dtype: int64

In [18]:
df3['F'] = df3['A'] + df3['C']
df3['F']

2020-06-25    1.345283
2020-06-26    0.372235
2020-06-27    0.584090
2020-06-28    0.362557
2020-06-29   -2.915746
2020-06-30   -1.689997
Freq: D, Name: F, dtype: float64

In [19]:
df3['H'] = 1
df3['H']

2020-06-25    1
2020-06-26    1
2020-06-27    1
2020-06-28    1
2020-06-29    1
2020-06-30    1
Freq: D, Name: H, dtype: int64

In [20]:
import random

In [21]:
df3['Z'] = [random.randint(1, 10) for _ in range(6)]
df3['Z']

2020-06-25     2
2020-06-26     4
2020-06-27     1
2020-06-28    10
2020-06-29     2
2020-06-30    10
Freq: D, Name: Z, dtype: int64

In [22]:
df3

Unnamed: 0,A,B,C,D,E,F,H,Z
2020-06-25,0.599921,-0.024623,0.745362,0.797986,1,1.345283,1,2
2020-06-26,-0.843993,-1.394009,1.216228,-0.899932,3,0.372235,1,4
2020-06-27,-0.306923,-0.779037,0.891013,0.457907,5,0.58409,1,1
2020-06-28,0.345191,-0.306771,0.017366,0.883648,7,0.362557,1,10
2020-06-29,-0.52719,0.339434,-2.388556,0.399834,9,-2.915746,1,2
2020-06-30,-0.417893,0.305258,-1.272104,0.046227,4,-1.689997,1,10
