In [3]:
import pandas as pd
import numpy as np

In [2]:
# Six consecutive calendar days starting on 2020-06-25; used below as the row index.
dates = pd.date_range(start='2020-06-25', periods=6, freq='D')
dates

DatetimeIndex(['2020-06-25', '2020-06-26', '2020-06-27', '2020-06-28',
               '2020-06-29', '2020-06-30'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal draws: one row per date, columns A-D.
# The generator is not seeded, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [5]:
# Show the whole frame; the bare last expression is rendered as the cell output.
df

Unnamed: 0,A,B,C,D
2020-06-25,-0.178358,-0.000614,-1.549596,0.004689
2020-06-26,-1.654163,1.441047,-1.402851,0.650652
2020-06-27,0.060079,-0.87054,1.543165,0.349406
2020-06-28,0.559574,0.569531,0.046158,-0.534061
2020-06-29,0.554191,1.445515,0.049196,0.567235
2020-06-30,0.309619,-0.988008,-0.754965,0.60138


In [6]:
# A DataFrame always carries built-in positional values for its rows and
# columns; use iloc[] when working with those positions.
# Some plots and analyses need a specific column promoted to the index by hand.
# Such a column should hold unique, non-null values -- the same condition as a
# primary key (e.g. user_id, bbs_id).
# NOTE(review): pandas does not enforce uniqueness by default; see
# set_index(verify_integrity=...).

# Work on an independent copy so the original df stays untouched.
df2 = df.copy(deep=True)
df2


Unnamed: 0,A,B,C,D
2020-06-25,-0.178358,-0.000614,-1.549596,0.004689
2020-06-26,-1.654163,1.441047,-1.402851,0.650652
2020-06-27,0.060079,-0.87054,1.543165,0.349406
2020-06-28,0.559574,0.569531,0.046158,-0.534061
2020-06-29,0.554191,1.445515,0.049196,0.567235
2020-06-30,0.309619,-0.988008,-0.754965,0.60138


In [9]:
# Move the (unnamed) date index into a regular column called 'index' and
# replace it with the default positional index.
# Rebinding the result instead of using inplace=True keeps the step chainable
# and makes the data lineage explicit.
df2 = df2.reset_index()

In [10]:
# Show df2 after the reset: the dates now sit in the 'index' column.
df2

Unnamed: 0,index,A,B,C,D
0,2020-06-25,-0.178358,-0.000614,-1.549596,0.004689
1,2020-06-26,-1.654163,1.441047,-1.402851,0.650652
2,2020-06-27,0.060079,-0.87054,1.543165,0.349406
3,2020-06-28,0.559574,0.569531,0.046158,-0.534061
4,2020-06-29,0.554191,1.445515,0.049196,0.567235
5,2020-06-30,0.309619,-0.988008,-0.754965,0.60138


In [11]:
# Promote the 'index' column (the dates) back to the row index.
# The result is rebound rather than mutated with inplace=True.
df2 = df2.set_index('index')

In [12]:
# Show df2 with the 'index' column acting as the row index again.
df2

Unnamed: 0_level_0,A,B,C,D
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-06-25,-0.178358,-0.000614,-1.549596,0.004689
2020-06-26,-1.654163,1.441047,-1.402851,0.650652
2020-06-27,0.060079,-0.87054,1.543165,0.349406
2020-06-28,0.559574,0.569531,0.046158,-0.534061
2020-06-29,0.554191,1.445515,0.049196,0.567235
2020-06-30,0.309619,-0.988008,-0.754965,0.60138


In [13]:
# Broadcasting: the scalar 1 is added to every element of column A.
df2['E'] = 1 + df2['A']

In [14]:
# List the column labels to confirm that 'E' was appended.
df2.columns

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [15]:
# Show the derived column E (column A plus 1).
df2['E']

index
2020-06-25    0.821642
2020-06-26   -0.654163
2020-06-27    1.060079
2020-06-28    1.559574
2020-06-29    1.554191
2020-06-30    1.309619
Name: E, dtype: float64

In [16]:
# Assigning a scalar fills every row of the new column F with that value.
df2['F'] = 0

In [17]:
# Show column F: the scalar 0 repeated for every row.
df2['F']

index
2020-06-25    0
2020-06-26    0
2020-06-27    0
2020-06-28    0
2020-06-29    0
2020-06-30    0
Name: F, dtype: int64

In [18]:
# Running counter 0..n-1, one value per row.
# The length comes from the frame itself, so the assignment keeps working
# if the number of rows changes (a fixed range(0, 6) would raise on a
# length mismatch).
df2['G'] = range(len(df2))
df2['G']

index
2020-06-25    0
2020-06-26    1
2020-06-27    2
2020-06-28    3
2020-06-29    4
2020-06-30    5
Name: G, dtype: int64

In [19]:
# To switch to a different index while keeping the current one as data,
# call reset_index() first. Without that, set_index() replaces the current
# index and its values are dropped.
# The result is rebound rather than mutated with inplace=True.
df2 = df2.set_index('G')

In [20]:
# Show df2 indexed by G; the previous date index is no longer present.
df2

Unnamed: 0_level_0,A,B,C,D,E,F
G,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,-0.178358,-0.000614,-1.549596,0.004689,0.821642,0
1,-1.654163,1.441047,-1.402851,0.650652,-0.654163,0
2,0.060079,-0.87054,1.543165,0.349406,1.060079,0
3,0.559574,0.569531,0.046158,-0.534061,1.559574,0
4,0.554191,1.445515,0.049196,0.567235,1.554191,0
5,0.309619,-0.988008,-0.754965,0.60138,1.309619,0


In [21]:
# 'G' is now the index, so it no longer appears among the column labels.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [23]:
# Fresh, independent copy of the original frame for the derived-column exercise.
df3 = df.copy(deep=True)
df3

Unnamed: 0,A,B,C,D
2020-06-25,-0.178358,-0.000614,-1.549596,0.004689
2020-06-26,-1.654163,1.441047,-1.402851,0.650652
2020-06-27,0.060079,-0.87054,1.543165,0.349406
2020-06-28,0.559574,0.569531,0.046158,-0.534061
2020-06-29,0.554191,1.445515,0.049196,0.567235
2020-06-30,0.309619,-0.988008,-0.754965,0.60138


In [24]:
# Exercise: create the following derived columns.
# E: values in the range 1 to 10
# F: the sum of columns A and C
# H: the constant 1
# Z: a random value from 1 to 9

In [26]:
import random

In [27]:
# random.randint(a, b) includes BOTH endpoints, so the upper bound must be 9
# to stay inside the requested 1-9 range.
# Note: this draws a single number and broadcasts it to every row, so all
# rows of Z share the same value.
df3['Z'] = random.randint(1, 9)
df3['Z']

2020-06-25    2
2020-06-26    2
2020-06-27    2
2020-06-28    2
2020-06-29    2
2020-06-30    2
Freq: D, Name: Z, dtype: int64

In [28]:
# One independent draw per row. random.randint(a, b) includes both endpoints,
# so (1, 9) yields values in the requested 1-9 range.
# The number of draws follows the frame length instead of a hard-coded 6.
df3['Z2'] = [random.randint(1, 9) for _ in range(len(df3))]
df3['Z2']

2020-06-25    4
2020-06-26    4
2020-06-27    2
2020-06-28    4
2020-06-29    1
2020-06-30    6
Freq: D, Name: Z2, dtype: int64