In [1]:
import math
import numpy as np
import pandas as pd

### CSV 데이터 불러오기

In [2]:
# Load the sell-out CSV into a DataFrame and preview its first rows.
SELLOUT_CSV = "../dataset/kopo_channel_seasonality_new.csv"
selloutData = pd.read_csv(SELLOUT_CSV)
selloutData.head()

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY
0,A60,PRODUCT4,201402,71.0
1,A60,PRODUCT59,201402,22275.0
2,A60,PRODUCT34,201402,4463.0
3,A60,PRODUCT47,201402,0.0
4,A60,PRODUCT56,201402,23.0


### QTY가 음수인 행 찾기

In [3]:
# Keep only the rows whose QTY is negative, then peek at the first two.
negative_qty = selloutData["QTY"] < 0
answer1 = selloutData.loc[negative_qty]
answer1.head(2)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY
65,A02,PRODUCT16,201403,-1.0
338,A08,PRODUCT36,201402,-1.0


In [4]:
# Number of rows with a negative QTY.
len(answer1)

323

### 음수 QTY를 0으로 변환하여 QTY_NEW 컬럼에 저장

In [5]:
# QTY_NEW mirrors QTY, except that negative quantities are replaced by 0.
raw_qty = selloutData["QTY"]
selloutData["QTY_NEW"] = raw_qty.mask(raw_qty < 0, 0)
selloutData.head(1)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW
0,A60,PRODUCT4,201402,71.0,71.0


In [6]:
# Inspect the column dtypes after QTY_NEW has been added.
# (A preceding bare `selloutData.head(1)` was dropped: only a cell's last
# expression is displayed, so its result was silently discarded.)
selloutData.dtypes

REGIONID     object
PRODUCT      object
YEARWEEK      int64
QTY         float64
QTY_NEW     float64
dtype: object

### YEAR와 WEEK 분리

In [7]:
# Split YEARWEEK (e.g. 201402) into its year and week parts. Both stay
# strings: the first four characters are the year, the remainder the week.
yearweek_text = selloutData["YEARWEEK"].astype(str)
selloutData["YEAR"] = yearweek_text.str[:4]
selloutData["WEEK"] = yearweek_text.str[4:]

In [8]:
# Preview the frame to confirm the new YEAR and WEEK columns.
selloutData.head(2)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
0,A60,PRODUCT4,201402,71.0,71.0,2014,2
1,A60,PRODUCT59,201402,22275.0,22275.0,2014,2


In [9]:
# YEAR and WEEK were built with string slicing, so they show up as object dtype.
selloutData.dtypes

REGIONID     object
PRODUCT      object
YEARWEEK      int64
QTY         float64
QTY_NEW     float64
YEAR         object
WEEK         object
dtype: object

### 53주차 데이터 제거

In [10]:
# Drop every row that belongs to week 53 (WEEK is a string column).
is_week_53 = selloutData["WEEK"] == "53"
cleanData = selloutData[~is_week_53]
cleanData.head(10)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
0,A60,PRODUCT4,201402,71.0,71.0,2014,2
1,A60,PRODUCT59,201402,22275.0,22275.0,2014,2
2,A60,PRODUCT34,201402,4463.0,4463.0,2014,2
3,A60,PRODUCT47,201402,0.0,0.0,2014,2
4,A60,PRODUCT56,201402,23.0,23.0,2014,2
5,A60,PRODUCT57,201402,14.0,14.0,2014,2
6,A60,PRODUCT12,201402,1186.0,1186.0,2014,2
7,A60,PRODUCT1,201402,7.0,7.0,2014,2
8,A60,PRODUCT33,201402,101.0,101.0,2014,2
9,A60,PRODUCT62,201402,336.0,336.0,2014,2


In [11]:
# Row count after the week-53 rows were removed.
len(cleanData)

123864

In [12]:
# Row count of the unfiltered data, for comparison.
len(selloutData)

124658

In [13]:
# Number of rows dropped by the week-53 filter.
len(selloutData)-len(cleanData)

794

### 컬럼명 소문자로 변경 후 정렬

In [14]:
# Lower-case the column names, then order the rows by region, product and
# week and renumber them 0..n-1 so each series is contiguous and in time order.
cleanData.columns = [name.lower() for name in cleanData.columns]
sortedData = (
    cleanData
    .sort_values(["regionid", "product", "yearweek"])
    .reset_index(drop=True)
)

In [15]:
# Preview the sorted, re-indexed frame.
sortedData.head()

Unnamed: 0,regionid,product,yearweek,qty,qty_new,year,week
0,A00,PRODUCT34,201401,661.0,661.0,2014,1
1,A00,PRODUCT34,201402,679.0,679.0,2014,2
2,A00,PRODUCT34,201403,578.0,578.0,2014,3
3,A00,PRODUCT34,201404,532.0,532.0,2014,4
4,A00,PRODUCT34,201405,516.0,516.0,2014,5


### 이동평균 구하기

In [16]:
def sub_function(data, window=17):
    """Return a copy of ``data`` with a centered moving average column ``ma``.

    The average is taken over the ``qty_new`` column, using the rows in their
    existing order. With ``min_periods=1`` the window simply shrinks at the
    edges, so no row gets NaN merely for being near the start or the end.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single series; must contain a ``qty_new`` column.
    window : int, default 17
        Width of the centered rolling window, in rows.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``data`` plus the ``ma`` column.
        The input frame is left untouched.
    """
    moving_average = (
        data["qty_new"]
        .rolling(window=window, center=True, min_periods=1)
        .mean()
    )
    # Build a new frame instead of assigning into ``data``: pandas documents
    # mutating the group object handed to groupby.apply as unsupported.
    return data.assign(ma=moving_average)

In [17]:
# Add the moving average per (regionid, product) series.
# group_keys=False keeps the original flat row index. On pandas >= 2.0 the
# default (True) would prepend regionid/product as index levels, and the later
# groupby on those same column names would then fail because each label would
# be both an index level and a column.
maresult = sortedData.groupby(["regionid","product"], group_keys=False).apply(sub_function)
maresult.head()

Unnamed: 0,regionid,product,yearweek,qty,qty_new,year,week,ma
0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444
1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8
2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636
3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75
4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769


### 변동폭(이동평균의 이동 표준편차) 구하기

In [18]:
def sub_function2(data, window=9):
    """Return a copy of ``data`` with a centered rolling std column ``std``.

    The standard deviation is taken over the ``ma`` column, using the rows in
    their existing order. ``min_periods=1`` lets the window shrink at the
    edges; a window holding a single value still yields NaN, because the
    sample standard deviation of one observation is undefined.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single series; must contain an ``ma`` column.
    window : int, default 9
        Width of the centered rolling window, in rows.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``data`` plus the ``std`` column.
        The input frame is left untouched.
    """
    rolling_std = (
        data["ma"]
        .rolling(window=window, center=True, min_periods=1)
        .std()
    )
    # Build a new frame instead of assigning into ``data``: pandas documents
    # mutating the group object handed to groupby.apply as unsupported.
    return data.assign(std=rolling_std)

In [19]:
# Add the rolling standard deviation per (regionid, product) series.
# group_keys=False keeps the flat row index. On pandas >= 2.0 the default
# (True) would turn regionid/product into index levels as well as columns,
# making the next groupby on those names ambiguous.
stdresult = maresult.groupby(["regionid","product"], group_keys=False).apply(sub_function2)
stdresult.head()

Unnamed: 0,regionid,product,yearweek,qty,qty_new,year,week,ma,std
0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444,15.591135
1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8,19.362507
2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636,23.007444
3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75,26.416662
4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769,29.984975


### UPPER BOUND / LOWER BOUND 구하기

In [20]:
# The band around the moving average is one rolling standard deviation wide
# on each side.
band_center = stdresult["ma"]
band_halfwidth = stdresult["std"]
stdresult["upper_bound"] = band_center + band_halfwidth
stdresult["lower_bound"] = band_center - band_halfwidth

In [21]:
# Preview the frame with the new upper_bound / lower_bound columns.
stdresult.head()

Unnamed: 0,regionid,product,yearweek,qty,qty_new,year,week,ma,std,upper_bound,lower_bound
0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444,15.591135,530.035579,498.853309
1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8,19.362507,536.162507,497.437493
2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636,23.007444,526.371081,480.356192
3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75,26.416662,517.166662,464.333338
4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769,29.984975,510.215744,450.245795


### 정제된 판매량 구하기

In [22]:
# Pull quantities that fall outside the [lower_bound, upper_bound] band back
# to the nearest bound; values inside the band are kept as they are. The
# upper-bound test is evaluated first, exactly as in a nested np.where.
observed_qty = stdresult["qty_new"]
upper_limit = stdresult["upper_bound"]
lower_limit = stdresult["lower_bound"]
stdresult["refined_qty"] = np.select(
    [observed_qty > upper_limit, observed_qty < lower_limit],
    [upper_limit, lower_limit],
    default=observed_qty,
)
stdresult.head()

Unnamed: 0,regionid,product,yearweek,qty,qty_new,year,week,ma,std,upper_bound,lower_bound,refined_qty
0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444,15.591135,530.035579,498.853309,530.035579
1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8,19.362507,536.162507,497.437493,536.162507
2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636,23.007444,526.371081,480.356192,526.371081
3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75,26.416662,517.166662,464.333338,517.166662
4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769,29.984975,510.215744,450.245795,510.215744


### 스무딩 처리

In [23]:
def sub_function3(data, window=5):
    """Return a renumbered copy of ``data`` with a smoothed column ``smooth``.

    ``smooth`` is the centered rolling mean of ``refined_qty``, using the rows
    in their existing order. ``min_periods=1`` lets the window shrink at the
    edges instead of producing NaN there.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single series; must contain a ``refined_qty`` column.
    window : int, default 5
        Width of the centered rolling window, in rows.

    Returns
    -------
    pandas.DataFrame
        A new frame whose index is renumbered 0..n-1 and which carries the
        extra ``smooth`` column. The input frame, including its index, is
        left untouched.
    """
    # Renumber on a copy rather than with inplace=True: pandas documents
    # mutating the group object handed to groupby.apply as unsupported.
    renumbered = data.reset_index(drop=True)
    smoothed = (
        renumbered["refined_qty"]
        .rolling(window=window, center=True, min_periods=1)
        .mean()
    )
    return renumbered.assign(smooth=smoothed)

# Smooth every (regionid, product) series independently.
series_keys = ["regionid", "product"]
smoothData = stdresult.groupby(series_keys).apply(sub_function3)

smoothData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,regionid,product,yearweek,qty,qty_new,year,week,ma,std,upper_bound,lower_bound,refined_qty,smooth
regionid,product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
A00,PRODUCT34,0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444,15.591135,530.035579,498.853309,530.035579,530.856389
A00,PRODUCT34,1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8,19.362507,536.162507,497.437493,536.162507,527.433957
A00,PRODUCT34,2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636,23.007444,526.371081,480.356192,526.371081,523.990314
A00,PRODUCT34,3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75,26.416662,517.166662,464.333338,517.166662,504.548131
A00,PRODUCT34,4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769,29.984975,510.215744,450.245795,510.215744,480.364578


### 계절성 지수 1/2 구하기

In [24]:
# Seasonality index = quantity relative to the smoothed level:
#   seasonality1 uses the zero-floored quantity (qty_new),
#   seasonality2 uses the band-limited quantity (refined_qty).
smoothed_level = smoothData["smooth"]
smoothData["seasonality1"] = smoothData["qty_new"].div(smoothed_level)
smoothData["seasonality2"] = smoothData["refined_qty"].div(smoothed_level)
smoothData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,regionid,product,yearweek,qty,qty_new,year,week,ma,std,upper_bound,lower_bound,refined_qty,smooth,seasonality1,seasonality2
regionid,product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A00,PRODUCT34,0,A00,PRODUCT34,201401,661.0,661.0,2014,1,514.444444,15.591135,530.035579,498.853309,530.035579,530.856389,1.245158,0.998454
A00,PRODUCT34,1,A00,PRODUCT34,201402,679.0,679.0,2014,2,516.8,19.362507,536.162507,497.437493,536.162507,527.433957,1.287365,1.016549
A00,PRODUCT34,2,A00,PRODUCT34,201403,578.0,578.0,2014,3,503.363636,23.007444,526.371081,480.356192,526.371081,523.990314,1.103074,1.004544
A00,PRODUCT34,3,A00,PRODUCT34,201404,532.0,532.0,2014,4,490.75,26.416662,517.166662,464.333338,517.166662,504.548131,1.054409,1.02501
A00,PRODUCT34,4,A00,PRODUCT34,201405,516.0,516.0,2014,5,480.230769,29.984975,510.215744,450.245795,510.215744,480.364578,1.074184,1.062143


In [25]:
# Show the current working directory; the relative "../" paths used in this
# notebook resolve against it.
pwd

'D:\\Python_CJ_Project\\Python_CJ_ST\\Session07 - Data Visualization'

### CSV로 저장하기

In [26]:
# Write the result without the index: after the grouped apply the index
# levels repeat the regionid/product columns (plus a per-group row counter),
# so exporting it would put duplicate regionid/product headers in the CSV.
smoothData.to_csv("../smoothData_result.csv", index=False)