### 라이브러리 선언

In [1]:
import pandas as pd

In [2]:
import numpy as np

### 데이터 불러오기

In [3]:
selloutData = pd.read_csv("../dataset/kopo_channel_seasonality_new.csv")

In [4]:
selloutData.dtypes

REGIONID     object
PRODUCT      object
YEARWEEK      int64
QTY         float64
dtype: object

### 1. 데이터 통합

In [5]:
selloutData["REGIONID"] = selloutData["REGIONID"].astype(str)
selloutData["PRODUCT"] = selloutData["PRODUCT"].astype(str)
selloutData["YEARWEEK"] = selloutData["YEARWEEK"].astype(str)
selloutData["QTY"] = selloutData["QTY"].astype(float)

In [6]:
selloutData.dtypes

REGIONID     object
PRODUCT      object
YEARWEEK     object
QTY         float64
dtype: object

### 2. 불량데이터 처리

In [7]:
selloutData["QTY_NEW"] = np.where(selloutData["QTY"] < 0, 0, selloutData["QTY"])

In [8]:
len(selloutData)

124658

### 3. 불량데이터 처리

In [9]:
selloutData["YEAR"] = selloutData.YEARWEEK.astype(str).str[0:4]
selloutData.head(1)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR
0,A60,PRODUCT4,201402,71.0,71.0,2014


In [10]:
selloutData["WEEK"] = selloutData.YEARWEEK.astype(str).str[4:]
selloutData.head(1)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
0,A60,PRODUCT4,201402,71.0,71.0,2014,2


In [11]:
refinedSelloutData = selloutData[selloutData.WEEK.astype(int) <= 52]

In [12]:
len(refinedSelloutData)

123864

### 4. 데이터 정렬 및 집계

In [38]:
sortKey = ["REGIONID","PRODUCT","YEARWEEK"]

In [39]:
sortedDta = refinedSelloutData.sort_values(sortKey)
sortedDta.head()

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
298,A00,PRODUCT34,201401,661.0,661.0,2014,1
1757,A00,PRODUCT34,201402,679.0,679.0,2014,2
3125,A00,PRODUCT34,201403,578.0,578.0,2014,3
205,A00,PRODUCT34,201404,532.0,532.0,2014,4
4369,A00,PRODUCT34,201405,516.0,516.0,2014,5


In [40]:
### 기존 인덱스를 drop 하고 새로 인덱스를 생성
sortedDta = sortedDta.reset_index(drop=True)

In [43]:
sortedDta.head(2)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
0,A00,PRODUCT34,201401,661.0,661.0,2014,1
1,A00,PRODUCT34,201402,679.0,679.0,2014,2


In [44]:
basicKey = ["REGIONID","PRODUCT","YEARWEEK"]

In [45]:
sortedData = selloutData.sort_values(basicKey)

In [47]:
sortedData.head(3)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
298,A00,PRODUCT34,201401,661.0,661.0,2014,1
1757,A00,PRODUCT34,201402,679.0,679.0,2014,2
3125,A00,PRODUCT34,201403,578.0,578.0,2014,3


In [48]:
groupKey = ["REGIONID","PRODUCT","YEAR"]
groupKey

['REGIONID', 'PRODUCT', 'YEAR']

In [49]:
calcKey = ['mean','std']

In [54]:
groupData = sortedData.groupby(groupKey).agg(calcKey)["QTY_NEW"]
groupData.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std
REGIONID,PRODUCT,YEAR,Unnamed: 3_level_1,Unnamed: 4_level_1
A00,PRODUCT34,2014,275.961538,162.179634
A00,PRODUCT34,2015,85.698113,72.858024


In [55]:
groupData = groupData.reset_index()
groupData.head(2)

Unnamed: 0,REGIONID,PRODUCT,YEAR,mean,std
0,A00,PRODUCT34,2014,275.961538,162.179634
1,A00,PRODUCT34,2015,85.698113,72.858024


In [56]:
groupData.columns = ["REGIONID","PRODUCT","YEAR","QTY_MEAN","QTY_STD"]

In [57]:
groupData.head()

Unnamed: 0,REGIONID,PRODUCT,YEAR,QTY_MEAN,QTY_STD
0,A00,PRODUCT34,2014,275.961538,162.179634
1,A00,PRODUCT34,2015,85.698113,72.858024
2,A00,PRODUCT34,2016,36.576923,94.897539
3,A00,PRODUCT58,2014,2.673077,3.889257
4,A00,PRODUCT58,2015,7.54717,14.909427


### 5. 소스 데이터 집계데이터 합치기

In [64]:
refinedSelloutData.head(4)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW,YEAR,WEEK
0,A60,PRODUCT4,201402,71.0,71.0,2014,2
1,A60,PRODUCT59,201402,22275.0,22275.0,2014,2
2,A60,PRODUCT34,201402,4463.0,4463.0,2014,2
3,A60,PRODUCT47,201402,0.0,0.0,2014,2


In [61]:
joinKey = ["REGIONID","PRODUCT","YEAR"]
joinKey

['REGIONID', 'PRODUCT', 'YEAR']

In [62]:
mergedData = pd.merge(refinedSelloutData,groupData,\
         left_on=joinKey, right_on=joinKey) \
[["REGIONID","PRODUCT","YEARWEEK","YEAR","WEEK","QTY_NEW","QTY_MEAN","QTY_STD"]]

In [63]:
mergedData.head()

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,YEAR,WEEK,QTY_NEW,QTY_MEAN,QTY_STD
0,A60,PRODUCT4,201402,2014,2,71.0,29.134615,13.225093
1,A60,PRODUCT4,201401,2014,1,16.0,29.134615,13.225093
2,A60,PRODUCT4,201403,2014,3,51.0,29.134615,13.225093
3,A60,PRODUCT4,201406,2014,6,19.0,29.134615,13.225093
4,A60,PRODUCT4,201404,2014,4,21.0,29.134615,13.225093


In [65]:
mergedData["SEASONALITY"]= mergedData["QTY_NEW"] / mergedData["QTY_MEAN"]

In [66]:
mergedData.head()

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,YEAR,WEEK,QTY_NEW,QTY_MEAN,QTY_STD,SEASONALITY
0,A60,PRODUCT4,201402,2014,2,71.0,29.134615,13.225093,2.436964
1,A60,PRODUCT4,201401,2014,1,16.0,29.134615,13.225093,0.549175
2,A60,PRODUCT4,201403,2014,3,51.0,29.134615,13.225093,1.750495
3,A60,PRODUCT4,201406,2014,6,19.0,29.134615,13.225093,0.652145
4,A60,PRODUCT4,201404,2014,4,21.0,29.134615,13.225093,0.720792


In [81]:
mergedData[["REGIONID","PRODUCT"]].drop_duplicates().head()

Unnamed: 0,REGIONID,PRODUCT
0,A00,PRODUCT34
157,A00,PRODUCT58
314,A00,PRODUCT59
471,A01,PRODUCT1
628,A01,PRODUCT12


In [67]:
mergedData[ (mergedData.REGIONID == 'A01') &
            (mergedData.PRODUCT == 'PRODUCT14') &
            (mergedData.WEEK == '50')]

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,YEAR,WEEK,QTY_NEW,QTY_MEAN,QTY_STD,SEASONALITY
3057,A01,PRODUCT14,201450,2014,50,10559.0,12495.769231,6870.540503,0.845006
60763,A01,PRODUCT14,201550,2015,50,19492.0,18725.924528,8938.3438,1.04091
95236,A01,PRODUCT14,201650,2016,50,26844.0,22567.307692,9437.701749,1.189508


In [68]:
groupKey = ['REGIONID','PRODUCT','WEEK']

In [73]:
finalResult = mergedData.groupby(groupKey).mean()[["SEASONALITY","QTY_NEW","QTY_MEAN"]]

In [74]:
finalResult = finalResult.reset_index()

In [75]:
finalResult.head()

Unnamed: 0,REGIONID,PRODUCT,WEEK,SEASONALITY,QTY_NEW,QTY_MEAN
0,A00,PRODUCT34,1,1.577131,277.666667,132.745525
1,A00,PRODUCT34,2,1.763571,297.333333,132.745525
2,A00,PRODUCT34,3,1.323958,236.0,132.745525
3,A00,PRODUCT34,4,1.496774,238.0,132.745525
4,A00,PRODUCT34,5,1.06481,201.333333,132.745525


In [76]:
finalResult[ (finalResult.REGIONID == 'A01') &
            (finalResult.PRODUCT == 'PRODUCT14') ]

Unnamed: 0,REGIONID,PRODUCT,WEEK,SEASONALITY,QTY_NEW,QTY_MEAN
260,A01,PRODUCT14,1,0.836757,15133.0,17929.66715
261,A01,PRODUCT14,2,0.576154,10441.0,17929.66715
262,A01,PRODUCT14,3,0.60578,10805.0,17929.66715
263,A01,PRODUCT14,4,0.68049,12412.666667,17929.66715
264,A01,PRODUCT14,5,0.625755,11648.333333,17929.66715
265,A01,PRODUCT14,6,0.720447,13072.666667,17929.66715
266,A01,PRODUCT14,7,0.891755,16297.333333,17929.66715
267,A01,PRODUCT14,8,0.883225,15795.0,17929.66715
268,A01,PRODUCT14,9,0.676019,11972.666667,17929.66715
269,A01,PRODUCT14,10,0.628091,10986.0,17929.66715
