In [43]:
import pandas as pd

### 1. 데이터 불러오기

In [44]:
# Path of the raw sell-out CSV, relative to the notebook's working directory.
selloutCsvPath = "../dataset/kopo_channel_seasonality_new.csv"

# Read the CSV into a DataFrame and preview the first rows.
selloutData = pd.read_csv(selloutCsvPath)
selloutData.head()

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY
0,A60,PRODUCT4,201402,71.0
1,A60,PRODUCT59,201402,22275.0
2,A60,PRODUCT34,201402,4463.0
3,A60,PRODUCT47,201402,0.0
4,A60,PRODUCT56,201402,23.0


### 2. 데이터 타입 통합

In [45]:
# Cast each column to a fixed dtype: the three key columns become strings,
# the quantity becomes a float.
for columnName, targetType in (
    ("REGIONID", "str"),
    ("PRODUCT", "str"),
    ("YEARWEEK", "str"),
    ("QTY", "float"),
):
    selloutData[columnName] = selloutData[columnName].astype(targetType)

In [46]:
# Display the dtype of every column in selloutData.
selloutData.dtypes

REGIONID     object
PRODUCT      object
YEARWEEK     object
QTY         float64
dtype: object

In [47]:
import numpy as np

In [48]:
# Build QTY_NEW from QTY in two steps:
#   1. quantities below 10000 are replaced by 10000, all others are kept;
#   2. negative quantities are then replaced by 0 (this rule wins over step 1).
# NOTE(review): raising small quantities up to 10000 looks unusual -- confirm
# the intended threshold rule. QTY_NEW is assigned again later in the notebook.
qty = selloutData["QTY"]
raisedQty = np.where(qty < 10000, 10000, qty)
selloutData["QTY_NEW"] = np.where(qty < 0, 0, raisedQty)

In [49]:
# Show the first two rows of selloutData.
selloutData.head(2)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW
0,A60,PRODUCT4,201402,71.0,10000.0
1,A60,PRODUCT59,201402,22275.0,22275.0


In [50]:
# Keep exactly these columns, in this order.
selloutColumns = ["REGIONID", "PRODUCT", "YEARWEEK", "QTY", "QTY_NEW"]
selloutData = selloutData[selloutColumns]

### 3. 반품데이터 처리

In [51]:
# Recompute QTY_NEW: negative quantities become 0, every other value is
# copied from QTY unchanged.
qty = selloutData["QTY"]
isNegative = qty < 0
selloutData["QTY_NEW"] = np.where(isNegative, 0, qty)

In [52]:
# Spot-check: first two rows whose original QTY is negative.
negativeQtyRows = selloutData["QTY"] < 0
selloutData[negativeQtyRows].head(2)

Unnamed: 0,REGIONID,PRODUCT,YEARWEEK,QTY,QTY_NEW
65,A02,PRODUCT16,201403,-1.0,0.0
338,A08,PRODUCT36,201402,-1.0,0.0


### 4. 53주차 제거 및 YEAR, WEEK 컬럼 생성

In [53]:
# Split the YEARWEEK string: its first four characters go to YEAR and
# everything after them goes to WEEK. Both new columns hold strings.
yearweek = selloutData["YEARWEEK"]
selloutData["YEAR"] = yearweek.str[:4]
selloutData["WEEK"] = yearweek.str[4:]

In [54]:
# Keep only the rows whose week number is 52 or lower.
weekNumber = selloutData["WEEK"].astype(int)
refinedSelloutData = selloutData[weekNumber <= 52]

In [55]:
# Largest WEEK value remaining in refinedSelloutData. WEEK holds strings, so
# this is a string (lexicographic) maximum, not a numeric one.
refinedSelloutData["WEEK"].max()

'52'

### 5. 집계데이터 활용 평균 판매량 생성

In [56]:
# Grouping keys: one group per region, product and year.
groupKey = ["REGIONID","PRODUCT","YEAR"]

In [57]:
# Mean of QTY_NEW within each groupKey group.
# QTY_NEW is selected *before* aggregating: calling .mean() on the whole
# grouped frame also tries to average the string columns (YEARWEEK, WEEK),
# which raises a TypeError on pandas >= 2.0 and is wasted work on older
# versions. The result is still a one-column DataFrame indexed by the group keys.
groupData = refinedSelloutData.groupby(groupKey)[["QTY_NEW"]].mean()

In [58]:
# Move the group keys out of the index so they become ordinary columns again.
groupResult = groupData.reset_index()

In [59]:
# Preview the first rows of the per-group averages.
groupResult.head()

Unnamed: 0,REGIONID,PRODUCT,YEAR,QTY_NEW
0,A00,PRODUCT34,2014,275.961538
1,A00,PRODUCT34,2015,86.634615
2,A00,PRODUCT34,2016,36.576923
3,A00,PRODUCT58,2014,2.673077
4,A00,PRODUCT58,2015,5.711538


In [60]:
# Rename the averaged column to QTY_MEAN so it does not clash with QTY_NEW
# when joined back onto the weekly rows. Renaming by name (instead of
# overwriting every label positionally) cannot mislabel columns if their
# order or count ever changes.
groupResult = groupResult.rename(columns={"QTY_NEW": "QTY_MEAN"})

In [61]:
# Keys for the join below: region, product and year (same list as the one
# used for the yearly aggregation).
groupKey = ["REGIONID","PRODUCT","YEAR"]

### 6. 정제데이터와 집계데이터 조인 (지역/상품/연도)

In [62]:
# Left-join the per-group averages onto every refined row, matching on
# groupKey, so each row carries its group's QTY_MEAN.
mergedData = refinedSelloutData.merge(
    groupResult,
    how="left",
    on=groupKey,
)

### 7. 주차별 판매효과 산출

In [63]:
# Seasonality ratio: each row's QTY_NEW divided by the QTY_MEAN joined onto it.
# NOTE(review): rows whose QTY_MEAN is 0 yield NaN/inf rather than an error --
# confirm that is acceptable downstream.
mergedData["SEASONALITY"] = mergedData["QTY_NEW"] / mergedData["QTY_MEAN"]

In [64]:
# Regroup by region, product and week (YEAR is no longer part of the key).
groupKey = ["REGIONID","PRODUCT",'WEEK']

In [65]:
# Mean QTY and mean SEASONALITY within each groupKey group.
# The columns are passed as a list: selecting several GroupBy columns with a
# bare tuple (single brackets around two names) was deprecated in pandas 1.0
# and raises an error from pandas 2.0 onwards.
finalData = mergedData.groupby(groupKey)[["QTY", "SEASONALITY"]].mean()

In [66]:
# Move the group keys out of the index so they become ordinary columns again.
finalResult = finalData.reset_index()

In [67]:
# Preview the first rows of the final result.
finalResult.head()

Unnamed: 0,REGIONID,PRODUCT,WEEK,QTY,SEASONALITY
0,A00,PRODUCT34,1,277.666667,1.570782
1,A00,PRODUCT34,2,297.333333,1.75554
2,A00,PRODUCT34,3,236.0,1.31946
3,A00,PRODUCT34,4,238.0,1.490298
4,A00,PRODUCT34,5,201.333333,1.061909
