# 식품

In [24]:
import numpy as np
import pandas as pd
import datetime as datetime
import dataload
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
tqdm.pandas()

## 0. 데이터 로드

### 0-1. 구매이력

In [21]:
# Read the time-series clustering result for food categories (cp949-encoded CSV).
food_cluster = pd.read_csv(
    '../data/food_tsclustering.csv',
    encoding='cp949',
)

In [23]:
# Keep only the category name and its cluster label, then preview the result.
food_cluster = food_cluster.loc[:, ['sm_cat', 'cluster']]
food_cluster.head()

Unnamed: 0,sm_cat,cluster
0,가공란,2
1,가자미,2
2,갈비/찜/바비큐용 돈육,3
3,갈비용 우육,3
4,갈치,4


In [25]:
# Load the purchase history through the project-local `dataload` module.
# NOTE(review): the schema is not visible here; the preview below shows
# date, sex, age, big_cat, sm_cat and qty columns.
buy_full = dataload.load_buy()

In [80]:
# Parse the yyyymmdd `date` column with one vectorized call instead of a
# per-row `apply`, which calls pd.to_datetime once for every row.
buy_full['date'] = pd.to_datetime(buy_full['date'], format='%Y%m%d')
# Calendar year of each purchase, used below to restrict the analysis period.
buy_full['year'] = buy_full['date'].dt.year

In [81]:
# Preview the purchase history after adding the parsed `date` and `year` columns.
buy_full.head()

Unnamed: 0,date,sex,age,big_cat,sm_cat,qty,year
0,2018-01-01,F,20,식품,가공란,37,2018
1,2018-01-01,F,30,식품,가공란,16,2018
2,2018-01-01,F,40,식품,가공란,9,2018
3,2018-01-01,F,50,식품,가공란,3,2018
4,2018-01-01,M,20,식품,가공란,13,2018


In [82]:
# Restrict the purchase history to the years 2018 and 2019.
buy1819 = buy_full[buy_full['year'].isin([2018, 2019])]

In [83]:
# Keep only the rows whose top-level category is food ('식품').
food1819 = buy1819.loc[buy1819['big_cat'].eq('식품')]

In [84]:
# Preview the 2018-2019 food purchases.
food1819.head()

Unnamed: 0,date,sex,age,big_cat,sm_cat,qty,year
0,2018-01-01,F,20,식품,가공란,37,2018
1,2018-01-01,F,30,식품,가공란,16,2018
2,2018-01-01,F,40,식품,가공란,9,2018
3,2018-01-01,F,50,식품,가공란,3,2018
4,2018-01-01,M,20,식품,가공란,13,2018


In [97]:
# Attach each sub-category's cluster label to the food purchases.
# Inner join: sub-categories without a cluster label are dropped.
food = pd.merge(food1819, food_cluster, how='inner', on='sm_cat')
food.head()

Unnamed: 0,date,sex,age,big_cat,sm_cat,qty,year,cluster
0,2018-01-01,F,20,식품,가공란,37,2018,2
1,2018-01-01,F,30,식품,가공란,16,2018,2
2,2018-01-01,F,40,식품,가공란,9,2018,2
3,2018-01-01,F,50,식품,가공란,3,2018,2
4,2018-01-01,M,20,식품,가공란,13,2018,2


In [98]:
# Split the labelled food purchases into one frame per cluster label (0 through 4).
food0, food1, food2, food3, food4 = (
    food[food['cluster'] == label] for label in range(5)
)

### 0-2. 날씨

In [100]:
# Load the prepared weather table (cp949-encoded CSV).
weather = pd.read_csv('../data/weather_final.csv', encoding='cp949')
# Parse the '날짜' (date) column with one vectorized call instead of a per-row
# `apply`, so it can later be joined against the purchase `date` column.
weather['날짜'] = pd.to_datetime(weather['날짜'], format='%Y-%m-%d')

weather.head()

Unnamed: 0,날짜,지점번호,평균기온,최고기온,최저기온,일별강수량,1시간최대강수량,평균풍속,지역,PM10,...,분기,요일,공휴일명,공휴일여부,주말여부,계절,체감온도,열지수,폭염여부,강수여부
0,2018-01-01,105,1.3,5.7,-2.1,0.0,0.0,3.7,강릉,20.066667,...,1,0,1월1일,1,0,3,7.502056,-39.701524,0,0
1,2018-01-01,112,-0.3,2.7,-2.7,0.0,0.0,1.6,인천,37.518681,...,1,0,1월1일,1,0,3,9.992379,-42.924922,0,0
2,2018-01-01,119,-1.7,4.7,-6.9,0.0,0.0,1.0,수원,42.782895,...,1,0,1월1일,1,0,3,10.136402,-45.791893,0,0
3,2018-01-01,136,-1.0,4.7,-6.5,0.0,0.0,2.2,안동,39.75,...,1,0,1월1일,1,0,3,8.356692,-44.375704,0,0
4,2018-01-01,152,2.1,6.2,-0.4,0.0,0.0,3.3,울산,38.548193,...,1,0,1월1일,1,0,3,8.861429,-38.064334,0,0


## 1. 클러스터별 상관관계 분석

In [113]:
def cluster_buy_df(c, buy=None, clusters=None):
    """Return the total purchased quantity per date for one cluster's items.

    Parameters
    ----------
    c : int
        Cluster label to select (compared against the ``cluster`` column).
    buy : pandas.DataFrame, optional
        Purchase records with ``date``, ``sm_cat`` and ``qty`` columns.
        Defaults to the notebook-level ``buy_full``.
    clusters : pandas.DataFrame, optional
        Frame with ``sm_cat`` and ``cluster`` columns. Defaults to the
        notebook-level ``food``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``date`` and ``qty``: one row per date holding the
        summed quantity of every sub-category in cluster ``c``.
    """
    if buy is None:
        buy = buy_full
    if clusters is None:
        clusters = food

    # `clusters` may repeat a sub-category once per purchase row, so
    # de-duplicate before the membership test.
    members = clusters.loc[clusters["cluster"] == c, "sm_cat"].unique()

    # NOTE(review): by default rows are taken from `buy_full`, not from the
    # 2018-2019 food subset, so other years (and any other big_cat sharing an
    # sm_cat name) are included -- confirm this is intended.
    selected = buy[buy["sm_cat"].isin(members)]

    # Aggregate only `qty`. The previous `.sum('qty')` passed the string as the
    # positional `numeric_only` flag and only worked because it is truthy.
    return selected.groupby("date", as_index=False)["qty"].sum()

def weather_correlation(buy_df, loc, weather_df=None):
    """Correlate daily purchase quantity with one region's weather variables.

    Parameters
    ----------
    buy_df : pandas.DataFrame
        Daily totals with ``date`` and ``qty`` columns.
    loc : str
        Region name, matched against the ``지역`` column.
    weather_df : pandas.DataFrame, optional
        Weather table containing ``지역``, ``날짜`` and the calendar/flag
        columns dropped below. Defaults to the notebook-level ``weather``.

    Returns
    -------
    dict
        Maps each remaining numeric column name (including ``qty`` itself)
        to its correlation with ``qty``, as computed by ``DataFrame.corr``.

    Raises
    ------
    KeyError
        If any of the calendar/flag columns is missing from the merged frame.
    """
    if weather_df is None:
        weather_df = weather

    # Calendar and indicator columns that are excluded from the correlation.
    excluded = ['연', '월', '일', '분기', '요일', '공휴일명', '공휴일여부', '주말여부', '계절', '폭염여부', '강수여부']

    regional = weather_df[weather_df["지역"] == loc].reset_index(drop=True)
    # Inner join on the calendar date: only days present in both frames remain.
    merged = regional.merge(buy_df, left_on="날짜", right_on="date")
    merged = merged.drop(columns=excluded)

    # The merged frame still holds non-numeric columns (region name, dates).
    # pandas >= 2.0 no longer drops them silently in corr(), so keep only
    # numeric/bool columns explicitly; this works on older pandas as well.
    numeric = merged.select_dtypes(include=["number", "bool"])
    return dict(numeric.corr()["qty"])

def display_weather_correlation(buy_df):
    """Build a weather-variable x region table of correlations with ``qty``.

    For every distinct region in the notebook-level ``weather`` frame,
    ``weather_correlation`` is evaluated against ``buy_df``; the per-region
    results become the columns of the returned DataFrame. The ``지점번호``
    and ``qty`` rows are removed before returning.
    """
    per_region = {
        region: weather_correlation(buy_df, region)
        for region in weather["지역"].unique()
    }
    table = pd.DataFrame(per_region)
    return table.drop(index=['지점번호', 'qty'])

### 1-1. 클러스터 0

In [114]:
# Per-region correlation between cluster 0's daily purchase quantity and each weather variable.
display_weather_correlation(cluster_buy_df(0))

Unnamed: 0,강릉,인천,수원,안동,울산,제주,천안,청주,대구,창원,여수,부산,서울,광주,전주,대전
평균기온,-0.388351,-0.395225,-0.408824,-0.420101,-0.393939,-0.383395,-0.421304,-0.416827,-0.412413,-0.40313,-0.401798,-0.39148,-0.410685,-0.422981,-0.412557,-0.417053
최고기온,-0.379229,-0.406117,-0.42457,-0.427946,-0.389587,-0.39424,-0.432923,-0.430956,-0.41382,-0.405343,-0.405266,-0.387924,-0.418373,-0.437263,-0.427015,-0.428202
최저기온,-0.378545,-0.388116,-0.391834,-0.39483,-0.382093,-0.365392,-0.398406,-0.397616,-0.392291,-0.389401,-0.390668,-0.386362,-0.399645,-0.390976,-0.391673,-0.394418
일별강수량,-0.045845,-0.026333,-0.025554,-0.040625,-0.05834,0.022491,-0.073098,-0.046916,-0.06868,-0.053997,-0.046834,-0.075179,-0.062038,-0.083073,-0.08081,-0.069389
1시간최대강수량,-0.043881,-0.020889,-0.001976,-0.020225,-0.059982,0.072873,-0.052896,9.9e-05,-0.072491,-0.034572,-0.052035,-0.079452,-0.057205,-0.078243,-0.078084,-0.038249
평균풍속,0.198941,0.113998,-0.037924,-0.004035,0.057063,0.112368,-0.100622,-0.143314,0.007073,-0.044607,0.121552,-0.062718,0.145587,-0.065296,0.013238,-0.124402
PM10,-0.026427,0.129836,0.11083,0.040543,0.011529,-0.023214,0.138013,0.124365,0.13743,0.057444,0.020621,0.04078,0.118955,0.066432,0.099199,0.13102
PM25,0.01066,0.068158,0.086121,0.091346,-0.045544,-0.047652,0.102078,0.104043,0.132489,0.015682,-0.0009,0.041821,0.065722,0.051679,0.102246,0.113646
1시간최대습도,-0.254527,-0.175518,-0.138735,-0.201554,-0.320799,-0.13589,-0.064654,-0.009997,-0.188777,-0.334345,-0.271139,-0.28836,-0.141368,-0.183906,-0.219498,-0.108638
일조시간합,-0.022914,-0.094885,-0.082575,-0.089499,-0.057124,-0.238801,-0.127502,-0.121617,-0.049698,-0.024369,-0.105629,-0.034491,-0.096398,-0.10721,-0.112572,-0.121373


### 1-2. 클러스터 1

In [115]:
# Per-region correlation between cluster 1's daily purchase quantity and each weather variable.
display_weather_correlation(cluster_buy_df(1))

Unnamed: 0,강릉,인천,수원,안동,울산,제주,천안,청주,대구,창원,여수,부산,서울,광주,전주,대전
평균기온,0.407374,0.3765,0.392077,0.402656,0.390135,0.348012,0.394012,0.410553,0.414684,0.382274,0.369754,0.368192,0.398436,0.379542,0.38174,0.404135
최고기온,0.415844,0.385138,0.406286,0.409369,0.403692,0.359786,0.407211,0.421417,0.425469,0.402939,0.384027,0.365884,0.408093,0.388943,0.397506,0.417469
최저기온,0.382704,0.369316,0.363291,0.355407,0.360965,0.336362,0.347989,0.381309,0.374874,0.353872,0.352172,0.360233,0.374157,0.352697,0.354052,0.369819
일별강수량,0.03205,0.034968,0.025381,0.032595,0.012108,0.010522,0.009731,0.012638,0.005753,0.032066,0.024253,0.007896,0.05884,0.004835,0.016987,0.004321
1시간최대강수량,0.0494,0.049074,0.049435,0.062619,0.034653,0.043397,0.048951,0.028174,0.030573,0.03803,0.035806,0.005474,0.072302,0.002732,0.046105,0.027386
평균풍속,-0.123699,-0.079602,0.057999,0.009411,-0.171644,-0.104618,0.017692,0.013535,-0.018364,-0.065721,-0.114398,0.060158,0.042123,0.040318,0.013241,0.072873
PM10,0.085354,-9e-05,0.004182,-0.040572,0.015352,0.004982,-0.035924,-0.078861,-0.07547,-0.055629,-0.021623,-0.032812,0.003618,-0.017112,-0.113072,-0.104256
PM25,-0.022444,-0.020307,-0.027418,-0.236177,0.027879,-0.009383,-0.028626,-0.118173,-0.093143,-0.040762,-0.037546,-0.016406,-0.000395,-0.073037,-0.134267,-0.099241
1시간최대습도,0.150988,0.196154,0.088465,0.075969,0.175837,0.081799,0.142201,-0.046639,0.110323,0.171691,0.212995,0.213773,0.070982,0.111909,0.077458,0.012243
일조시간합,0.096059,0.09796,0.08498,0.109176,0.119922,0.201619,0.109936,0.093407,0.073369,0.112801,0.105714,0.098769,0.117271,0.095426,0.097343,0.105892


### 1-3. 클러스터 2

In [116]:
# Per-region correlation between cluster 2's daily purchase quantity and each weather variable.
display_weather_correlation(cluster_buy_df(2))

Unnamed: 0,강릉,인천,수원,안동,울산,제주,천안,청주,대구,창원,여수,부산,서울,광주,전주,대전
평균기온,-0.070837,-0.082733,-0.079544,-0.077809,-0.074112,-0.095055,-0.081713,-0.073869,-0.068272,-0.079191,-0.087953,-0.082644,-0.076856,-0.091836,-0.087269,-0.077271
최고기온,-0.060276,-0.082776,-0.078896,-0.080215,-0.067107,-0.091632,-0.081381,-0.077513,-0.064001,-0.069666,-0.084592,-0.082977,-0.075223,-0.094438,-0.087568,-0.080644
최저기온,-0.07925,-0.081473,-0.083886,-0.086976,-0.082934,-0.08921,-0.087213,-0.077507,-0.077811,-0.084519,-0.087107,-0.080093,-0.081515,-0.08689,-0.085265,-0.081165
일별강수량,0.022093,0.013568,0.010274,0.033796,-0.003644,0.011355,0.000307,0.014923,-0.010488,-0.010541,-0.014835,-0.028547,0.016658,-0.025246,-0.00671,0.014174
1시간최대강수량,0.029893,0.027553,0.015071,0.053591,0.008353,0.03966,0.038797,0.030985,-0.004546,0.004874,-0.000473,-0.010016,0.021106,-0.012872,0.01517,0.045357
평균풍속,0.03297,0.045657,-0.004868,0.015776,-0.015627,-0.006536,-0.040931,-0.070988,0.004881,-0.059813,0.031482,0.02725,0.079118,-0.002738,0.009907,-0.038749
PM10,0.036965,0.117066,0.111675,0.034999,0.024252,0.020856,0.125094,0.067389,0.051299,0.033624,0.015516,0.033342,0.114312,0.084698,0.059731,0.063844
PM25,0.060238,0.099784,0.104878,-0.005135,0.004203,0.032134,0.120688,0.061665,0.053804,0.036008,0.025502,0.043072,0.101061,0.074108,0.068722,0.075519
1시간최대습도,-0.059915,0.030956,0.01249,-0.071085,-0.047511,-0.043364,0.028993,-0.007973,-0.024617,-0.071656,-0.03703,-0.049282,-0.009806,-0.027693,-0.055963,-0.041737
일조시간합,0.014646,-0.026915,-0.032617,-0.020475,0.01134,-0.017703,-0.063535,-0.065191,-0.014808,0.029893,-0.017506,0.015716,-0.023061,-0.041373,-0.05337,-0.058437


### 1-4. 클러스터 3

In [117]:
# Per-region correlation between cluster 3's daily purchase quantity and each weather variable.
display_weather_correlation(cluster_buy_df(3))

Unnamed: 0,강릉,인천,수원,안동,울산,제주,천안,청주,대구,창원,여수,부산,서울,광주,전주,대전
평균기온,0.125016,0.109972,0.106905,0.100809,0.117255,0.102375,0.098325,0.112708,0.118883,0.111822,0.109464,0.113669,0.109244,0.090127,0.098432,0.107039
최고기온,0.132028,0.107561,0.101323,0.09205,0.120261,0.098022,0.096866,0.107738,0.115609,0.115119,0.109372,0.112145,0.109594,0.080202,0.097688,0.104669
최저기온,0.120923,0.110343,0.102454,0.095247,0.107342,0.10646,0.089029,0.110293,0.110568,0.105272,0.107793,0.113518,0.102925,0.093458,0.096061,0.102507
일별강수량,0.078323,0.049083,0.031869,0.051592,0.022674,0.05188,0.028268,0.042927,0.007994,0.02967,0.020277,-0.002443,0.033241,-0.003477,0.005422,0.027183
1시간최대강수량,0.091843,0.066096,0.054287,0.088697,0.030276,0.102682,0.068846,0.068952,0.018337,0.032656,0.017395,-0.008676,0.039046,-0.008574,0.02816,0.061783
평균풍속,-0.040334,0.001327,0.014659,-0.044036,-0.109824,-0.045915,-0.061772,-0.114001,-0.041226,-0.12073,0.007111,-0.027526,0.129273,-0.032003,-0.028725,-0.0656
PM10,-0.020766,0.007862,-0.012657,-0.088256,-0.06211,-0.054298,-0.010583,-0.069351,-0.067674,-0.083526,-0.072951,-0.0866,-0.013192,-0.028014,-0.087595,-0.097426
PM25,-0.051181,-0.018536,-0.02152,-0.201592,-0.070372,-0.06903,-0.0041,-0.081844,-0.058926,-0.073591,-0.07484,-0.059198,-0.020163,-0.061501,-0.073266,-0.068846
1시간최대습도,0.06107,0.055759,0.057943,0.025759,0.051548,0.000151,0.160647,0.026693,0.080531,0.007649,0.03316,0.06305,0.038453,0.046029,-0.004405,0.037977
일조시간합,-0.017553,-0.018001,-0.026446,-0.060562,-0.002923,0.00245,-0.055211,-0.06996,-0.038018,0.025112,-0.01663,-0.000947,-0.008297,-0.037683,-0.051944,-0.048979


### 1-5. 클러스터 4

In [118]:
# Per-region correlation between cluster 4's daily purchase quantity and each weather variable.
display_weather_correlation(cluster_buy_df(4))

Unnamed: 0,강릉,인천,수원,안동,울산,제주,천안,청주,대구,창원,여수,부산,서울,광주,전주,대전
평균기온,0.020628,0.019481,0.014036,0.005297,0.020965,0.023754,0.002686,0.013503,0.016708,0.018974,0.021401,0.025301,0.012898,0.001397,0.009128,0.010149
최고기온,0.021037,0.010757,-0.004733,-0.00192,0.022829,0.020508,-0.010607,-0.002259,0.012922,0.019063,0.018635,0.024184,0.004915,-0.012021,-0.0024,-0.001169
최저기온,0.02712,0.025056,0.022412,0.013516,0.019473,0.033423,0.010648,0.025075,0.019202,0.020449,0.025029,0.026965,0.016762,0.016542,0.017931,0.021963
일별강수량,0.02662,0.065632,0.085129,0.051564,-0.015147,0.044647,0.003934,0.056464,-0.009683,-0.0048,-0.00136,-0.019933,0.038393,-0.016629,-0.011204,0.018992
1시간최대강수량,0.041464,0.091018,0.126042,0.074887,-0.007942,0.070905,0.05455,0.128592,-0.005787,0.008021,0.007495,-0.013366,0.061788,-0.019819,0.003056,0.059268
평균풍속,-0.063817,0.009773,-0.016307,-0.07175,-0.085017,-0.015218,-0.088177,-0.110022,-0.046331,-0.096604,0.016285,-0.067404,0.08673,-0.019539,-0.04122,-0.08571
PM10,-0.022033,0.002228,-0.005733,-0.053753,-0.017026,-0.044872,0.008654,-0.016016,0.003374,-0.024076,-0.027163,-0.031702,-0.007333,-0.006339,-0.036375,-0.020861
PM25,-0.073677,-0.046192,-0.030262,-0.107158,-0.045036,-0.06457,-0.012642,-0.052323,-0.001708,-0.036161,-0.038928,-0.024527,-0.040896,-0.05115,-0.053346,-0.033382
1시간최대습도,0.003575,-0.015922,-0.023627,0.004132,-0.013876,-0.005897,0.08505,0.024662,0.047666,-0.051562,-0.013309,-0.015848,-0.017952,0.008453,-0.044697,0.01522
일조시간합,-0.05907,-0.050746,-0.055402,-0.061993,-0.000979,-0.04587,-0.085734,-0.100121,-0.049448,0.02006,-0.016785,0.007589,-0.043165,-0.039842,-0.074193,-0.09491
