In [1]:
# Mount Google Drive into the Colab runtime; the dataset is read from
# /content/drive/MyDrive further down, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import warnings
warnings.filterwarnings('ignore')
from matplotlib import font_manager,rc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from fbprophet import Prophet 

In [3]:
def test_data_split(data):
  train = data[data['base_date']<'2021-06-01']
  test = data[data['base_date']>='2021-06-01']
  return train, test

In [4]:
# Load the prepared dataset from the mounted Drive folder.
data=pd.read_csv("/content/drive/MyDrive/bigcon/final_dataset.csv")

In [5]:
# Discard the 'Unnamed: 0' column that came in with the CSV.
data = data.drop(columns=['Unnamed: 0'])

In [6]:
# Display the loaded frame (the notebook renders a truncated view).
data

Unnamed: 0,year,month,date,emd_nm,em_g,total_pop,제주_resd_ratio,제주_resd_pop,visit_ratio,card_use_cnt,delivery_per_cnt,market_per_cnt,frgn_ratio,youth_ratio
0,2018,1,1,건입동,1708250,10028,0.552623,7274.890502,49.642958,1806.0,0.002769,0.326135,0.038427,0.200261
1,2018,1,1,남원읍,1239600,19740,0.704247,12143.821692,41.452047,1581.0,0.015813,0.294750,0.025900,0.177984
2,2018,1,1,노형동,9357900,54958,0.769356,50660.687565,25.674217,12507.0,0.009834,0.392180,0.039271,0.195624
3,2018,1,1,대륜동,1717700,13651,0.667224,11185.575639,42.502310,2032.0,0.009843,0.422736,0.014440,0.208504
4,2018,1,1,대정읍,1264950,22603,0.784751,16075.181182,28.652628,1973.0,0.017233,0.437912,0.053508,0.168114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50881,2021,6,30,한경면,973850,9495,0.749118,7257.895822,35.350145,1286.0,0.031104,0.328927,0.042559,0.171578
50882,2021,6,30,한림읍,3298100,24462,0.769060,20632.727912,35.614095,3044.0,0.084428,0.219777,0.048140,0.197206
50883,2021,6,30,화북동,3749600,24630,0.705806,24117.268821,27.871984,5644.0,0.114103,0.291460,0.010642,0.167519
50884,2021,6,30,효돈동,1011050,5314,0.744489,3726.154500,31.896263,714.0,0.021008,0.450980,0.014730,0.165783


In [7]:
# Cast the three date-part columns to strings so they can be concatenated
# into a single date string.
data = data.astype({'year': str, 'month': str, 'date': str})

In [8]:
# Join year, month and day-of-month into a 'Y-M-D' string column.
data['base_date'] = data['year'].str.cat([data['month'], data['date']], sep="-")

In [9]:
# Parse the assembled date strings into datetime values.
data = data.assign(base_date=pd.to_datetime(data['base_date']))

In [10]:
# The separate date-part columns are redundant once base_date exists.
data = data.drop(columns=['year', 'month', 'date'])

In [11]:
# Check the first rows after the date columns were folded into base_date.
data.head()

Unnamed: 0,emd_nm,em_g,total_pop,제주_resd_ratio,제주_resd_pop,visit_ratio,card_use_cnt,delivery_per_cnt,market_per_cnt,frgn_ratio,youth_ratio,base_date
0,건입동,1708250,10028,0.552623,7274.890502,49.642958,1806.0,0.002769,0.326135,0.038427,0.200261,2018-01-01
1,남원읍,1239600,19740,0.704247,12143.821692,41.452047,1581.0,0.015813,0.29475,0.0259,0.177984,2018-01-01
2,노형동,9357900,54958,0.769356,50660.687565,25.674217,12507.0,0.009834,0.39218,0.039271,0.195624,2018-01-01
3,대륜동,1717700,13651,0.667224,11185.575639,42.50231,2032.0,0.009843,0.422736,0.01444,0.208504,2018-01-01
4,대정읍,1264950,22603,0.784751,16075.181182,28.652628,1973.0,0.017233,0.437912,0.053508,0.168114,2018-01-01


In [12]:
# Hold out the rows dated 2021-06-01 or later (see test_data_split) for evaluation.
train, test = test_data_split(data)

# card_use_cnt 예측

In [23]:
# Empty accumulator for the per-region card_use_cnt forecasts.
test_df=pd.DataFrame(columns=['emd_nm','base_date','card_use_cnt'])

In [14]:
import warnings
from math import sqrt
from sklearn.metrics import mean_squared_error

In [25]:
# Fit one Prophet model per region on the full card_use_cnt history and
# append its 62-day-ahead daily forecast to test_df.
for region in data['emd_nm'].unique():
    region_history = (
        data.loc[data['emd_nm'] == region, ['card_use_cnt', 'base_date']]
        .rename(columns={'card_use_cnt': 'y', 'base_date': 'ds'})
    )
    # Seasonality switches are given to the constructor instead of being
    # assigned on the instance afterwards.
    model = Prophet(
        changepoint_range=0.9,
        changepoint_prior_scale=0.8,
        seasonality_mode='multiplicative',
        n_changepoints=100,
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(region_history)
    future = model.make_future_dataframe(periods=62, freq='D')
    # Keep only the 62 rows that lie beyond the fitted history.
    region_forecast = (
        model.predict(future)[['ds', 'yhat']]
        .tail(62)
        .rename(columns={'ds': 'base_date', 'yhat': 'card_use_cnt'})
        .assign(emd_nm=region)
    )
    test_df = pd.concat([test_df, region_forecast])

In [15]:
# Hold-out evaluation for card_use_cnt: fit one Prophet model per region on
# the training rows, forecast as many daily steps as that region has test
# rows, and report the RMSE per region plus the mean over regions.
#
# The evaluation forecasts are intentionally NOT appended to test_df: that
# frame collects the forecasts that get merged and exported at the end of the
# notebook, and hold-out predictions do not belong in it.
mse_lst = []  # per-region RMSE values (historical name kept)
for region in train['emd_nm'].unique():
    test_temp = test.loc[test['emd_nm'] == region, ['card_use_cnt', 'base_date']]
    data_pop = (
        train.loc[train['emd_nm'] == region, ['card_use_cnt', 'base_date']]
        .rename(columns={'card_use_cnt': 'y', 'base_date': 'ds'})
    )
    model = Prophet(
        changepoint_range=0.9,
        changepoint_prior_scale=0.8,
        seasonality_mode='multiplicative',
        n_changepoints=100,
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(data_pop)
    horizon = len(test_temp)
    future = model.make_future_dataframe(periods=horizon, freq='D')
    # NOTE(review): the last `horizon` forecast rows are compared with the
    # test rows by position; this assumes the region's test dates are the
    # consecutive days right after its training data - confirm.
    forecast = model.predict(future)[['ds', 'yhat']].tail(horizon)
    # Compute the RMSE once and reuse it for both the list and the printout.
    rmse = sqrt(mean_squared_error(test_temp['card_use_cnt'], forecast['yhat']))
    mse_lst.append(rmse)
    print(region, ':', rmse)
print('전체 평균:', np.mean(mse_lst))

건입동 : 105.46051612459011
남원읍 : 156.2545360694914
노형동 : 586.7500736575535
대륜동 : 202.8215011986404
대정읍 : 435.5875748090971
대천동 : 216.90894779797824
도두동 : 62.4789697686139
동홍동 : 294.4237534405882
봉개동 : 123.71200882864876
삼도1동 : 197.38058558319915
삼도2동 : 129.7913638164341
삼양동 : 238.92515029956354
서홍동 : 221.00269068896202
성산읍 : 121.77908538433807
송산동 : 72.68258537523157
아라동 : 400.8866600543471
안덕면 : 175.44865803566893
애월읍 : 417.11112894803375
연동 : 571.0288583417333
영천동 : 90.34248169851782
예래동 : 99.72183295160602
오라동 : 263.00868031609593
외도동 : 280.0702560246775
용담1동 : 84.54760963574707
용담2동 : 248.3981504351388
이도1동 : 139.27074698272406
이도2동 : 573.44552537277
이호동 : 92.30326506005906
일도1동 : 69.0870680913489
일도2동 : 455.51867419531584
정방동 : 52.47014656622207
중문동 : 150.64509309222947
중앙동 : 86.70595933396582
천지동 : 80.22900762545662
표선면 : 140.83408139012
화북동 : 196.01539593776965
효돈동 : 119.57094858884638
전체 : 5353.455246056841
조천읍 : 273.65844441011274
구좌읍 : 135.9924876665034
한림읍 : 355.17620059113597

In [None]:
# Inspect the accumulated card_use_cnt forecasts.
# NOTE(review): the saved output below lists month-end dates, which does not
# match the daily (freq='D') forecasts produced in this notebook - presumably
# stale output from an earlier variant; re-run to confirm.
test_df

Unnamed: 0,emd_nm,base_date,card_use_cnt
1276,건입동,2021-07-31,2620.450508
1277,건입동,2021-08-31,2232.604802
1278,건입동,2021-09-30,2171.089782
1276,남원읍,2021-07-31,2362.084697
1277,남원읍,2021-08-31,2077.894464
...,...,...,...
600,한림읍,2021-08-31,2973.247067
601,한림읍,2021-09-30,2966.313077
597,한경면,2021-07-31,1083.340724
598,한경면,2021-08-31,1096.180816


# delivery_per_cnt 예측

In [26]:
# Empty accumulator for the per-region delivery_per_cnt forecasts.
test_ratio=pd.DataFrame(columns=['emd_nm','base_date','delivery_per_cnt'])

In [27]:
# Fit one Prophet model per region on the full delivery_per_cnt history and
# append its 62-day-ahead daily forecast to test_ratio.
for region in data['emd_nm'].unique():
    region_history = (
        data.loc[data['emd_nm'] == region, ['delivery_per_cnt', 'base_date']]
        .rename(columns={'delivery_per_cnt': 'y', 'base_date': 'ds'})
    )
    # Seasonality switches are given to the constructor instead of being
    # assigned on the instance afterwards.
    model = Prophet(
        seasonality_mode='multiplicative',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(region_history)
    future = model.make_future_dataframe(periods=62, freq='D')
    # Keep only the 62 rows that lie beyond the fitted history.
    region_forecast = (
        model.predict(future)[['ds', 'yhat']]
        .tail(62)
        .rename(columns={'ds': 'base_date', 'yhat': 'delivery_per_cnt'})
        .assign(emd_nm=region)
    )
    test_ratio = pd.concat([test_ratio, region_forecast])

In [19]:
# Hold-out evaluation for delivery_per_cnt: fit one Prophet model per region
# on the training rows, forecast as many daily steps as that region has test
# rows, and report the RMSE per region plus the mean over regions.
#
# The evaluation forecasts are intentionally NOT appended to test_ratio: that
# frame collects the forecasts that get merged and exported at the end of the
# notebook, and hold-out predictions do not belong in it.
mse_lst = []  # per-region RMSE values (historical name kept)
for region in train['emd_nm'].unique():
    test_temp = test.loc[test['emd_nm'] == region, ['delivery_per_cnt', 'base_date']]
    data_pop = (
        train.loc[train['emd_nm'] == region, ['delivery_per_cnt', 'base_date']]
        .rename(columns={'delivery_per_cnt': 'y', 'base_date': 'ds'})
    )
    model = Prophet(
        seasonality_mode='multiplicative',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(data_pop)
    horizon = len(test_temp)
    future = model.make_future_dataframe(periods=horizon, freq='D')
    # NOTE(review): the last `horizon` forecast rows are compared with the
    # test rows by position; this assumes the region's test dates are the
    # consecutive days right after its training data - confirm.
    forecast = model.predict(future)[['ds', 'yhat']].tail(horizon)
    # Compute the RMSE once and reuse it for both the list and the printout.
    rmse = sqrt(mean_squared_error(test_temp['delivery_per_cnt'], forecast['yhat']))
    mse_lst.append(rmse)
    print(region, ':', rmse)
print('전체 평균:', np.mean(mse_lst))

건입동 : 0.016339812527519627
남원읍 : 0.013073701250845083
노형동 : 0.010160383700114333
대륜동 : 0.016045949139816024
대정읍 : 0.01187451414204958
대천동 : 0.02272737557539623
도두동 : 0.03568528055510684
동홍동 : 0.01712481425852658
봉개동 : 0.021989380668394294
삼도1동 : 0.01147306574212407
삼도2동 : 0.018779986227394668
삼양동 : 0.01604680411024796
서홍동 : 0.021951679530447066
성산읍 : 0.014716776900003918
송산동 : 0.021296325195540915
아라동 : 0.011120133530909017
안덕면 : 0.01656605340343728
애월읍 : 0.011008109422330194
연동 : 0.01598392704527682
영천동 : 0.021093051901200702
예래동 : 0.02637822053124626
오라동 : 0.01306559379961838
외도동 : 0.012819981676453072
용담1동 : 0.018184103060835035
용담2동 : 0.017852208723712768
이도1동 : 0.02059539037573047
이도2동 : 0.011572472403700981
이호동 : 0.02642854403181876
일도1동 : 0.04005358116079284
일도2동 : 0.010466031358092893
정방동 : 0.03510120527545059
중문동 : 0.016234758183888368
중앙동 : 0.02342085402279155
천지동 : 0.025489012285701433
표선면 : 0.018554621409515994
화북동 : 0.013951575473388426
효돈동 : 0.015924918003054084
전체 : 0.00

In [None]:
# Show the delivery_per_cnt forecast rows for a single region as a spot check.
test_ratio[test_ratio['emd_nm'] == '건입동']

Unnamed: 0,emd_nm,base_date,delivery_per_cnt
1276,건입동,2021-07-31,0.128435
1277,건입동,2021-08-31,0.123999
1278,건입동,2021-09-30,0.127485


# market_per_cnt 예측

In [28]:
# Empty accumulator for the per-region market_per_cnt forecasts.
test_per=pd.DataFrame(columns=['emd_nm','base_date','market_per_cnt'])

In [29]:
# Fit one Prophet model per region on the full market_per_cnt history and
# append its 62-day-ahead daily forecast to test_per.
for region in data['emd_nm'].unique():
    region_history = (
        data.loc[data['emd_nm'] == region, ['market_per_cnt', 'base_date']]
        .rename(columns={'market_per_cnt': 'y', 'base_date': 'ds'})
    )
    # Seasonality switches are given to the constructor instead of being
    # assigned on the instance afterwards.
    model = Prophet(
        seasonality_mode='multiplicative',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(region_history)
    future = model.make_future_dataframe(periods=62, freq='D')
    # Keep only the 62 rows that lie beyond the fitted history.
    region_forecast = (
        model.predict(future)[['ds', 'yhat']]
        .tail(62)
        .rename(columns={'ds': 'base_date', 'yhat': 'market_per_cnt'})
        .assign(emd_nm=region)
    )
    test_per = pd.concat([test_per, region_forecast])

In [21]:
# Hold-out evaluation for market_per_cnt: one Prophet model per region is
# fitted on the training rows and scored (RMSE) against that region's test
# rows; the mean over regions is printed at the end.
mse_lst = []  # per-region RMSE values
for region in train['emd_nm'].unique():
    test_temp = test.loc[test['emd_nm'] == region, ['market_per_cnt', 'base_date']]
    data_pop = (
        train.loc[train['emd_nm'] == region, ['market_per_cnt', 'base_date']]
        .rename(columns={'market_per_cnt': 'y', 'base_date': 'ds'})
    )
    model = Prophet(
        seasonality_mode='multiplicative',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    model.fit(data_pop)
    horizon = len(test_temp)
    future = model.make_future_dataframe(periods=horizon, freq='D')
    # The trailing `horizon` rows are the out-of-sample part of the forecast.
    forecast = (
        model.predict(future)[['ds', 'yhat']]
        .tail(horizon)
        .rename(columns={'ds': 'base_date', 'yhat': 'market_per_cnt'})
        .assign(emd_nm=region)
    )
    region_rmse = sqrt(mean_squared_error(forecast['market_per_cnt'], test_temp['market_per_cnt']))
    mse_lst.append(region_rmse)
    print(region, ':', region_rmse)
print('전체 평균:', np.mean(mse_lst))

건입동 : 0.020804751656480047
남원읍 : 0.027553852872213878
노형동 : 0.019807374782915377
대륜동 : 0.03738881248209053
대정읍 : 0.0267276153742617
대천동 : 0.036549107348224426
도두동 : 0.04609670030614907
동홍동 : 0.02926863973527961
봉개동 : 0.04212397615741952
삼도1동 : 0.02248604837114582
삼도2동 : 0.03305632607579396
삼양동 : 0.02360251672455516
서홍동 : 0.045525398872427035
성산읍 : 0.03237377666924413
송산동 : 0.03851982885688585
아라동 : 0.015713564570479132
안덕면 : 0.02375657734016506
애월읍 : 0.01916527013232699
연동 : 0.016243734778815272
영천동 : 0.05184498517106026
예래동 : 0.049515013212740634
오라동 : 0.02517458755279513
외도동 : 0.01910317788884105
용담1동 : 0.034136223855443674
용담2동 : 0.02559815215250283
이도1동 : 0.02347634678395293
이도2동 : 0.016203970839506174
이호동 : 0.03955138659078727
일도1동 : 0.04657478130850427
일도2동 : 0.01549589643818756
정방동 : 0.05561512594692737
중문동 : 0.03473441520280646
중앙동 : 0.050737154233971224
천지동 : 0.06568563286339499
표선면 : 0.024766444734468755
화북동 : 0.018496432482937548
효돈동 : 0.044773908249853674
전체 : 0.01277294944

In [None]:
# Inspect the accumulated market_per_cnt forecasts.
# NOTE(review): the saved output below lists month-end dates, which does not
# match the daily (freq='D') forecasts produced in this notebook - presumably
# stale output from an earlier variant; re-run to confirm.
test_per

Unnamed: 0,emd_nm,base_date,market_per_cnt
1276,건입동,2021-07-31,0.273041
1277,건입동,2021-08-31,0.296707
1278,건입동,2021-09-30,0.290987
1276,남원읍,2021-07-31,0.258271
1277,남원읍,2021-08-31,0.281904
...,...,...,...
600,한림읍,2021-08-31,0.225575
601,한림읍,2021-09-30,0.225885
597,한경면,2021-07-31,0.279432
598,한경면,2021-08-31,0.298511


In [34]:
# Combine the three forecast tables on region and date, then write the
# result to Drive without the index column.
merge_keys = ['emd_nm', 'base_date']
one = test_df.merge(test_ratio, on=merge_keys)
two = one.merge(test_per, on=merge_keys)
two.to_csv('/content/drive/MyDrive/bigcon/test_data.csv', index=False)