# Todo
- [ ] Evaluate the forecast models
   - [x] Build a model on data through 2018, and test it on the first week of 2019
   - [x] Measure the time of fitting
   - [ ] Repeat this procedure for all 2018
      - [x] Extract a week for input
      - [x] Add the input into the training data
      - [x] Forecast next week
      - [ ] Forecast next one day

# Dependency

In [61]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import logging
logging.getLogger('fbprophet').setLevel(logging.WARNING)

import pandas as pd
from fbprophet import Prophet
import time
from tqdm.notebook import tqdm

# Load Training & Testing Data

In [78]:
# Crop names (values of the '作物名稱' column) kept for the experiments.
USED_CROPS = ['鳳梨-金鑽鳳梨', '香蕉', '青蔥-日蔥', '蓮霧-紅蓮霧', '芽菜類-黃豆牙', '南瓜-木瓜形', '大蒜-蒜仁', '小白菜-土白菜', '木瓜-網室紅肉', '洋香瓜-網狀紅肉']

# Training rows: keep only the '台北一' market and the selected crops.
all_df = pd.read_csv('data/train.csv')
all_df = all_df[(all_df['市場名稱'] == '台北一') & all_df['作物名稱'].isin(USED_CROPS)]

# Testing rows: same two filters as the training data.
test_df = pd.read_csv('data/test.csv')
test_df = test_df[(test_df['市場名稱'] == '台北一') & test_df['作物名稱'].isin(USED_CROPS)]

# One GroupBy per split, grouped by crop name.
grouped = {
  'train': all_df.groupby(all_df['作物名稱']),
  'test': test_df.groupby(test_df['作物名稱']),
}

# Build Models

In [6]:
def build_model(df, y_name):
  """Fit a Prophet model on one column of `df`.

  Args:
    df: DataFrame holding a 'DateTime' column and the `y_name` column.
    y_name: name of the column to forecast.

  Returns:
    The Prophet instance after `fit` has been called on it.
  """
  # Work on a renamed copy: 'DateTime' becomes 'ds' and the target becomes 'y'.
  history = df.rename(columns={y_name: 'y', 'DateTime': 'ds'})
  history['ds'] = pd.to_datetime(history['ds'])

  forecaster = Prophet()
  forecaster.fit(history)
  return forecaster

# Define the date split points
## Create weekly sequence for 2019

In [91]:
# Consecutive (start, end) pairs, each 7 days apart. The first pair starts one
# week before 2019-01-01; pairs are added until an end reaches 2020-01-01 or later.
DATE_POINTS = []
end_date = pd.to_datetime('2019-01-01') - pd.DateOffset(days=7)
start_date = end_date

while end_date < pd.to_datetime('2020-01-01'):
  # The previous end becomes the new start; the new end is 7 days later.
  start_date, end_date = end_date, end_date + pd.DateOffset(days=7)
  DATE_POINTS.append((start_date, end_date))

## Create the 7-day sequence for 2019

In [55]:
# Sliding (start, end) windows that are 7 days long and advance one day at a
# time, beginning at 2019-01-01. Windows are added until an end reaches
# 2020-01-01.
INPUTS = []
start_date = pd.to_datetime('2019-01-01')
end_date = start_date

while end_date < pd.to_datetime('2020-01-01'):
  end_date = start_date + pd.DateOffset(days=7)
  # Record the window BEFORE advancing the start; advancing first would drop
  # the first day of every window (6-day spans, and 2019-01-01 never covered).
  INPUTS.append((start_date, end_date))
  start_date = start_date + pd.DateOffset(days=1)

## Notes

### Extract 7 days from the testing data

In [79]:
# test_1 = test_data[(pd.to_datetime(test_data['DateTime']) >= DATE_POINTS[1]) & (pd.to_datetime(test_data['DateTime']) < DATE_POINTS[2])]
# test_2 = test_data[(pd.to_datetime(test_data['DateTime']) >= DATE_POINTS[2]) & (pd.to_datetime(test_data['DateTime']) < DATE_POINTS[3])]
# testing = forecast[(forecast['ds'] >= '2019-01-01') & (forecast['ds'] < '2019-12-31')]

In [7]:
# Scratch check on a single crop ('香蕉'): fit on its training prices and
# predict over the history plus 365 future periods.
# `grouped['train']` replaces the old `grouped_df` name, which is no longer
# defined anywhere in this notebook.
df = grouped['train'].get_group('香蕉')
df_price = df[['平均價', 'DateTime']]
model = build_model(df_price, '平均價')
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)

In [28]:
# Testing rows for '香蕉'; `grouped['test']` replaces the undefined `test_grouped_df`.
test_data = grouped['test'].get_group('香蕉')

# Iterate over groups

In [80]:
# Walk over every crop in the training split and slice out the series of
# interest. `grouped['train']` replaces the undefined `grouped_df`.
train_groups = grouped['train']
for group in train_groups.groups:
  df = train_groups.get_group(group)
  df_price = df[['平均價', 'DateTime']]
  df_return_1 = df[['1_day_return', 'DateTime']]
  df_return_3 = df[['3_day_return', 'DateTime']]
  df_return_5 = df[['5_day_return', 'DateTime']]
  print(group)

南瓜-木瓜形
大蒜-蒜仁
小白菜-土白菜
木瓜-網室紅肉
洋香瓜-網狀紅肉
芽菜類-黃豆牙
蓮霧-紅蓮霧
青蔥-日蔥
香蕉
鳳梨-金鑽鳳梨


# Predict the whole year (output = week)

In [92]:
def predict_weekly(train_data, test_data, name='', target='平均價', save_file=True):
  """Refit a model for every window in DATE_POINTS and forecast the week after it.

  For each (start, end) pair in DATE_POINTS, the rows of `test_data` whose
  'DateTime' lies in [start, end) are appended to `train_data`, a model is
  fitted on `target` via build_model, 7 future periods are predicted, and the
  forecast rows with ds >= end are kept.

  Args:
    train_data: DataFrame with a 'DateTime' column and the `target` column.
    test_data: DataFrame with the same columns, used as the weekly input.
    name: label inserted into the output file name.
    target: name of the column to forecast.
    save_file: when true, write the concatenated forecasts to
      'result/predict/{name}_{target}_pred_week.csv'.

  Returns:
    DataFrame with the kept forecast rows of all windows concatenated.
  """
  import os

  # Parse the test dates once instead of twice per window.
  test_dates = pd.to_datetime(test_data['DateTime'])
  prediction = []

  # Unpack each window directly rather than indexing with the START/END
  # globals, which are only defined in a later cell. Iterating through tqdm
  # also closes the progress bar if an iteration raises.
  for start, end in tqdm(DATE_POINTS):
    # extract the 7-day input
    input_data = test_data[(test_dates >= start) & (test_dates < end)]

    # add to the training data
    # NOTE(review): only the current window is appended, not the earlier
    # ones -- confirm this is the intended evaluation setup.
    new_train = pd.concat([train_data, input_data])
    train_price = new_train[[target, 'DateTime']]
    model = build_model(train_price, target)
    future = model.make_future_dataframe(periods=7)
    forecast = model.predict(future)

    # extract the predict result
    prediction.append(forecast[forecast['ds'] >= end])

  total_result = pd.concat(prediction)
  if save_file:
    # Create the output directory on first use so to_csv does not fail.
    os.makedirs('result/predict', exist_ok=True)
    total_result.to_csv(f'result/predict/{name}_{target}_pred_week.csv', index = False)
  return total_result

## Predict all the crops

In [93]:
# Forecast price and 3-day return for every crop in the training split.
# `grouped['train']` replaces the undefined `grouped_df`; enumerate supplies
# the running counter instead of a list search on every iteration.
crop_names = list(grouped['train'].groups)
for position, group in enumerate(crop_names, start=1):
  print(f"# {group} ({position}/{len(crop_names)})", flush=True)
  train_data = grouped['train'].get_group(group)
  test_data = grouped['test'].get_group(group)
  predict_weekly(train_data, test_data, name=group, target='平均價')
  predict_weekly(train_data, test_data, name=group, target='3_day_return')

# 南瓜-木瓜形 (1/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 大蒜-蒜仁 (2/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 小白菜-土白菜 (3/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 木瓜-網室紅肉 (4/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 洋香瓜-網狀紅肉 (5/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 芽菜類-黃豆牙 (6/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 蓮霧-紅蓮霧 (7/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 青蔥-日蔥 (8/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 香蕉 (9/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))


# 鳳梨-金鑽鳳梨 (10/10)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=54.0), HTML(value='')))




# Backup

In [76]:
# Positions of the window start and end inside each DATE_POINTS tuple.
START, END = 0, 1

# Inline weekly walk over DATE_POINTS for the price series: uses the
# notebook-level `test_data` and `df_price` and writes one CSV at the end.
prediction = []
test_dates = pd.to_datetime(test_data['DateTime'])
pbar = tqdm(total=len(DATE_POINTS))

for d in DATE_POINTS:
  # rows of the test data that fall inside the current window
  in_window = (test_dates >= d[START]) & (test_dates < d[END])
  input_data = test_data[in_window]

  # add to the training data
  input_price = input_data[['平均價', 'DateTime']]
  new_train = pd.concat([df_price, input_price])
  model = build_model(new_train, '平均價')
  future = model.make_future_dataframe(periods=7)
  forecast = model.predict(future)

  # keep only the predicted rows from the window end onwards
  forecast = forecast[forecast['ds'] >= d[END]]
  prediction.append(forecast)

  pbar.update(1)
pbar.close()

total_result = pd.concat(prediction)
total_result.to_csv('result/banana_week_pred.csv', index = False)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=53.0), HTML(value='')))


