In [241]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from xgboost.sklearn import XGBRegressor  
import pickle
import datetime
# Use seaborn's "darkgrid" theme for any figures drawn in this notebook.
sns.set_style("darkgrid")
# Directory prefixes. Paths below are built by plain string concatenation,
# so each prefix must keep its trailing '/'.
# Pickled per-city/per-item models (passed to read_stored_model).
model_dir = './saved_xgb_models/'
# Trimmed per-city/per-item CSV files (passed to read_trimmed_data).
data_dir = './trimmed_data_and_plots/'
# Destination for the per-city prediction CSVs written by the driver loop.
predict_dir = './produce_predictions/'

In [242]:
def read_stored_model(city, veggie, directory):
    """Load the pickled model stored for one city / produce item.

    The file is looked up at ``directory + city + '_' + veggie + '_model'``.
    ``directory`` is concatenated as-is, so it must already end with a path
    separator.

    Parameters
    ----------
    city : str
        City identifier used in the file name.
    veggie : str
        Produce identifier used in the file name.
    directory : str
        Directory prefix containing the pickled models.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    OSError
        If the model file cannot be opened (e.g. ``FileNotFoundError``).
    """
    model_path = directory + city + '_' + veggie + '_model'
    # SECURITY: unpickling can execute arbitrary code. Only load model files
    # that were produced by a trusted training run.
    # The context manager closes the file handle even if unpickling fails.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)

In [243]:
def read_trimmed_data(city, veggie, data_directory):
    """Read the trimmed CSV for one city / produce item, sorted by date.

    The file is looked up at
    ``data_directory + veggie + '_' + city + '_TRIM.csv'``. ``data_directory``
    is concatenated as-is, so it must already end with a path separator.

    Parameters
    ----------
    city : str
        City identifier used in the file name.
    veggie : str
        Produce identifier used in the file name.
    data_directory : str
        Directory prefix containing the trimmed CSV files.

    Returns
    -------
    pandas.DataFrame
        The file contents with the ``'Date'`` column parsed to datetimes,
        rows sorted ascending by ``'Date'`` and a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If the CSV file does not exist.
    KeyError
        If the file has no ``'Date'`` column.
    """
    # Build the path from the argument rather than a notebook-level global so
    # the function honours whatever directory the caller passes in.
    csv_path = data_directory + veggie + '_' + city + '_TRIM.csv'
    output_data = pd.read_csv(csv_path)
    output_data['Date'] = pd.to_datetime(output_data['Date'])
    return output_data.sort_values(by='Date').reset_index(drop=True)

In [244]:
def nearest_date(dates, targdate):
    """Return the entry of ``dates`` closest to ``targdate`` and its distance.

    Parameters
    ----------
    dates : iterable of datetime-like
        Candidate dates (e.g. a pandas Series of Timestamps or a list of
        ``datetime`` objects). Must be non-empty.
    targdate : datetime-like
        The date to match; must support subtraction with the elements of
        ``dates``.

    Returns
    -------
    tuple
        ``(nearest, timedelta)`` where ``nearest`` is the element of ``dates``
        with the smallest absolute difference to ``targdate`` (the first such
        element on a tie) and ``timedelta`` is that absolute difference.

    Raises
    ------
    ValueError
        If ``dates`` is empty.
    """
    # No per-element conversion is done: iterating yields objects that already
    # support subtraction, and a loop that merely rebinds its loop variable
    # would not change ``dates`` anyway.
    nearest = min(dates, key=lambda candidate: abs(candidate - targdate))
    timedelta = abs(nearest - targdate)
    return nearest, timedelta

In [245]:
def get_train(veggie_data, train_length):
    """Select the trailing training window from a price history.

    The last row (by position) of ``veggie_data`` is taken as the reference
    date, so the frame is expected to be sorted ascending by ``'Date'``. The
    window starts at the available date nearest to
    ``reference - train_length days`` and ends just before the reference date
    (the reference date itself is excluded).

    Parameters
    ----------
    veggie_data : pandas.DataFrame
        Price history with a ``'Date'`` column; must be non-empty.
    train_length : int or float
        Length of the training window in days (rounded up to whole days).

    Returns
    -------
    pandas.DataFrame
        The rows of ``veggie_data`` whose date falls inside the window.

    Raises
    ------
    IndexError
        If ``veggie_data`` has no rows.
    """
    all_dates = pd.to_datetime(veggie_data['Date'])
    start_date = all_dates.iloc[-1]
    # Use the caller-supplied window length (not a notebook global), and
    # numpy directly: the ``pd.np`` alias no longer exists in pandas >= 2.0.
    train_time = pd.to_timedelta(np.ceil(train_length), unit="D")
    start_date_train = start_date - train_time
    # Snap the nominal window start to a date that actually occurs in the data.
    nearest_date_train, _ = nearest_date(all_dates, start_date_train)
    # Compare on the parsed dates so the filter also works when the 'Date'
    # column has not been converted to datetimes yet.
    in_window = (all_dates >= nearest_date_train) & (all_dates < start_date)
    training_set = veggie_data[in_window]
    return training_set

In [246]:
def build_features(input_data):
    """Build the model input: a one-element list with the mean 'Average Price'.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Frame containing an ``'Average Price'`` column.

    Returns
    -------
    list
        ``[mean_price]`` -- the column mean wrapped in a list.
    """
    mean_price = input_data['Average Price'].mean()
    features = [mean_price]
    return features

In [247]:
def current_price(input_data):
    """Return the mean 'Average Price' over the most recent rows.

    The last seven rows of ``input_data`` are used (all rows when there are
    fewer than seven); that corresponds to one week only if the data holds one
    row per day, which this function does not check.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Frame containing an ``'Average Price'`` column, oldest rows first.

    Returns
    -------
    float
        Mean of the selected prices.
    """
    recent_prices = input_data['Average Price'].iloc[-7:]
    return recent_prices.mean()

In [248]:
# City identifiers; each is used verbatim in the data/model file names and in
# the name of the prediction CSV written per city.
cities = ['NEW+YORK', 'LOS+ANGELES']
# Produce identifiers, used verbatim in the data/model file names.
# The '+' and '%2C' sequences look URL-encoded (space and comma) -- presumably
# they mirror the query strings the data was downloaded with; TODO confirm.
veggies = ['APPLES','APRICOTS','ASPARAGUS','AVOCADOS','BANANAS','BEANS','BEETS','BLACKBERRIES','BLUEBERRIES','BROCCOLI','BRUSSELS+SPROUTS','CABBAGE','CANTALOUPS','CARROTS','CAULIFLOWER','CELERY','CHERRIES','CLEMENTINES', 'CUCUMBERS','ENDIVE','GARLIC','GINGER+ROOT','GRAPEFRUIT','GRAPES','HONEYDEWS','KIWIFRUIT','LEMONS','LETTUCE%2C+ICEBERG','LETTUCE%2C+ROMAINE','LETTUCE%2C+RED+LEAF','LETTUCE%2C+GREEN+LEAF', 'LIMES','MANGOES','NECTARINES','OKRA','ORANGES','PEACHES','PEARS','PEAS+GREEN','PEPPERS%2C+BELL+TYPE','PINEAPPLES','PLUMS','POTATOES','RADISHES','RASPBERRIES','RHUBARB','SPINACH','SQUASH','STRAWBERRIES','TURNIPS']
# Length of the training window in days (three 365-day years), passed to get_train.
train_days = 365*3

In [252]:
# Column headers of the per-city prediction table written at the end of each
# city iteration.
labels = ['Item', 'Current Price', '3 Month Prediction']
# City loop: for every city, predict each produce item with its stored model
# and write one CSV of results.

for c in cities:
    
    # One (item, current price, prediction) record per produce item.
    outputs = []

    for v in veggies:

        # Load this city/item's trimmed price history, sorted by date.
        input_data = read_trimmed_data(c, v, data_dir)

        # Keep only the trailing training window of the history.
        historical_data = get_train(input_data, train_days)

        # Single model feature: the mean price over the window.
        inputX = build_features(historical_data)
        # NOTE(review): computed from historical_data, which get_train filters
        # with a strict '<' on the last date, so the newest row is not
        # included in the "current" price -- confirm this is intended.
        currentprice = current_price(historical_data)

        # Load the pickled model saved for this city/item.
        model = read_stored_model(c, v, model_dir)

        # The column label suggests the model output is a price three months
        # ahead; that cannot be verified from this cell.
        outputY = model.predict(inputX)
        # Progress indicator: prints one line per item.
        print(v)

        outputs.append((v, currentprice, outputY[0]))
    # Assemble this city's records into a table with the headers above.
    all_predictions = pd.DataFrame.from_records(outputs, columns=labels)

    # Written with to_csv defaults, so the DataFrame index is saved as well.
    # predict_dir must already exist.
    all_predictions.to_csv(predict_dir + c + '_predictions.csv')

    # end city loop


APPLES
APRICOTS
ASPARAGUS
AVOCADOS
BANANAS
BEANS
BEETS
BLACKBERRIES
BLUEBERRIES
BROCCOLI
BRUSSELS+SPROUTS
CABBAGE
CANTALOUPS
CARROTS
CAULIFLOWER
CELERY
CHERRIES
CLEMENTINES
CUCUMBERS
ENDIVE
GARLIC
GINGER+ROOT
GRAPEFRUIT
GRAPES
HONEYDEWS
KIWIFRUIT
LEMONS
LETTUCE%2C+ICEBERG
LETTUCE%2C+ROMAINE
LETTUCE%2C+RED+LEAF
LETTUCE%2C+GREEN+LEAF
LIMES
MANGOES
NECTARINES
OKRA
ORANGES
PEACHES
PEARS
PEAS+GREEN
PEPPERS%2C+BELL+TYPE
PINEAPPLES
PLUMS
POTATOES
RADISHES
RASPBERRIES
RHUBARB
SPINACH
SQUASH
STRAWBERRIES
TURNIPS
APPLES
APRICOTS
ASPARAGUS
AVOCADOS
BANANAS
BEANS
BEETS
BLACKBERRIES
BLUEBERRIES
BROCCOLI
BRUSSELS+SPROUTS
CABBAGE
CANTALOUPS
CARROTS
CAULIFLOWER
CELERY
CHERRIES
CLEMENTINES
CUCUMBERS
ENDIVE
GARLIC
GINGER+ROOT
GRAPEFRUIT
GRAPES
HONEYDEWS
KIWIFRUIT
LEMONS
LETTUCE%2C+ICEBERG
LETTUCE%2C+ROMAINE
LETTUCE%2C+RED+LEAF
LETTUCE%2C+GREEN+LEAF
LIMES
MANGOES
NECTARINES
OKRA
ORANGES
PEACHES
PEARS
PEAS+GREEN
PEPPERS%2C+BELL+TYPE
PINEAPPLES
PLUMS
POTATOES
RADISHES
RASPBERRIES
RHUBARB
SPINACH
SQUASH
STR