In [1]:
"""adjust average prices for inflation using the CPI
stores results in 'IA Average Price' column
adjusts relative to last date - i.e. 2018 dollars throughout
"""

"adjust average prices for inflation using the CPI\nstores results in 'IA Average Price' column\nadjusts relative to last date - i.e. 2018 dollars throughout\n"

In [2]:
import fbprophet as fp
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pickle
from sklearn.model_selection import RandomizedSearchCV
sns.set_style("darkgrid")
import time
import math
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
import random

In [3]:
# Configuration: where the trimmed per-city/per-veggie CSVs live, and which
# (veggie, city) combinations to process. Names are kept in their URL-encoded
# form ('+' for a space, '%2C' for a comma) because they are concatenated
# verbatim into the CSV file names.
datadir = '../trimmed_data_and_plots/'
veggienames = [
    'APPLES', 'APRICOTS', 'ASPARAGUS', 'AVOCADOS',
    'BANANAS', 'BEANS', 'BEETS', 'BLACKBERRIES', 'BLUEBERRIES',
    'BROCCOLI', 'BRUSSELS+SPROUTS',
    'CABBAGE', 'CANTALOUPS', 'CARROTS', 'CAULIFLOWER', 'CELERY',
    'CHERRIES', 'CLEMENTINES', 'CUCUMBERS',
    'ENDIVE',
    'GARLIC', 'GINGER+ROOT', 'GRAPEFRUIT', 'GRAPES',
    'HONEYDEWS',
    'KIWIFRUIT',
    'LEMONS',
    'LETTUCE%2C+ICEBERG', 'LETTUCE%2C+ROMAINE',
    'LETTUCE%2C+RED+LEAF', 'LETTUCE%2C+GREEN+LEAF',
    'LIMES',
    'NECTARINES',
    'OKRA', 'ORANGES',
    'PEACHES', 'PEARS', 'PEAS+GREEN', 'PEPPERS%2C+BELL+TYPE',
    'PINEAPPLES', 'PLUMS', 'POTATOES',
    'RADISHES', 'RASPBERRIES', 'RHUBARB',
    'SPINACH', 'SQUASH', 'STRAWBERRIES',
]
cities = ['NEW+YORK', 'LOS+ANGELES']

In [4]:
def read_trimmed_data_notprophet(city, veggie, data_dir):
    """Load the cleaned price history for one city/veggie pair.

    Reads ``<data_dir><veggie>_<city>_TRIM.csv``. ``data_dir`` is concatenated
    as-is, so it must already end with a path separator.

    Parameters
    ----------
    city : str
        City name exactly as it appears in the file name (e.g. 'NEW+YORK').
    veggie : str
        Produce name exactly as it appears in the file name (e.g. 'APPLES').
    data_dir : str
        Directory prefix holding the ``*_TRIM.csv`` files.

    Returns
    -------
    pd.DataFrame
        Rows sorted by the 'Date' column (parsed to datetimes), with rows
        lacking an 'Average Price' removed, and a contiguous 0..n-1 index.
    """
    trimmed = pd.read_csv(data_dir + veggie + '_' + city + '_TRIM.csv')
    # A CSV written with an unlabeled index comes back with that column named
    # 'Unnamed: 0'; when the column is already called 'Date' this is a no-op.
    trimmed = trimmed.rename(columns={'Unnamed: 0': 'Date'})
    trimmed['Date'] = pd.to_datetime(trimmed['Date'])
    trimmed = trimmed.sort_values(by='Date')
    trimmed = trimmed.dropna(subset=['Average Price'])
    # Reset the index only AFTER dropping rows, so the result has no gaps and
    # callers can safely do positional / index-aligned assignments.
    return trimmed.reset_index(drop=True)

In [5]:
def nearest_date(dates, targdate):
    """Return the entry of ``dates`` closest to ``targdate`` and its distance.

    Parameters
    ----------
    dates : pd.Series (or other iterable) of datetime-like values
        Candidate dates to search.
    targdate : pd.Timestamp or datetime.datetime
        The date to get close to.

    Returns
    -------
    (nearest, timedelta)
        ``nearest`` is the candidate with the smallest absolute difference
        from ``targdate``; if several candidates are equally close, the first
        one in ``dates`` wins. ``timedelta`` is that absolute difference.

    Raises
    ------
    ValueError
        If ``dates`` is empty.
    """
    # Work on a positionally indexed copy so the label returned by idxmin()
    # can be used directly as a position, whatever index the caller's Series has.
    candidates = pd.Series(dates).reset_index(drop=True)
    if candidates.empty:
        raise ValueError("nearest_date() requires at least one candidate date")
    # Vectorized distance computation instead of a Python-level min() with a
    # per-element lambda; this function is called once per price row.
    distances = (candidates - targdate).abs()
    best = distances.idxmin()
    return candidates.iloc[best], distances.iloc[best]

In [6]:
# Load the CPI coefficient table and put it in chronological order with a
# clean 0..n-1 index. Judging by the file name this is presumably the
# CPIAUCNS consumer-price-index series -- TODO confirm the source and whether
# the values are raw index levels or pre-normalized coefficients.
coeffs = (
    pd.read_csv('./CPIAUCNS.csv')
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .sort_values(by='DATE')
    .reset_index(drop=True)
)

In [7]:
# Inflation-adjust every city/veggie file and write the result back to the
# same CSV, adding two columns: 'Inflation Index' (the CPI coefficient of the
# CPI date nearest to each price date) and 'IA Average Price'
# ('Average Price' divided by that coefficient).
#
# NOTE(review): dividing by the coefficient yields "last-date dollars" only if
# the values in CPIAUCNS.csv are already normalized relative to the last date
# -- confirm against how that file was produced.

# The CPI date column is the same for every file, so fetch it once.
datelist = coeffs['DATE']
# Memo of price date -> coefficient. The same dates recur across the files,
# so each nearest-date search is done at most once per distinct date.
coeff_by_date = {}

for c in cities:
    for v in veggienames:
        original_data = read_trimmed_data_notprophet(c, v, datadir)
        original_data = original_data.reset_index(drop=True)
        inflation_index = []
        for targetdate in original_data['Date']:
            if targetdate not in coeff_by_date:
                closest_date, _ = nearest_date(datelist, targetdate)
                closest_coeff = coeffs[coeffs['DATE'] == closest_date].dropna()
                # Column position 1 is read as the coefficient value
                # (assumes the CSV layout is DATE, <value> -- TODO confirm).
                coeff_by_date[targetdate] = closest_coeff.values[0][1]
            inflation_index.append(coeff_by_date[targetdate])
        # Assign the plain list (positional) rather than an index-aligned
        # pd.Series, so the result cannot be silently misaligned.
        original_data["Inflation Index"] = inflation_index
        original_data["IA Average Price"] = original_data['Average Price'] / original_data['Inflation Index']
        original_data = original_data.set_index('Date')
        original_data.to_csv(datadir + v + '_' + c + '_TRIM.csv')
        