# Modelling

### Libraries

In [46]:
# import libraries

import pandas as pd
import numpy as np

from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima.model import ARMA
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

### Data

In [7]:
# Monthly review-count series, one CSV per polarity (negative / neutral / positive).
# The first CSV column becomes the index and pandas tries to parse it as dates.

negative_reviews_per_month = pd.read_csv(
    'features/negative_reviews_per_month.csv',
    index_col=0,
    parse_dates=True,
)
neutral_reviews_per_month = pd.read_csv(
    'features/neutral_reviews_per_month.csv',
    index_col=0,
    parse_dates=True,
)
positive_reviews_per_month = pd.read_csv(
    'features/positive_reviews_per_month.csv',
    index_col=0,
    parse_dates=True,
)

In [8]:
# Monthly overall sentiment series.
# The first CSV column becomes the index and pandas tries to parse it as dates.

sentiment_per_month = pd.read_csv(
    'features/sentiment_per_month.csv',
    index_col=0,
    parse_dates=True,
)

In [9]:
# Monthly sentiment series per business category (restaurants, nightlife,
# breakfast & brunch). The first CSV column becomes the index and pandas tries
# to parse it as dates.

sentiment_restaurants_per_month = pd.read_csv(
    'features/sentiment_restaurants_per_month.csv',
    index_col=0,
    parse_dates=True,
)
sentiment_nightlife_per_month = pd.read_csv(
    'features/sentiment_nightlife_per_month.csv',
    index_col=0,
    parse_dates=True,
)
sentiment_breakfast_brunch_per_month = pd.read_csv(
    'features/sentiment_breakfast_brunch_per_month.csv',
    index_col=0,
    parse_dates=True,
)

### Models

In [28]:
def baseline_model(data, method='mean'):
    '''
    Naive reference forecast for the next value of a time series.

    Parameters
    ----------
    data : pandas object exposing ``mean()``, ``median()`` and ``iloc``
        Observed history.
    method : str
        'mean'   -> ``data.mean()``
        'median' -> ``data.median()``
        'naive'  -> last observation (``data.iloc[-1]``)

    Raises
    ------
    ValueError
        If ``method`` is not one of the three names above.
    '''
    if method == 'naive':
        return data.iloc[-1]
    if method == 'median':
        return data.median()
    if method == 'mean':
        return data.mean()
    raise ValueError('Method not supported')

In [38]:
def ses_model(data):
    '''
    One-step-ahead forecast from Simple Exponential Smoothing.

    Fits ``SimpleExpSmoothing`` on ``data`` with the default ``fit()``
    settings and returns the first (and only) value of a 1-step forecast.
    '''
    one_step_forecast = SimpleExpSmoothing(data).fit().forecast(1)
    return one_step_forecast.iloc[0]

In [None]:
def holt_linear_damped_model(data):
    '''
    One-step-ahead forecast from Holt's linear trend method with damping.

    Fits ``ExponentialSmoothing`` with an additive, damped trend
    (``trend='add'``, ``damped_trend=True``) using the default ``fit()``
    settings and returns the first value of a 1-step forecast.
    '''
    damped_holt = ExponentialSmoothing(data, trend='add', damped_trend=True)
    next_step = damped_holt.fit().forecast(1)
    return next_step.iloc[0]

In [None]:
def arima_model(data, order):
    '''
    One-step-ahead forecast from an ARIMA model.

    Parameters
    ----------
    data : array-like or pandas Series
        Observed history the model is fitted on.
    order : tuple
        Passed straight to ``ARIMA(order=...)``, i.e. ``(p, d, q)``.

    Returns
    -------
    scalar
        The single forecast value for the step following ``data``.
    '''
    fitted_model = ARIMA(data, order=order).fit()
    prediction = fitted_model.forecast(steps=1)
    # For pandas input, forecast() returns a Series labelled with the *next*
    # index value, so ``prediction[0]`` is a label lookup: it raises KeyError
    # on an integer index and relies on a deprecated positional fallback on a
    # date index. Going through an array gives a positional read that works
    # for both Series and ndarray results.
    return np.asarray(prediction)[0]

In [None]:
def arma_model(data, order):
    '''
    One-step-ahead forecast from an ARMA(p, q) model.

    ``statsmodels.tsa.arima.model`` does not provide an ``ARMA`` class, so
    the model is fitted as the equivalent ``ARIMA(p, 0, q)``.

    Parameters
    ----------
    data : array-like or pandas Series
        Observed history the model is fitted on.
    order : tuple
        Either ``(p, q)`` or ``(p, 0, q)``.

    Returns
    -------
    scalar
        The single forecast value for the step following ``data``.

    Raises
    ------
    ValueError
        If ``order`` is neither ``(p, q)`` nor ``(p, 0, q)``.
    '''
    # NOTE(review): the file-level ``from statsmodels.tsa.arima.model import ARMA``
    # cannot succeed and should be dropped; this function no longer needs it.
    order = tuple(order)
    if len(order) == 2:
        p, q = order
    elif len(order) == 3 and order[1] == 0:
        p, _, q = order
    else:
        raise ValueError('ARMA order must be (p, q) or (p, 0, q)')

    fitted_model = ARIMA(data, order=(p, 0, q)).fit()
    prediction = fitted_model.forecast(steps=1)
    # Positional read that works whether forecast() returned a Series
    # (labelled with the next index value) or a plain ndarray.
    return np.asarray(prediction)[0]

In [None]:
# Registry of candidate forecasters: name -> callable taking the training
# series and returning a one-step-ahead prediction.
models = dict(
    baseline_mean=lambda x: baseline_model(x, method='mean'),
    baseline_median=lambda x: baseline_model(x, method='median'),
    baseline_naive=lambda x: baseline_model(x, method='naive'),
    ses=lambda x: ses_model(x),
    arima=lambda x: arima_model(x, order=(1, 1, 1)),
)

### Predictions

In [None]:
# lambda values
# NOTE(review): presumably the Box-Cox lambdas applied to each series before
# modelling (see inverse_boxcox) - confirm against the feature-engineering step.

# review-count series
lambda_negative = lambda_neutral = lambda_positive = 0.2

# overall sentiment series
lambda_sentiment = 2

# per-category sentiment series
lambda_restaurants = lambda_nightlife = lambda_breakfast_brunch = 0.5

In [None]:
def inverse_boxcox(transformed_y, lambda_value):
    '''
    Undo a Box-Cox transformation to bring a forecast back to the original scale.

    For ``lambda_value == 0`` the inverse is ``exp(y)``; otherwise it is
    ``exp(log(lambda * y + 1) / lambda)``, i.e. ``(lambda * y + 1) ** (1 / lambda)``.

    Parameters
    ----------
    transformed_y : scalar or numpy array
        Value(s) on the Box-Cox scale.
    lambda_value : number
        Lambda used for the forward transformation.
    '''
    if lambda_value != 0:
        shifted = transformed_y * lambda_value + 1
        return np.exp(np.log(shifted) / lambda_value)
    return np.exp(transformed_y)

In [26]:
def cross_validation_predictions(data, model, min_train_size=12):
    '''
    Expanding-window, one-step-ahead cross-validation.

    For every position ``i`` from ``min_train_size`` to ``len(data) - 1`` the
    model is given ``data.iloc[:i]`` and its output is stored next to the
    observation at position ``i``.

    Parameters
    ----------
    data : pandas Series or DataFrame
        Time series in chronological order. A DataFrame with exactly one
        column is treated as that column, so ``actual`` holds scalars
        rather than one-element rows.
    model : callable
        Takes the training slice and returns the prediction for the next step.
    min_train_size : int
        Number of observations in the first training window.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``actual`` and ``prediction``, one row per fold.
        The columns are present even when there are no folds.

    Raises
    ------
    ValueError
        If ``min_train_size`` is negative.
    '''
    if min_train_size < 0:
        # A negative start would slice from the end of the series and build
        # training windows that are not a prefix of the test point.
        raise ValueError('min_train_size must be non-negative')

    if isinstance(data, pd.DataFrame) and data.shape[1] == 1:
        data = data.iloc[:, 0]

    results = [
        {
            'date': data.index[i],
            'actual': data.iloc[i],
            'prediction': model(data.iloc[:i]),
        }
        for i in range(min_train_size, len(data))
    ]
    # Explicit columns keep the schema stable when no fold could be built.
    return pd.DataFrame(results, columns=['date', 'actual', 'prediction'])

### Evaluation

In [41]:
def mean_absolute_percentage_error(y_true, y_pred):
    '''
    Mean Absolute Percentage Error (MAPE), expressed in percent.

    Computed as ``mean(|(y_true - y_pred) / y_true|) * 100`` after converting
    both inputs to numpy arrays. No special handling is applied where
    ``y_true`` is zero.
    '''
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = (actual - forecast) / actual
    return np.mean(np.abs(relative_error)) * 100

In [42]:
def mean_absolute_scaled_error(y_true, y_pred, y_train):
    '''
    Mean Absolute Scaled Error (MASE).

    The forecast MAE (``mean(|y_true - y_pred|)``) is divided by the mean
    absolute difference between consecutive values of ``y_train``, i.e. the
    in-sample error of a one-step naive (last value) forecast.
    '''
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    history = np.array(y_train)

    # Errors a "repeat the previous value" forecast would make on the training data.
    naive_errors = history[1:] - history[:-1]
    scale = np.mean(np.abs(naive_errors))

    forecast_mae = np.mean(np.abs(actual - forecast))
    return forecast_mae / scale

In [43]:
def evaluate(y_true, y_pred):
    '''
    Score predictions against actual values with several error metrics.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, mape)`` where ``rmse`` is the square root of
        ``mse`` and ``mape`` comes from ``mean_absolute_percentage_error``.
    '''
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        mean_absolute_error(y_true, y_pred),
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_percentage_error(y_true, y_pred),
    )

#### Negative reviews

In [None]:
# TODO

#### Positive reviews

In [None]:
# TODO

#### Neutral reviews

In [None]:
# TODO

#### Sentiment

In [None]:
# TODO

#### Restaurant sentiment

In [None]:
# TODO

#### Nightlife sentiment

In [None]:
# TODO

#### Breakfast & brunch sentiment

In [None]:
# TODO