# Feature Engineering

### Libraries

In [None]:
# import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import boxcox
from statsmodels.tsa.seasonal import seasonal_decompose

### Data

In [None]:
# Load the per-month negative, neutral and positive review time series.
# The first CSV column becomes the index and is parsed as dates.

(
    negative_reviews_per_month,
    neutral_reviews_per_month,
    positive_reviews_per_month,
) = (
    pd.read_csv(csv_path, index_col=0, parse_dates=True)
    for csv_path in (
        'data/negative_reviews_per_month.csv',
        'data/neutral_reviews_per_month.csv',
        'data/positive_reviews_per_month.csv',
    )
)

In [None]:
# Load the per-month sentiment time series, using the first CSV column as a
# date-parsed index.

sentiment_per_month = pd.read_csv(
    'data/sentiment_per_month.csv',
    parse_dates=True,
    index_col=0,
)

# Bare expression so the notebook displays the loaded frame.
sentiment_per_month

In [None]:
# Load the per-month sentiment time series of each business category
# (restaurants, nightlife, breakfast & brunch), indexed by parsed dates.

(
    sentiment_restaurants_per_month,
    sentiment_nightlife_per_month,
    sentiment_breakfast_brunch_per_month,
) = (
    pd.read_csv(csv_path, index_col=0, parse_dates=True)
    for csv_path in (
        'data/sentiment_restaurants_per_month.csv',
        'data/sentiment_nightlife_per_month.csv',
        'data/sentiment_breakfast_brunch_per_month.csv',
    )
)

### Adjustments

In [None]:
# Box-Cox transform the negative, neutral and positive review counts.
# Each 'count' column is shifted by +1 before the transform; the lambda that
# boxcox returns next to the transformed values is discarded.
# NOTE(review): the +1 shift presumably guards against zero-count months,
# since Box-Cox needs strictly positive input -- confirm.

shifted_negative_reviews = negative_reviews_per_month['count'].add(1)
boxcox_negative_reviews_per_month = negative_reviews_per_month.copy()
boxcox_negative_reviews_per_month['count'], _ = boxcox(shifted_negative_reviews)

shifted_neutral_reviews = neutral_reviews_per_month['count'].add(1)
boxcox_neutral_reviews_per_month = neutral_reviews_per_month.copy()
boxcox_neutral_reviews_per_month['count'], _ = boxcox(shifted_neutral_reviews)

shifted_positive_reviews = positive_reviews_per_month['count'].add(1)
boxcox_positive_reviews_per_month = positive_reviews_per_month.copy()
boxcox_positive_reviews_per_month['count'], _ = boxcox(shifted_positive_reviews)

# Draw the three transformed series on one shared figure.
plt.figure(figsize=(16, 4))
plt.plot(
    boxcox_negative_reviews_per_month,
    label='Negative Reviews per Month (Box-Cox Transformed)',
)
plt.plot(
    boxcox_neutral_reviews_per_month,
    label='Neutral Reviews per Month (Box-Cox Transformed)',
)
plt.plot(
    boxcox_positive_reviews_per_month,
    label='Positive Reviews per Month (Box-Cox Transformed)',
)
plt.title('Box-Cox Transformed Time Series')
plt.legend()
plt.show()

In [None]:
# Box-Cox transform the 'sentiment' column on a copy of the sentiment frame;
# the fitted lambda returned by boxcox is discarded.
# NOTE(review): Box-Cox requires strictly positive input -- confirm that the
# monthly sentiment values are always > 0.

boxcox_sentiment_per_month = sentiment_per_month.copy()
boxcox_sentiment_per_month['sentiment'], _ = boxcox(
    boxcox_sentiment_per_month['sentiment']
)

# Overlay the original and the transformed series for comparison.
plt.figure(figsize=(16, 4))
plt.plot(
    sentiment_per_month,
    label='Sentiment per Month',
)
plt.plot(
    boxcox_sentiment_per_month,
    label='Sentiment per Month (Box-Cox Transformed)',
)
plt.title('Box-Cox Transformed Time Series')
plt.legend()
plt.show()

### Decomposition

In [None]:
def decomposition(time_series, label='sentiment', model='additive', freq=12):
    '''
    Split one column of a time series into trend, seasonal and residual parts.

    Parameters:
        time_series: frame holding the series to decompose; it is copied,
            never modified.
        label: name of the column handed to seasonal_decompose.
        model: forwarded to seasonal_decompose as `model`
            (the calls in this notebook use 'additive' or 'multiplicative').
        freq: forwarded to seasonal_decompose as `period`.

    Returns:
        A copy of `time_series` with three extra columns:
        'trend', 'seasonal' and 'residual'.
    '''
    result = time_series.copy()
    components = seasonal_decompose(result[label], model=model, period=freq)

    # Output column name -> attribute of the seasonal_decompose result.
    column_sources = (
        ('trend', 'trend'),
        ('seasonal', 'seasonal'),
        ('residual', 'resid'),
    )
    for column, attribute in column_sources:
        result[column] = getattr(components, attribute)
    return result

In [None]:
def plot_decomposition(data, label='sentiment', title=None):
    '''
    Plot a decomposed time series as four vertically stacked panels.

    Parameters:
        data: frame containing the column `label` plus the columns
            'trend', 'seasonal' and 'residual'.
        label: name of the column holding the original series.
        title: optional figure-level title; skipped when falsy.

    Returns:
        None. The figure is displayed with plt.show().
    '''
    # (column to plot, legend text), in top-to-bottom panel order.
    panels = (
        (label, 'Original'),
        ('trend', 'Trend'),
        ('seasonal', 'Seasonal'),
        ('residual', 'Residual'),
    )

    plt.figure(figsize=(12, 8))
    for row, (column, legend_text) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, row)
        plt.plot(data[column], label=legend_text, color='blue')
        plt.legend(loc='best')

    # Set the figure title BEFORE tight_layout so the layout pass can make
    # room for it; adding it afterwards lets it overlap the top panel.
    if title:
        plt.suptitle(title)
    plt.tight_layout()
    plt.show()

In [None]:
# Additive decomposition of the Box-Cox transformed 'count' column of the
# negative, neutral and positive review series (default period).

negative_reviews_per_month_additive = decomposition(
    boxcox_negative_reviews_per_month,
    model='additive',
    label='count',
)
neutral_reviews_per_month_additive = decomposition(
    boxcox_neutral_reviews_per_month,
    model='additive',
    label='count',
)
positive_reviews_per_month_additive = decomposition(
    boxcox_positive_reviews_per_month,
    model='additive',
    label='count',
)

In [None]:
# Show the decomposition panels for the negative, neutral and positive review
# counts, printing blank lines between consecutive figures.

plot_decomposition(
    negative_reviews_per_month_additive,
    title='Negative Reviews',
    label='count',
)
print('\n\n')
plot_decomposition(
    neutral_reviews_per_month_additive,
    title='Neutral Reviews',
    label='count',
)
print('\n\n')
plot_decomposition(
    positive_reviews_per_month_additive,
    title='Positive Reviews',
    label='count',
)

In [None]:
# Additive decomposition of the overall sentiment series.
# A period of 7 months is used because of the PACF plot analysis.

sentiment_per_month_additive = decomposition(
    sentiment_per_month,
    freq=7,
    model='additive',
)

In [None]:
# Show the additive decomposition panels of the overall sentiment series.

plot_decomposition(data=sentiment_per_month_additive)

In [None]:
# Multiplicative decomposition of the overall sentiment series.
# A period of 7 months is used because of the PACF plot analysis.

sentiment_per_month_multiplicative = decomposition(
    sentiment_per_month,
    freq=7,
    model='multiplicative',
)

In [None]:
# Show the multiplicative decomposition panels of the overall sentiment series.

plot_decomposition(data=sentiment_per_month_multiplicative)

In [None]:
# Write the additive decompositions of the three review-count series and of
# the overall sentiment series to CSV files under features/.

negative_reviews_per_month_additive.to_csv(
    path_or_buf='features/negative_reviews_per_month_additive.csv'
)
neutral_reviews_per_month_additive.to_csv(
    path_or_buf='features/neutral_reviews_per_month_additive.csv'
)
positive_reviews_per_month_additive.to_csv(
    path_or_buf='features/positive_reviews_per_month_additive.csv'
)
sentiment_per_month_additive.to_csv(
    path_or_buf='features/sentiment_per_month_additive.csv'
)

#### Category sentiment

In [None]:
# Additive decomposition (period of 7 months) of the sentiment series of each
# business category.

sentiment_restaurants_per_month_additive = decomposition(
    sentiment_restaurants_per_month,
    freq=7,
    model='additive',
)
sentiment_nightlife_per_month_additive = decomposition(
    sentiment_nightlife_per_month,
    freq=7,
    model='additive',
)
sentiment_breakfast_brunch_per_month_additive = decomposition(
    sentiment_breakfast_brunch_per_month,
    freq=7,
    model='additive',
)

In [None]:
# Show the additive decomposition panels of each category's sentiment series,
# printing blank lines between consecutive figures.
# The category name must be passed as `title=`: the second positional
# parameter of plot_decomposition is `label` (the column to plot), so a
# positional 'Restaurants' would be looked up as a column and raise KeyError.

plot_decomposition(sentiment_restaurants_per_month_additive, title='Restaurants')
print('\n\n')
plot_decomposition(sentiment_nightlife_per_month_additive, title='Nightlife')
print('\n\n')
plot_decomposition(sentiment_breakfast_brunch_per_month_additive, title='Breakfast & Brunch')

In [None]:
# Write the additive decomposition of each category's sentiment series to a
# CSV file under features/.

sentiment_restaurants_per_month_additive.to_csv(
    path_or_buf='features/sentiment_restaurants_per_month_additive.csv'
)
sentiment_nightlife_per_month_additive.to_csv(
    path_or_buf='features/sentiment_nightlife_per_month_additive.csv'
)
sentiment_breakfast_brunch_per_month_additive.to_csv(
    path_or_buf='features/sentiment_breakfast_brunch_per_month_additive.csv'
)