# Timeseries Model

### This model uses the data produced by the DTM analysis: it is trained on that data and then tries to forecast from it
Due to poor results on historical forecasting, this notebook does not include any future prediction

In [None]:
import random

import matplotlib.pyplot as plt
import pandas as pd
from darts import TimeSeries
from darts.dataprocessing.transformers import MissingValuesFiller, Scaler
from darts.metrics import mae, mape, r2_score
from darts.models import NBEATSModel
from tqdm.notebook import tqdm
import statistics


In [None]:
def display_forecast(pred_series, ts_transformed, forecast_type, start_date=None, only_stats=False):
    """
    Score a DARTS forecast against the actual series and optionally plot both.
    Adapted from: https://unit8co.github.io/darts/examples/07-NBEATS-examples.html

    Both series are first passed through ``inverse_transform`` of the
    module-level ``scaler``.
    NOTE(review): that global is rebound and refit by ``topics_to_series`` on
    every topic, so it may not be the scaler that produced ``ts_transformed``
    -- confirm this is intended.

    Parameters:
        pred_series: forecast series (expected in the scaler's transformed space).
        ts_transformed: actual series (expected in the scaler's transformed space).
        forecast_type: string placed in the legend label of the forecast curve.
        start_date: when truthy and a plot is drawn, the actual series is
            trimmed with ``drop_before(start_date)``; not applied when
            ``only_stats`` is true.
        only_stats: when true, no figure is created.

    Returns:
        Tuple ``(mae, mape, r2)`` computed on the inverse-transformed series.
    """
    actual = scaler.inverse_transform(ts_transformed)
    forecast = scaler.inverse_transform(pred_series)

    draw_plot = not only_stats
    if draw_plot:
        plt.figure(figsize=(12, 7))
        if start_date:
            actual = actual.drop_before(start_date)
        actual.univariate_component(0).plot(label="actual")
        forecast.plot(label=" ".join(("historic", forecast_type, "forecasts")))
        plt.title(f"R2: {r2_score(actual, forecast)}")
        plt.legend()

    # Metrics are evaluated in the order MAE, MAPE, R2
    return (
        mae(actual, forecast),
        mape(actual, forecast),
        r2_score(actual, forecast),
    )


In [None]:
def topics_to_series(topics, tot_df):
    """
    Build one filled and scaled DARTS ``TimeSeries`` per topic.

    For every entry of ``topics``, the rows of ``tot_df`` whose "Topic" equals
    it are converted into a series of "Frequency" over "Timestamp" with
    ``freq="YS"``, passed through the missing-values filler with
    ``method="quadratic"``, and then through ``scaler.fit_transform``.

    Side effect: the module-level ``scaler`` and ``filler`` are rebound to new
    instances on every call, and ``scaler`` is refit on each topic in turn.

    Returns a list of series in the iteration order of ``topics``.
    """
    global scaler, filler
    scaler = Scaler(n_jobs=-1, name="Faster")
    filler = MissingValuesFiller(n_jobs=-1, name="Faster")

    def build_series(topic):
        # Rows belonging to this topic only
        topic_rows = tot_df[tot_df["Topic"] == topic]
        raw = TimeSeries.from_dataframe(
            topic_rows, "Timestamp", "Frequency", freq="YS"
        )
        filled = filler.transform(raw, method="quadratic")
        return scaler.fit_transform(filled)

    return [build_series(topic) for topic in tqdm(topics)]


In [None]:
# Load the DTM output that was previously saved on Drive
dtm_output_csv = "output/DTM/collab_dataset_07_22__220000_DTM_collab_custom_default_custom_10_(1, 2)_700.csv"
topics_over_time = pd.read_csv(dtm_output_csv)


In [None]:
# Per-topic count of non-null values in every column
grouped_tot = topics_over_time.groupby(by="Topic").count().reset_index()

# Topics with fewer than 6 "Frequency" entries are too short to be used in
# either training or testing, so they are removed
is_too_short = grouped_tot["Frequency"] < 6
outlier_topics = grouped_tot.loc[is_too_short, "Topic"].unique()
is_outlier_row = topics_over_time["Topic"].isin(outlier_topics)
topics_over_time = topics_over_time[~is_outlier_row]

In [None]:
# Split the topics 90/10 into training and testing sets and convert each set
# into a list of DARTS series
topics = set(topics_over_time["Topic"].unique())

# random.sample() needs a sequence: sampling directly from a set was deprecated
# in Python 3.9 and raises TypeError from 3.11, so sample from a sorted list.
training_topics = random.sample(sorted(topics), int(len(topics) * 9 / 10))
training_series = topics_to_series(training_topics, topics_over_time)

# Everything not drawn for training becomes the test set.
# NOTE: topics_to_series rebinds the module-level scaler/filler on every call,
# so after this second call they are the instances created for the test topics.
test_topics = list(topics.difference(set(training_topics)))
testing_series = topics_to_series(test_topics, topics_over_time)


In [None]:
# Configure the N-BEATS model, then train it on all training series
nbeats_settings = {
    "input_chunk_length": 3,
    "output_chunk_length": 3,
    "generic_architecture": True,
    "num_stacks": 10,
    "num_blocks": 1,
    "num_layers": 4,
    "layer_widths": 512,
    "n_epochs": 100,
    "nr_epochs_val_period": 1,
    "model_name": "nbeats_run",
}
model = NBEATSModel(**nbeats_settings)
model.fit(training_series)


### Visualising results for a topic

In [None]:
# Position (a list index, not a topic id) of the series inspected in the cells
# below: it indexes training_topics / training_series and, later, testing_series.
test_topic = 43

In [None]:
# Show the raw DTM rows of the training topic at position `test_topic`
selected_topic = training_topics[test_topic]
topics_over_time.loc[topics_over_time["Topic"] == selected_topic]

In [None]:
# Backtest the already-trained model (no retraining) on one training series,
# then plot the forecast against the actual values and collect the metrics
train_forecast_start = pd.Timestamp("2014-01-01")
train_sample = training_series[test_topic]

pred_series = model.historical_forecasts(
    train_sample,
    start=train_forecast_start,
    forecast_horizon=3,
    retrain=False,
    verbose=True,
)

mae_result, mape_result, r2_result = display_forecast(
    pred_series, train_sample, "annual", start_date=train_forecast_start
)


In [None]:
# Report the three metrics returned by the last display_forecast call
for metric_name, metric_value in (
    ("MAE", mae_result),
    ("MAPE", mape_result),
    ("R2", r2_result),
):
    print(f"{metric_name}: {metric_value}")


In [None]:
# Persist the trained N-BEATS model to disk.
# NOTE(review): presumably the "models/" directory must already exist -- confirm.
model.save_model('models/nbeats_forecasting_model.pth.tar')

In [None]:
# Same backtest as for the training sample, this time on a held-out test series.
# NOTE(review): `test_topic` indexes the testing list here, which holds only the
# topics left out of training -- confirm the index is within range.
test_forecast_start = pd.Timestamp("2014-01-01")
test_sample = testing_series[test_topic]

pred_series = model.historical_forecasts(
    test_sample,
    start=test_forecast_start,
    forecast_horizon=3,
    retrain=False,
    verbose=True,
)

mae_result, mape_result, r2_result = display_forecast(
    pred_series, test_sample, "annual", start_date=test_forecast_start
)


In [None]:
# Report the three metrics returned by the last display_forecast call
for metric_name, metric_value in (
    ("MAE", mae_result),
    ("MAPE", mape_result),
    ("R2", r2_result),
):
    print(f"{metric_name}: {metric_value}")

### Computing historical forecasts performance

In [None]:
# Backtest every held-out series and aggregate MAE / MAPE / R2 over them
mae_list = []
mape_list = []
r2_list = []
skipped_series = 0  # series whose backtest or scoring raised ValueError
evaluation_start = pd.Timestamp("2014-01-01")

# Performance on testing data
for series in tqdm(testing_series):
    try:
        pred_series = model.historical_forecasts(
            series,
            start=evaluation_start,
            forecast_horizon=3,
            retrain=False,
            verbose=True,
        )
        mae_result, mape_result, r2_result = display_forecast(
            pred_series,
            series,
            "annual",
            start_date=evaluation_start,
            only_stats=True,
        )
    except ValueError:
        # Best-effort evaluation: a series that cannot be backtested or scored
        # is left out, but counted so the omission is visible in the output.
        skipped_series += 1
        continue
    mae_list.append(mae_result)
    mape_list.append(mape_result)
    r2_list.append(r2_result)

print(mae_list)
print(mape_list)
print(r2_list)

if skipped_series:
    print(f"Skipped {skipped_series} of {len(testing_series)} series (ValueError)")

# statistics.mean raises StatisticsError on an empty list, which happens when
# every series was skipped (or there were none to begin with)
if mae_list:
    print(statistics.mean(mae_list))
    print(statistics.mean(mape_list))
    print(statistics.mean(r2_list))
else:
    print("No series could be evaluated; mean metrics are unavailable")

