In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr
from sklearn.linear_model import LinearRegression
from pathlib import Path

# Display settings: never truncate columns and render floats with two decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.2f}'.format)

# Locations of the raw CSV inputs, relative to the notebook's working directory.
# NOTE: despite the `_dir` suffix, each of these points at a single file.
oil_data_dir = Path('../data/original/oil.csv')
holidays_events_data_dir = Path('../data/original/holidays_events.csv')
transactions_data_dir = Path('../data/original/transactions.csv')
stores_data_dir = Path('../data/original/stores.csv')
train_data_dir = Path('../data/original/train.csv')

# Explicit dtypes handed to `read_csv` for the listed columns; any column not
# listed here keeps whatever dtype pandas infers.
stores_datatype_map = {'cluster': 'int8'}
training_datatype_map = {
    'store_nbr': 'uint8',
    'family': 'category',
    'sales': 'float32',
    'onpromotion': 'uint64',
}

# Load every table. Each table that is read with `parse_dates` gets its `date`
# column parsed as datetimes.
train_df = pd.read_csv(train_data_dir, parse_dates=['date'], dtype=training_datatype_map)
stores_df = pd.read_csv(stores_data_dir, dtype=stores_datatype_map)
transactions_df = (
    pd.read_csv(transactions_data_dir, parse_dates=['date'])
    .sort_values(['date', 'store_nbr'])  # chronological, then by store
)
holidays_events_df = pd.read_csv(holidays_events_data_dir, parse_dates=['date'])
oil_df = pd.read_csv(oil_data_dir, parse_dates=['date'])

In [4]:
from darts.timeseries import TimeSeries
from darts.utils.missing_values import fill_missing_values
# Restrict the training table to a single series: store 1, family 'GROCERY I'.
store_family_mask = (train_df.store_nbr == 1) & (train_df.family == 'GROCERY I')
sdf = train_df.loc[store_family_mask]

# Build the daily target series from `sales`.
#
# `onpromotion` is intentionally NOT passed as `static_covariates`: it changes
# from date to date, whereas static covariates are per-series constants (for a
# pandas Series, darts treats the *index* as the covariate names, so the old
# call registered one "static variable" per row of `sdf`). To use promotions as
# a feature, build a separate TimeSeries from the `onpromotion` column and pass
# it to the model as a past/future covariate instead.
data = TimeSeries.from_dataframe(
    sdf,
    time_col='date',
    value_cols='sales',
    fill_missing_dates=True,  # insert any dates absent from the daily index
    freq='D',
)
# Fill the gaps left by the dates inserted above (darts' default fill strategy).
data = fill_missing_values(data)

# Number of observed rows for this series, before missing dates were inserted.
len(sdf.index)

1684

In [5]:
# Display the selected store/family slice. `pd.concat` silently drops `None`
# entries, so the former `pd.concat([sdf, None])` was just a copy of `sdf`.
sdf

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
12,12,2013-01-01,1,GROCERY I,0.00,0
1794,1794,2013-01-02,1,GROCERY I,2652.00,0
3576,3576,2013-01-03,1,GROCERY I,2121.00,0
5358,5358,2013-01-04,1,GROCERY I,2056.00,0
7140,7140,2013-01-05,1,GROCERY I,2216.00,0
...,...,...,...,...,...,...
2991990,2991990,2017-08-11,1,GROCERY I,1270.00,24
2993772,2993772,2017-08-12,1,GROCERY I,1630.00,28
2995554,2995554,2017-08-13,1,GROCERY I,952.00,19
2997336,2997336,2017-08-14,1,GROCERY I,2407.00,30


In [74]:
from darts.models import LightGBMModel
from darts.utils.utils import ModelMode, SeasonalityMode

# Forecasting setup: the model sees the previous N_LAGS daily sales values and
# is asked for FORECAST_HORIZON days past the end of `data`.
N_LAGS = 14
FORECAST_HORIZON = 10
SEED = 42  # fixed so that re-running the cell reproduces the same forecast

model = LightGBMModel(lags=N_LAGS, random_state=SEED)
# `a` is kept only for backward compatibility with any later cell that uses it;
# darts documents `fit` as returning the fitted model itself — TODO confirm and
# drop the alias if nothing references it.
a = model.fit(data)
model.predict(FORECAST_HORIZON)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3570
[LightGBM] [Info] Number of data points in the train set: 1674, number of used features: 14
[LightGBM] [Info] Start training from score 2227.901135
