In [1]:
from autots import AutoTS,model_forecast
from autots.datasets import load_hourly
import pandas as pd
import numpy as np
from openpyxl import load_workbook
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf
import pmdarima as pm
from autots import load_daily, model_forecast



In [2]:
def update_excel_intensities(filename,df,sheet,mode='a'):
    """Write ``df`` into the worksheet ``sheet`` of the Excel file ``filename``.

    When ``mode`` is ``'a'`` the writer is opened with
    ``if_sheet_exists='replace'``; for any other mode the writer is opened
    with that mode only.
    """
    writer_options = {'mode': mode}
    if mode == 'a':
        # Only the append branch passes the sheet-replacement option.
        writer_options['if_sheet_exists'] = 'replace'

    with pd.ExcelWriter(filename, **writer_options) as writer:
        df.to_excel(writer, sheet_name=sheet)
    
def read_excel_sheet(file,sheetname):
    """Load the worksheet ``sheetname`` of the Excel file ``file`` as a DataFrame.

    The first row is used as the header and the first column as the index.
    """
    read_options = dict(
        sheet_name=sheetname,
        header=0,
        index_col=0,
        keep_default_na=True,
    )
    return pd.read_excel(file, **read_options)

def read_csv(file,sheetname=None):
    """Read a semicolon-separated CSV into a DataFrame with missing values set to 0.

    Parameters
    ----------
    file : path or file-like object
        Passed straight to ``pandas.read_csv``.
    sheetname : optional
        Ignored. The function never read this argument; it is kept (now
        optional) so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        First row used as header, first column as index, every NaN replaced
        by 0.
    """
    customers = pd.read_csv(
        file,
        header=0,
        sep=';',
        index_col=0,
        keep_default_na=True,
    )
    # Return a filled copy instead of mutating in place.
    return customers.fillna(0)

def div100(x):
    """Return ``x`` divided by 100.0 (e.g. percent -> fraction)."""
    scaled = x / 100.0
    return scaled

def mul100(x):
    """Return ``x`` multiplied by 100.0 (e.g. fraction -> percent)."""
    scaled = x * 100.0
    return scaled

def logistic(x):
    """Return ``log(x / (1 - x))``, i.e. the log-odds of ``x``."""
    odds = x / (1 - x)
    return np.log(odds)

def invlogistic(x):
    """Return ``exp(x) / (1 + exp(x))`` without overflowing for large ``x``.

    The direct formula evaluates ``inf / inf`` (nan) once ``exp(x)``
    overflows. Rewriting it as ``exp(-log(1 + exp(-x)))`` and letting
    ``np.logaddexp`` compute the logarithm keeps every intermediate finite,
    so large positive inputs give 1.0 and large negative inputs give 0.0.

    Parameters
    ----------
    x : scalar, numpy array or pandas Series

    Returns
    -------
    Same kind of object as numpy returns for ``x``, with values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def onex(x):
    """Return ``x / (1 - x)``."""
    complement = 1 - x
    return x / complement

def invonex(x):
    """Return ``x / (1 + x)``."""
    denominator = 1 + x
    return x / denominator

def onex2(x):
    """Return ``x**2 / (1 - x**2)``, with the square computed as ``x * x``."""
    numerator = x * x
    denominator = 1 - x * x
    return numerator / denominator

def invonex2(x):
    """Return ``sqrt(x / (1 + x))``."""
    ratio = x / (1 + x)
    return np.sqrt(ratio)

In [3]:
# Read the 'data' sheet and keep only the rows whose index value is below 2024.
# (An earlier source was 'data/predictions_backtesting_2023.xlsx', sheet '2022'.)
data = read_excel_sheet('data/testdata_2024.xlsx', 'data').loc[lambda frame: frame.index < 2024]


ValueError: Worksheet named '2022' not found

In [None]:
# Parse the index labels as years ('%Y') so the frame carries a datetime index,
# then show the result.
data = data.set_axis(pd.to_datetime(data.index, format='%Y'), axis='index')
display(data)


In [None]:

# Per-series weights placeholder: left empty and not passed to fit() in this cell.
weights_hourly = {}

# Models the AutoTS search may choose from. Candidates that are currently
# disabled: ETS, Theta, RollingRegression, FBProphet, WindowRegression,
# UnivariateRegression, PytorchForecasting.
model_list = ['VECM', 'ARDL', 'KalmanStateSpace', 'LATC']

# Configure the search and fit it on the historical frame in one expression.
# As before, `model` ends up bound to whatever fit() returns.
model = AutoTS(
    forecast_length=10,
    frequency='infer',
    prediction_interval=0.90,
    no_negatives=True,
    ensemble=['all'],
    max_generations=5,
    num_validations=1,
    validation_method='Backwards',
    model_list=model_list,
    transformer_list='all',
    models_to_validate=0.2,
    drop_most_recent=0,
    n_jobs='auto',
).fit(data)

# Keep both the prediction object and its point-forecast table for later cells.
prediction = model.predict()
forecasts_df = prediction.forecast


In [None]:
# Stack the forecast rows underneath the historical observations.
full_ensemble = pd.concat((data, forecasts_df), axis=0)

In [None]:
# Display the combined table (historical rows followed by the forecast rows).
full_ensemble

In [None]:
# The two avioituvuus_* series share one figure, so label them and add a
# legend; otherwise the two lines cannot be told apart.
for column in ('avioituvuus_30_34', 'avioituvuus_25_29'):
    plt.plot(full_ensemble[column], label=column)
plt.legend()
plt.title('avioituvuus')
plt.show()

# One figure per remaining series. Each gets a title so it stands alone, and
# an explicit show() so the last plot no longer leaks a Line2D repr as output.
for column in ('sisempikaupunki', 'tfr'):
    plt.plot(full_ensemble[column])
    plt.title(column)
    plt.show()

In [None]:
# Models compared one by one in this section. Currently disabled candidates:
# UnobservedComponents, GLS, ARCH, NeuralProphet, GLM, ETS, Theta,
# RollingRegression, FBProphet, WindowRegression, UnivariateRegression,
# PytorchForecasting, ConstantNaive, LastValueNaive, AverageValueNaive,
# VARMAX, ARIMA.
model_list = ['VECM', 'ARDL', 'KalmanStateSpace', 'LATC']


def pred_model(m,df,forecast_length=10):
    """Forecast with a single AutoTS model via ``model_forecast``.

    Parameters
    ----------
    m : str
        Model name, passed as ``model_name``.
    df : pandas.DataFrame
        Training data, passed as ``df_train``.
    forecast_length : int, default 10
        Number of periods to request. The same value limits the number of
        rows returned; it used to be hard-coded as 10 in both places.

    Returns
    -------
    The first ``forecast_length`` rows of the prediction's ``forecast`` table.
    """
    df_forecast = model_forecast(
        model_name=m,
        model_param_dict={},      # empty: no explicit model parameters supplied
        model_transform_dict={},  # empty: no transformer settings supplied
        df_train=df,
        forecast_length=forecast_length,
        frequency='infer',
        prediction_interval=0.9,
        # NOTE(review): the AutoTS search cell uses no_negatives=True; confirm
        # the difference is intentional.
        no_negatives=False,
        # No future regressors are passed.
        random_seed=321,
        verbose=0,
        n_jobs="auto",
    )
    return df_forecast.forecast.head(forecast_length)

def predict_models(model_list,data,transform=False,ensemble_df=None):
    """Collect per-model forecasts and the ensemble result in one wide frame.

    For every name in ``model_list`` the history in ``data`` is extended with
    that model's forecast from ``pred_model``. The ensemble frame is added
    under the key ``'full'``.

    Parameters
    ----------
    model_list : list of str
        Model names forwarded one by one to ``pred_model``. May be empty, in
        which case only the ``'full'`` block is returned (this used to fail
        on an undefined local).
    data : pandas.DataFrame
        Historical observations; its columns define the ``datasets`` level.
    transform : bool, default False
        If True, models are fitted on ``mul100(onex2(div100(data)))`` and
        their forecasts are mapped back with
        ``mul100(invonex2(div100(forecast)))``.
    ensemble_df : pandas.DataFrame, optional
        Frame stored under ``'full'``. Defaults to the notebook-level
        ``full_ensemble``, which is what the function always used before.

    Returns
    -------
    pandas.DataFrame
        Columns are a two-level MultiIndex named ``('models', 'datasets')``:
        one block per model in ``model_list`` order, then ``'full'``.
    """
    if ensemble_df is None:
        # Backward-compatible fallback to the notebook global.
        ensemble_df = full_ensemble

    if transform:
        modata = data.apply(div100).apply(onex2).apply(mul100)
    else:
        modata = data.copy()

    cols = data.columns
    frames = {}
    for m in model_list:
        fd = pred_model(m, modata)
        if transform:
            # Undo the transform so forecasts are on the same scale as `data`.
            fd = fd.apply(div100).apply(invonex2).apply(mul100)
        # History first, forecast rows after; columns in `data` order.
        frames[m] = pd.concat([data, fd]).reindex(columns=cols)
    frames['full'] = ensemble_df.reindex(columns=cols)

    # Dict keys become the outer ('models') column level.
    return pd.concat(frames, axis=1, names=['models', 'datasets'])

def plot_model(fulldf,dataset='avioituvuus_30_34'):
    """Overlay one dataset's series from every model block in ``fulldf``.

    Parameters
    ----------
    fulldf : pandas.DataFrame
        Frame whose first column level holds model names and whose second
        level holds dataset names (the layout ``predict_models`` returns).
    dataset : str
        Dataset (second-level column) to draw; also used as the figure title.

    The model names are read from ``fulldf`` itself. The notebook-level
    ``model_list`` is no longer consulted, so the plot cannot fail or drop
    lines when that global differs from the models in the frame.
    """
    # unique() keeps first-appearance order, i.e. the order the blocks were added.
    models = fulldf.columns.get_level_values(0).unique()
    for m in models:
        plt.plot(fulldf[m][dataset], label=m)
    # Legend sits outside the axes, to the right of the plot.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(dataset)
    plt.show()


In [None]:
# Per-model forecasts on the untransformed data (transform stays at its default, False).
forecast = predict_models(model_list, data, transform=False)

In [None]:
# One model-comparison figure per dataset, in the same order as before.
for dataset_name in (
    'avioituvuus_30_34',
    'avioituvuus_25_29',
    'sisempikaupunki',
    'alypuhelin',
    'maaseudulla',
    'some_25_34',
    'some_35_44',
    'tfr',
):
    plot_model(forecast, dataset_name)