In [1]:
import requests 
import pickle
import numpy as np
from sklearn.linear_model import LinearRegression
from metrics_fast import calculate_metrics

# URL-filename mapping: remote page URL -> local file name to save under
url_filename_map = {
    'https://osf.io/84yux': 'test_samples_7L_split1.pkl',
    'https://osf.io/z67hd': 'test_samples_7L_split2.pkl',
    'https://osf.io/3drhs': 'test_samples_7L_split3.pkl'
}

# download and save each file
for url, filename in url_filename_map.items():
    # request the file itself rather than the page
    # (presumably the host's direct-download endpoint -- confirm)
    url = url + '/download'

    # send a GET request to the URL; a timeout is set because requests
    # otherwise waits forever on a stalled connection
    response = requests.get(url, timeout=60)

    # check if the request was successful (HTTP status code 200)
    if response.status_code == 200:
        # open the file in binary write mode and
        # write the response content to it
        with open(filename, 'wb') as f:
            f.write(response.content)

        # print a success message
        print('Successfully downloaded file: {}'.format(filename))
    else:
        # report the failure instead of skipping it silently, so that a
        # missing or stale local file is not mistaken for a fresh download
        print('Failed to download file: {} (HTTP status code {})'.format(
            filename, response.status_code))

# load the downloaded data from the files
# NOTE(review): unpickling can execute arbitrary code; only load these files
# when the download source is trusted.
# Each file is opened in a `with` block so its handle is closed right after
# loading instead of being left open until garbage collection.
with open('test_samples_7L_split1.pkl', 'rb') as f:
    split1 = pickle.load(f)
with open('test_samples_7L_split2.pkl', 'rb') as f:
    split2 = pickle.load(f)
with open('test_samples_7L_split3.pkl', 'rb') as f:
    split3 = pickle.load(f)

# rebuild the original arrays by joining the three splits in order:
# element 0 of every split goes into x, element 1 into y
x = np.concatenate([part[0] for part in (split1, split2, split3)])
y = np.concatenate([part[1] for part in (split1, split2, split3)])

# the forecast window is the number of columns in y
forecast_window = y.shape[1]

# training pair, both taken from x: every column except the last
# forecast_window ones is the input, the last forecast_window columns
# are the target
x_train = x[:, :-forecast_window]
y_train = x[:, -forecast_window:]

# test pair: x without its first forecast_window columns (so it has the same
# width as x_train) is the input, y is the target
x_test = x[:, forecast_window:]
y_test = y

# report the shapes of the train and test sets
print(
    'Training set shapes: x_train={}, y_train={}'.format(
        x_train.shape, y_train.shape
    )
)
print(
    'Test set shapes: x_test={}, y_test={}'.format(
        x_test.shape, y_test.shape
    )
)

# build a LinearRegression model and fit it on the training data
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(x_train, y_train)

# run the fitted model on the test inputs to obtain the forecasts
y_hat = model.predict(x_test)

# show the shapes of the predictions and of the targets they are compared to
print(
    'Prediction shapes: y_hat={}, y={}'.format(y_hat.shape, y.shape)
)

# compute the metrics for the predictions; calculate_metrics comes from the
# project's metrics_fast module and returns a mapping that is iterated with
# .items() below
metrics = calculate_metrics(y, y_hat, x)

# reduce every metric to its mean value
average_metrics = {}
for k, v in metrics.items():
    average_metrics[k] = np.mean(v)

# print the averaged metrics, one per line, with three decimals
print('Average Metrics:')
for k, v in average_metrics.items():
    print('{}: {:.3f}'.format(k, v))

Successfully downloaded file: test_samples_7L_split1.pkl
Successfully downloaded file: test_samples_7L_split2.pkl
Successfully downloaded file: test_samples_7L_split3.pkl
Training set shapes: x_train=(4171113, 291), y_train=(4171113, 48)
Test set shapes: x_test=(4171113, 291), y_test=(4171113, 48)
Prediction shapes: y_hat=(4171113, 48), y=(4171113, 48)
Average Metrics:
mase: 102.011
rmsse: 17.352
mae: 9.432
rmse: 18.199
mape: 942.248
smape: 124.617
r2: -inf
