In [1]:
!pip install autots
# Read in data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import re
import time
import datetime
from sklearn.model_selection import train_test_split
from autots import AutoTS



In [2]:
import warnings

warnings.filterwarnings("ignore")

# Import data

In [3]:
#df

In [4]:
# Define years and months to load data from both 2023 and 2024
years = [2023, 2024]
months = range(1, 13)

# Load datasets dynamically for both years
data_files = [
    f"ig_data-{year}/ig_data_{str(month).zfill(2)}-{year}_u.csv"
    for year in years
    for month in months
]

datasets = [pd.read_csv(file) for file in data_files]

# Add month and year columns
for i, (year, month) in enumerate([(y, m) for y in years for m in months]):
    datasets[i]["month"] = month
    datasets[i]["year"] = year

In [5]:
# Concatenate data
data = pd.concat(datasets)
data

Unnamed: 0,date,profile_id,followers,posts,engagement,likes,comments,reach,impressions,month,year
0,2023-01-01,26dbe87d9f80099370c8e724c31eb0f3731afc2aff5f62...,20558,0,0,0,0,0,0,1,2023
1,2023-01-01,31832168975075c19bfca97103d24b2525b75235b2e61a...,5256,0,0,0,0,0,0,1,2023
2,2023-01-01,065c756ab25ca5147325477d859c320577aa171e55d99b...,337483,1,149,145,4,4101,4511,1,2023
3,2023-01-01,ed8304331e1ba4cb7828e07b5f768f67b622a50ea5e25a...,62556,0,0,0,0,0,0,1,2023
4,2023-01-01,27178b23de82c1fd075ab6e928dec79ea604d5afb2e4c4...,359455,1,3997,3509,488,57388,63127,1,2023
...,...,...,...,...,...,...,...,...,...,...,...
547988,2024-12-31,8c1ac76cb45374c9710165d154d153ddf48bc30c253c8e...,5905,0,0,0,0,0,0,12,2024
547989,2024-12-31,834715aec99b1e9be4feccadeea4712f1cde42d649664d...,211245,1,345,337,8,10157,11172,12,2024
547990,2024-12-31,94528532ec6bee015266fdaeb9cd570a6fbf6e3a2dfbc2...,1660,0,0,0,0,0,0,12,2024
547991,2024-12-31,0597c04c318d668379c1d549c31931c56838f6865ae7f3...,10084,1,28,28,0,2914,3205,12,2024


In [6]:
# find in the column followers the rows that have the value as "undefined"
data[data["followers"] == "undefined"]
# replace the "undefined" in follwoers with nan
data["followers_num"] = data["followers"].replace("undefined", np.nan)
# convert the followers_num to integer
data["followers_num"] = data["followers_num"].astype(float).astype(pd.Int64Dtype())
# Ensure followers_num is numeric and clean
data["followers_num"] = pd.to_numeric(data["followers_num"], errors="coerce")

# Step 4: Replace zeros with NaN (to avoid filling with zero)
data["followers_num"].replace(0, np.nan, inplace=True)

# Step 5: Fill NaN values using the last non-null value, making sure it isn't zero
data["followers_num"] = data["followers_num"].fillna(method="ffill")

# Step 6: If still NaN (if there were no valid previous non-null value), you can set to zero or some other value
data["followers_num"].fillna(0, inplace=True)

# Step 7: Ensure the column type is integer, but with nullable integer type to handle missing values
data["followers_num"] = data["followers_num"].astype(pd.Int64Dtype())

# Verify the changes
print(data.head())


         date                                         profile_id followers  \
0  2023-01-01  26dbe87d9f80099370c8e724c31eb0f3731afc2aff5f62...     20558   
1  2023-01-01  31832168975075c19bfca97103d24b2525b75235b2e61a...      5256   
2  2023-01-01  065c756ab25ca5147325477d859c320577aa171e55d99b...    337483   
3  2023-01-01  ed8304331e1ba4cb7828e07b5f768f67b622a50ea5e25a...     62556   
4  2023-01-01  27178b23de82c1fd075ab6e928dec79ea604d5afb2e4c4...    359455   

   posts  engagement  likes  comments  reach  impressions  month  year  \
0      0           0      0         0      0            0      1  2023   
1      0           0      0         0      0            0      1  2023   
2      1         149    145         4   4101         4511      1  2023   
3      0           0      0         0      0            0      1  2023   
4      1        3997   3509       488  57388        63127      1  2023   

   followers_num  
0          20558  
1           5256  
2         337483  
3         

In [7]:
data.isnull().sum()

date             0
profile_id       0
followers        0
posts            0
engagement       0
likes            0
comments         0
reach            0
impressions      0
month            0
year             0
followers_num    0
dtype: int64

In [8]:
data[data["followers_num"] == "undefined"]

Unnamed: 0,date,profile_id,followers,posts,engagement,likes,comments,reach,impressions,month,year,followers_num


In [9]:
profile_counts = data["profile_id"].value_counts()
# # get the ids of the accounts that have more than 365 records
# more_than_299_ids = profile_counts[profile_counts >= 300].index
# # filter the data to include only the accounts that have more than 365 records
# more_than_299_data = data[data["profile_id"].isin(more_than_299_ids)]
# get the ids of the accounts that have more than 365 records
more_than_365_ids = profile_counts[profile_counts > 365].index
# filter the data to include only the accounts that have more than 365 records
more_than_365_data = data[data["profile_id"].isin(more_than_365_ids)]
# get the ids of the accounts that have 300 to 365 records
between_365_300_ids = profile_counts[
    (profile_counts <= 365) & (profile_counts >= 300)
].index
# filter the data to include only the accounts that have 300 to 365 records
between_365_300_data = data[data["profile_id"].isin(between_365_300_ids)]
# get the ids of the accounts that have 90 to 299 records
between_299_90_ids = profile_counts[
    (profile_counts < 300) & (profile_counts >= 90)
].index
# filter the data to include only the accounts that have 90 to 299 records
between_299_90_data = data[data["profile_id"].isin(between_299_90_ids)]

In [12]:
# Export more than 365 data to csv
between_365_300_data.to_csv("between_365_300_data.csv", index=False)

# Export more than 365 data to csv
between_299_90_data.to_csv("between_299_90_data.csv", index=False)

# Weekly
## 365+

In [61]:
# Convert date column to datetime
more_than_365_data["date"] = pd.to_datetime(more_than_365_data["date"])

# Extract year and week number
more_than_365_data["year"] = more_than_365_data["date"].dt.year
more_than_365_data["week"] = more_than_365_data["date"].dt.isocalendar().week  # ISO week number

# Aggregate: Keep last follower count per week for each profile
weekly_df = (
    more_than_365_data.sort_values(["profile_id", "date"])
    .groupby(["profile_id", "year", "week"])
    .last()
    .reset_index()
)

# Convert back to a proper date format (set to Monday of the given week)
weekly_df["date"] = pd.to_datetime(weekly_df["year"].astype(str) + weekly_df["week"].astype(str) + '1', format='%G%V%w')

# Drop unnecessary columns
weekly_df = weekly_df[["date", "profile_id", "followers_num"]]

# Set a random seed for reproducibility
np.random.seed(42)  # You can use any number as the seed

# Extract unique IDs from the DataFrame
unique_ids = weekly_df['profile_id'].unique()

# Randomly select 60 IDs
selected_ids = np.random.choice(unique_ids, size=60, replace=False)

# Filter the DataFrame to include only the selected IDs
weekly_df = weekly_df[weekly_df['profile_id'].isin(selected_ids)]

# Verify the number of unique IDs in the filtered DataFrame
print(f"Number of unique IDs in filtered DataFrame: {weekly_df['profile_id'].nunique()}")

weekly_df


Number of unique IDs in filtered DataFrame: 60


Unnamed: 0,date,profile_id,followers_num
28140,2023-02-13,02170cc6ad8d4839d4b103387977def165bea9631fd28d...,1239
28141,2023-02-20,02170cc6ad8d4839d4b103387977def165bea9631fd28d...,4043461
28142,2023-02-27,02170cc6ad8d4839d4b103387977def165bea9631fd28d...,4043256
28143,2023-03-06,02170cc6ad8d4839d4b103387977def165bea9631fd28d...,4042223
28144,2023-03-13,02170cc6ad8d4839d4b103387977def165bea9631fd28d...,103218
...,...,...,...
3542705,2024-06-17,ff30e89453543049bbac3f7e1ae07987f2163893b2c3c4...,35057
3542706,2024-06-24,ff30e89453543049bbac3f7e1ae07987f2163893b2c3c4...,207105
3542707,2024-07-01,ff30e89453543049bbac3f7e1ae07987f2163893b2c3c4...,229471
3542708,2024-07-08,ff30e89453543049bbac3f7e1ae07987f2163893b2c3c4...,74189


In [62]:
weekly_df.fillna(method='ffill', inplace=True)
weekly_df.fillna(method='bfill', inplace=True)
weekly_df.fillna(0, inplace=True)
# Split the data into training and testing sets
train_df = weekly_df[weekly_df['date'] < '2024-07-01']
test_df = weekly_df[weekly_df['date'] >= '2024-07-01']

# Pivot and prepare for AutoTS
def prepare_for_autots(df):
    df = df.pivot(index='date', columns='profile_id', values='followers_num')
    df = df.fillna(method='ffill')  # Forward-fill missing values
    df.index = df.index.to_period('W').to_timestamp('W')  # Convert index to week-end format
    df = df.fillna(method='ffill')
    return df

train_df = prepare_for_autots(train_df)
test_df = prepare_for_autots(test_df)

# Ensure no missing values
train_df.fillna(method='ffill', inplace=True)
train_df.fillna(method='bfill', inplace=True)
train_df.fillna(0, inplace=True)

test_df.fillna(method='ffill', inplace=True)
test_df.fillna(method='bfill', inplace=True)
test_df.fillna(0, inplace=True)


In [63]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from autots import AutoTS

# Store evaluation results
evaluation_results_w = []

# ----- AUTOTS MODEL -----
# Initialize and train the AutoTS model
model = AutoTS(
    forecast_length=len(test_df),  
    frequency='W',  # Changed frequency to weekly
    ensemble='simple',
    max_generations=10,
    num_validations=3,
    validation_method='backwards'
)

# Train the model
model = model.fit(train_df)

# Generate predictions
predictions = model.predict(prediction_interval=0.95, fail_on_forecast_nan=False)
forecast_df = predictions.forecast.fillna(0)

# Align test data with predictions
test_df_aligned = test_df.loc[forecast_df.index]

# Evaluate AutoTS model
for profile_id in test_df_aligned.columns:
    true_values = test_df_aligned[profile_id]
    predicted_values = forecast_df[profile_id]
    mae_autots = mean_absolute_error(true_values, predicted_values)
    mse_autots = mean_squared_error(true_values, predicted_values)
    evaluation_results_w.append({'Profile': profile_id, 'MAE_AutoTS_w_365': mae_autots, 'MSE_AutoTS_w_365': mse_autots})

# Convert to DataFrame
evaluation_df_w = pd.DataFrame(evaluation_results_w)

Using 4 cpus for n_jobs.
Data frequency is: W-SUN, used frequency is: W
Too many training validations for length of data provided, decreasing num_validations to 1
Model Number: 1 with model AverageValueNaive in generation 0 of 10
Model Number: 2 with model AverageValueNaive in generation 0 of 10
Model Number: 3 with model AverageValueNaive in generation 0 of 10
Model Number: 4 with model DatepartRegression in generation 0 of 10
Model Number: 5 with model DatepartRegression in generation 0 of 10




Model Number: 6 with model DatepartRegression in generation 0 of 10
Model Number: 7 with model DatepartRegression in generation 0 of 10
Template Eval Error: ImportError('Tensorflow not available, install with pip install tensorflow.') in model 7 in generation 0: DatepartRegression
Model Number: 8 with model ETS in generation 0 of 10
Model Number: 9 with model ETS in generation 0 of 10
Model Number: 10 with model GLM in generation 0 of 10


  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 11 with model GLM in generation 0 of 10
Model Number: 12 with model GLS in generation 0 of 10
Model Number: 13 with model GLS in generation 0 of 10
Model Number: 14 with model LastValueNaive in generation 0 of 10
Model Number: 15 with model LastValueNaive in generation 0 of 10
Model Number: 16 with model LastValueNaive in generation 0 of 10
Model Number: 17 with model LastValueNaive in generation 0 of 10
Model Number: 18 with model SeasonalNaive in generation 0 of 10
Model Number: 19 with model SeasonalNaive in generation 0 of 10
Model Number: 20 with model SeasonalNaive in generation 0 of 10
Model Number: 21 with model VAR in generation 0 of 10
Template Eval Error: ValueError('maxlags is too large for the number of observations and the number of equations. The largest model cannot be estimated.') in model 21 in generation 0: VAR
Model Number: 22 with model VAR in generation 0 of 10
Template Eval Error: ValueError('maxlags is too large for the number of observations and t



Model Number: 96 with model LastValueNaive in generation 0 of 10




Model Number: 97 with model VAR in generation 0 of 10
Template Eval Error: ValueError('maxlags is too large for the number of observations and the number of equations. The largest model cannot be estimated.') in model 97 in generation 0: VAR
Model Number: 98 with model ETS in generation 0 of 10
Template Eval Error: ValueError('Model ETS returned NaN for one or more series. fail_on_forecast_nan=True') in model 98 in generation 0: ETS
Model Number: 99 with model Cassandra in generation 0 of 10
Model Number: 100 with model SeasonalityMotif in generation 0 of 10
Model Number: 101 with model SectionalMotif in generation 0 of 10
Template Eval Error: ValueError('XA and XB must have the same number of columns (i.e. feature dimension.)') in model 101 in generation 0: SectionalMotif
Model Number: 102 with model GLS in generation 0 of 10
Model Number: 103 with model GLS in generation 0 of 10
Model Number: 104 with model ARCH in generation 0 of 10
Template Eval Error: ImportError('`arch` package m

  endog * np.log(endog / mu) + (mu - endog))
  endog * np.log(endog / mu) + (mu - endog))
  endog * np.log(endog / mu) + (mu - endog))
  endog * np.log(endog / mu) + (mu - endog))


Template Eval Error: ValueError('The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.') in model 149 in generation 0: GLM
Model Number: 150 with model ARCH in generation 0 of 10
Template Eval Error: ImportError('`arch` package must be installed from pip') in model 150 in generation 0: ARCH
Model Number: 151 with model ConstantNaive in generation 0 of 10
Model Number: 152 with model FFT in generation 0 of 10
Model Number: 153 with model LastValueNaive in generation 0 of 10
Model Number: 154 with model ConstantNaive in generation 0 of 10
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params rolling_mean_24 {'0': {'rows': 1, 'lag': 84, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'mean'}, '1': {}} with error IndexError('single positional indexer is out-of-bounds')") in model 154 in generation 0: ConstantNaive
Model Number: 155 with model Me

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 172 with model ConstantNaive in generation 1 of 10
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {'rows': 1, 'lag': 84, 'method': 'additive', 'strength': 0.7, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'mean'}, '1': {'model': 'Linear', 'phi': 1, 'window': None, 'transform_dict': None}} with error IndexError('single positional indexer is out-of-bounds')") in model 172 in generation 1: ConstantNaive
Model Number: 173 with model ETS in generation 1 of 10
Model Number: 174 with model ARDL in generation 1 of 10
Model Number: 175 with model RRVAR in generation 1 of 10
Model Number: 176 with model GLS in generation 1 of 10
Model Number: 177 with model GLM in generation 1 of 10
Model Number: 178 with model BasicLinearModel in generation 1 of 10
Model Number: 179 with model RRVAR in generation 1 of 10
Model Number: 180 with model AverageValueNaive in generation 1 of 10
Model Number: 181 with model BasicLinearMo

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 196 with model ConstantNaive in generation 1 of 10
Model Number: 197 with model SeasonalNaive in generation 1 of 10
Model Number: 198 with model RRVAR in generation 1 of 10
Model Number: 199 with model SectionalMotif in generation 1 of 10
Model Number: 200 with model SeasonalNaive in generation 1 of 10
Model Number: 201 with model MetricMotif in generation 1 of 10
Model Number: 202 with model DatepartRegression in generation 1 of 10
Model Number: 203 with model ETS in generation 1 of 10
Model Number: 204 with model DatepartRegression in generation 1 of 10
Model Number: 205 with model LastValueNaive in generation 1 of 10
Model Number: 206 with model SectionalMotif in generation 1 of 10
Model Number: 207 with model LastValueNaive in generation 1 of 10
Model Number: 208 with model GLM in generation 1 of 10


  endog_mu = self._clean(endog / mu)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  endog_mu = self._clean(endog / mu)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  endog_mu = self._clean(endog / mu)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  endog_mu = self._clean(endog / mu)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)


Model Number: 209 with model SectionalMotif in generation 1 of 10
Model Number: 210 with model ETS in generation 1 of 10
Template Eval Error: ValueError("Model returned NaN due to a preprocessing transformer {'fillna': 'mean', 'transformations': {'0': 'AlignLastValue', '1': 'PowerTransformer', '2': 'Slice', '3': 'FFTDecomposition', '4': 'DifferencedTransformer'}, 'transformation_params': {'0': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': 10, 'threshold_method': 'max'}, '1': {}, '2': {'method': 100}, '3': {'n_harmonics': None, 'detrend': 'quadratic'}, '4': {'lag': 1, 'fill': 'zero'}}}. fail_on_forecast_nan=True") in model 210 in generation 1: ETS
Model Number: 211 with model SectionalMotif in generation 1 of 10
Model Number: 212 with model FFT in generation 1 of 10
Model Number: 213 with model SeasonalityMotif in generation 1 of 10
Model Number: 214 with model SeasonalityMotif in generation 1 of 10
Model Number: 215 with model Sect



Model Number: 228 with model DatepartRegression in generation 1 of 10
Template Eval Error: Exception("Transformer HolidayTransformer failed on fit from params quadratic {'0': {'threshold': 0.7, 'splash_threshold': None, 'use_dayofmonth_holidays': True, 'use_wkdom_holidays': True, 'use_wkdeom_holidays': False, 'use_lunar_holidays': True, 'use_lunar_weekday': False, 'use_islamic_holidays': False, 'use_hebrew_holidays': False, 'use_hindu_holidays': False, 'anomaly_detector_params': {'method': 'minmax', 'method_params': {'alpha': 0.03}, 'fillna': 'rolling_mean_24', 'transform_dict': None, 'isolated_only': False, 'on_inverse': False}, 'remove_excess_anomalies': True, 'impact': 'regression', 'regression_params': {}}, '1': {}, '2': {'model': 'Linear', 'changepoint_spacing': 5040, 'changepoint_distance_end': 360, 'datepart_method': 'simple_binarized'}} with error ValueError('more than 1 year of data is required for holiday detection.')") in model 228 in generation 1: DatepartRegression
Model N



Model Number: 236 with model UnivariateMotif in generation 1 of 10
Template Eval Error: ValueError('Model UnivariateMotif returned NaN for one or more series. fail_on_forecast_nan=True') in model 236 in generation 1: UnivariateMotif
Model Number: 237 with model RRVAR in generation 1 of 10


  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  avg = avg_as_array = np.multiply(a, wgt,
  avg = avg_as_array = np.multiply(a, wgt,


Model Number: 238 with model SeasonalNaive in generation 1 of 10
Model Number: 239 with model BasicLinearModel in generation 1 of 10
Model Number: 240 with model RRVAR in generation 1 of 10
Model Number: 241 with model WindowRegression in generation 1 of 10
Model Number: 242 with model FFT in generation 1 of 10
Model Number: 243 with model SeasonalityMotif in generation 1 of 10
Model Number: 244 with model DatepartRegression in generation 1 of 10
Template Eval Error: ModuleNotFoundError("No module named 'tensorflow'") in model 244 in generation 1: DatepartRegression
Model Number: 245 with model ETS in generation 1 of 10
Model Number: 246 with model Cassandra in generation 1 of 10
Template Eval Error: ValueError('negative dimensions are not allowed') in model 246 in generation 1: Cassandra
Model Number: 247 with model AverageValueNaive in generation 1 of 10
Model Number: 248 with model SeasonalityMotif in generation 1 of 10
Model Number: 249 with model SeasonalNaive in generation 1 of 1



Model Number: 303 with model GLM in generation 2 of 10
Model Number: 304 with model SeasonalityMotif in generation 2 of 10
Model Number: 305 with model SeasonalityMotif in generation 2 of 10
Model Number: 306 with model ETS in generation 2 of 10


  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 307 with model GLM in generation 2 of 10
Template Eval Error: ValueError('The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.') in model 307 in generation 2: GLM
Model Number: 308 with model DatepartRegression in generation 2 of 10


  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)


Model Number: 309 with model LastValueNaive in generation 2 of 10
Model Number: 310 with model SeasonalNaive in generation 2 of 10
Model Number: 311 with model GLS in generation 2 of 10
Model Number: 312 with model DatepartRegression in generation 2 of 10
Model Number: 313 with model MetricMotif in generation 2 of 10
Model Number: 314 with model BasicLinearModel in generation 2 of 10
Model Number: 315 with model ARDL in generation 2 of 10
Model Number: 316 with model SeasonalNaive in generation 2 of 10
Model Number: 317 with model SeasonalityMotif in generation 2 of 10
Model Number: 318 with model AverageValueNaive in generation 2 of 10
Model Number: 319 with model ARDL in generation 2 of 10
Model Number: 320 with model WindowRegression in generation 2 of 10
Template Eval Error: ValueError("regression_type='User' but no future_regressor passed") in model 320 in generation 2: WindowRegression
Model Number: 321 with model ConstantNaive in generation 2 of 10
Model Number: 322 with model S



Model Number: 384 with model SectionalMotif in generation 3 of 10
Model Number: 385 with model LastValueNaive in generation 3 of 10
Model Number: 386 with model SeasonalNaive in generation 3 of 10
Model Number: 387 with model BasicLinearModel in generation 3 of 10
Model Number: 388 with model WindowRegression in generation 3 of 10
Template Eval Error: ValueError("regression_type='User' but no future_regressor passed") in model 388 in generation 3: WindowRegression
Model Number: 389 with model BasicLinearModel in generation 3 of 10
Model Number: 390 with model DatepartRegression in generation 3 of 10
Model Number: 391 with model GLS in generation 3 of 10
Model Number: 392 with model DatepartRegression in generation 3 of 10




Model Number: 393 with model GLM in generation 3 of 10
Model Number: 394 with model LastValueNaive in generation 3 of 10
Model Number: 395 with model WindowRegression in generation 3 of 10
Template Eval Error: Exception("Transformer QuantileTransformer failed on fit from params ffill {'0': {'method': 'minmax', 'method_params': {'alpha': 0.05}, 'fillna': 'fake_date', 'transform_dict': None, 'isolated_only': False, 'on_inverse': False}, '1': {'output_distribution': 'uniform', 'n_quantiles': 17}, '2': {'center': 'mean'}, '3': {}, '4': {'fillna': 'ffill', 'center': 'median'}, '5': {}} with error ValueError('Shape of passed values is (50, 60), indices imply (52, 60)')") in model 395 in generation 3: WindowRegression
Model Number: 396 with model AverageValueNaive in generation 3 of 10
Model Number: 397 with model ARDL in generation 3 of 10


  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 398 with model GLM in generation 3 of 10
Model Number: 399 with model SeasonalityMotif in generation 3 of 10
Model Number: 400 with model SeasonalityMotif in generation 3 of 10
Template Eval Error: ValueError("Model returned NaN due to a preprocessing transformer {'fillna': 'ffill', 'transformations': {'0': 'CumSumTransformer', '1': 'PowerTransformer', '2': 'EWMAFilter', '3': 'AlignLastValue'}, 'transformation_params': {'0': {}, '1': {}, '2': {'span': 10}, '3': {'rows': 168, 'lag': 84, 'method': 'additive', 'strength': 0.2, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'max'}}}. fail_on_forecast_nan=True") in model 400 in generation 3: SeasonalityMotif
Model Number: 401 with model ETS in generation 3 of 10
Model Number: 402 with model LastValueNaive in generation 3 of 10
Model Number: 403 with model ARDL in generation 3 of 10
Model Number: 404 with model ETS in generation 3 of 10
Template Eval Error: ValueError("Model returned NaN due to a preprocessing t

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 427 with model SeasonalityMotif in generation 3 of 10
Model Number: 428 with model RRVAR in generation 3 of 10
Model Number: 429 with model SeasonalityMotif in generation 3 of 10
Model Number: 430 with model FFT in generation 3 of 10
Model Number: 431 with model SeasonalNaive in generation 3 of 10
Model Number: 432 with model SeasonalityMotif in generation 3 of 10
Model Number: 433 with model AverageValueNaive in generation 3 of 10
Model Number: 434 with model AverageValueNaive in generation 3 of 10
Model Number: 435 with model AverageValueNaive in generation 3 of 10
Model Number: 436 with model AverageValueNaive in generation 3 of 10
Model Number: 437 with model SeasonalityMotif in generation 3 of 10
Model Number: 438 with model RRVAR in generation 3 of 10
Model Number: 439 with model ConstantNaive in generation 3 of 10
Model Number: 440 with model DatepartRegression in generation 3 of 10
Model Number: 441 with model AverageValueNaive in generation 3 of 10
Model Number: 

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 448 with model SeasonalNaive in generation 3 of 10
Model Number: 449 with model SeasonalNaive in generation 3 of 10
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {}, '1': {'lag': 1, 'fill': 'bfill'}, '2': {}, '3': {}, '4': {}, '5': {'rows': 1, 'lag': 84, 'method': 'additive', 'strength': 0.9, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'mean'}} with error IndexError('single positional indexer is out-of-bounds')") in model 449 in generation 3: SeasonalNaive
Model Number: 450 with model WindowRegression in generation 3 of 10
Template Eval Error: ValueError('Input X contains NaN.\nElasticNet does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transform



Model Number: 469 with model MetricMotif in generation 3 of 10
Model Number: 470 with model SeasonalNaive in generation 3 of 10
New Generation: 4 of 10
Model Number: 471 with model ETS in generation 4 of 10
Model Number: 472 with model ETS in generation 4 of 10
Model Number: 473 with model FFT in generation 4 of 10
Model Number: 474 with model AverageValueNaive in generation 4 of 10
Model Number: 475 with model RRVAR in generation 4 of 10
Model Number: 476 with model RRVAR in generation 4 of 10
Model Number: 477 with model Cassandra in generation 4 of 10
Template Eval Error: Exception("Transformer FIRFilter failed on inverse from params ffill {'0': {'method': 'clip', 'std_threshold': 4, 'fillna': None}, '1': {'numtaps': 128, 'cutoff_hz': 50, 'window': 'hamming', 'sampling_frequency': 10, 'on_transform': False, 'on_inverse': True}, '2': {'rows': 2, 'lag': 1, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': None, 'threshold_method': 'max'}, '3': {'method': '

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 529 with model WindowRegression in generation 4 of 10
Model Number: 530 with model DatepartRegression in generation 4 of 10
Model Number: 531 with model SectionalMotif in generation 4 of 10
Model Number: 532 with model LastValueNaive in generation 4 of 10
Model Number: 533 with model FFT in generation 4 of 10
Model Number: 534 with model AverageValueNaive in generation 4 of 10
Model Number: 535 with model WindowRegression in generation 4 of 10
Template Eval Error: ValueError('at least one array or dtype is required') in model 535 in generation 4: WindowRegression
Model Number: 536 with model SeasonalNaive in generation 4 of 10
Model Number: 537 with model LastValueNaive in generation 4 of 10
Model Number: 538 with model ARDL in generation 4 of 10
Model Number: 539 with model SeasonalityMotif in generation 4 of 10
Template Eval Error: ValueError('Model SeasonalityMotif returned NaN for one or more series. fail_on_forecast_nan=True') in model 539 in generation 4: Seasonalit

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Template Eval Error: ValueError('The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.') in model 615 in generation 5: GLM
Model Number: 616 with model AverageValueNaive in generation 5 of 10
Model Number: 617 with model ARDL in generation 5 of 10


  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  endog_mu = self._clean(endog / mu)


Model Number: 618 with model SeasonalityMotif in generation 5 of 10
Template Eval Error: ValueError("Model returned NaN due to a preprocessing transformer {'fillna': 'mean', 'transformations': {'0': 'Log', '1': 'PctChangeTransformer'}, 'transformation_params': {'0': {}, '1': {}}}. fail_on_forecast_nan=True") in model 618 in generation 5: SeasonalityMotif
Model Number: 619 with model LastValueNaive in generation 5 of 10
Model Number: 620 with model DatepartRegression in generation 5 of 10
Model Number: 621 with model ETS in generation 5 of 10
Model Number: 622 with model GLS in generation 5 of 10
Model Number: 623 with model SectionalMotif in generation 5 of 10
Model Number: 624 with model DatepartRegression in generation 5 of 10
Model Number: 625 with model RRVAR in generation 5 of 10
Model Number: 626 with model DatepartRegression in generation 5 of 10
Template Eval Error: ValueError("regression_type='User' but no future_regressor passed") in model 626 in generation 5: DatepartRegress

  endog_mu = self._clean(endog / mu)
  endog_mu = self._clean(endog / mu)
  return np.exp(z)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  return np.exp(z)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  return np.exp(z)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)
  wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)
  endog_mu = self._clean(endog / mu)
  return np.exp(z)
  resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha)


Model Number: 652 with model SeasonalNaive in generation 5 of 10
Model Number: 653 with model SeasonalityMotif in generation 5 of 10
Model Number: 654 with model ETS in generation 5 of 10
Model Number: 655 with model AverageValueNaive in generation 5 of 10
Model Number: 656 with model MetricMotif in generation 5 of 10
Model Number: 657 with model LastValueNaive in generation 5 of 10
Model Number: 658 with model FFT in generation 5 of 10
Model Number: 659 with model ETS in generation 5 of 10
New Generation: 6 of 10
Model Number: 660 with model LastValueNaive in generation 6 of 10
Model Number: 661 with model GLS in generation 6 of 10
Model Number: 662 with model GLS in generation 6 of 10
Model Number: 663 with model ETS in generation 6 of 10
Model Number: 664 with model GLM in generation 6 of 10
Model Number: 665 with model WindowRegression in generation 6 of 10


  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 666 with model ETS in generation 6 of 10
Model Number: 667 with model AverageValueNaive in generation 6 of 10
Model Number: 668 with model RRVAR in generation 6 of 10
Model Number: 669 with model GLS in generation 6 of 10
Model Number: 670 with model GLS in generation 6 of 10
Model Number: 671 with model RRVAR in generation 6 of 10
Model Number: 672 with model FFT in generation 6 of 10
Model Number: 673 with model WindowRegression in generation 6 of 10
Model Number: 674 with model SeasonalityMotif in generation 6 of 10
Model Number: 675 with model ETS in generation 6 of 10
Model Number: 676 with model AverageValueNaive in generation 6 of 10
Model Number: 677 with model GLM in generation 6 of 10
Model Number: 678 with model SeasonalityMotif in generation 6 of 10
Model Number: 679 with model RRVAR in generation 6 of 10
Model Number: 680 with model GLM in generation 6 of 10
Model Number: 681 with model SeasonalityMotif in generation 6 of 10
Model Number: 682 with model Avera



Model Number: 722 with model MetricMotif in generation 6 of 10
Template Eval Error: ValueError('kth(=14) out of bounds (9)') in model 722 in generation 6: MetricMotif
Model Number: 723 with model ETS in generation 6 of 10
Template Eval Error: Exception("Transformer HolidayTransformer failed on fit from params akima {'0': {'threshold': 0.7, 'splash_threshold': None, 'use_dayofmonth_holidays': True, 'use_wkdom_holidays': True, 'use_wkdeom_holidays': False, 'use_lunar_holidays': False, 'use_lunar_weekday': False, 'use_islamic_holidays': False, 'use_hebrew_holidays': False, 'use_hindu_holidays': False, 'anomaly_detector_params': {'method': 'rolling_zscore', 'method_params': {'distribution': 'norm', 'alpha': 0.05, 'rolling_periods': 28, 'center': True}, 'fillna': 'ffill', 'transform_dict': None, 'isolated_only': True, 'on_inverse': False}, 'remove_excess_anomalies': True, 'impact': 'regression', 'regression_params': {}}} with error ValueError('more than 1 year of data is required for holida

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 741 with model SeasonalityMotif in generation 6 of 10
Template Eval Error: ValueError('window shape cannot be larger than input array shape') in model 741 in generation 6: SeasonalityMotif
Model Number: 742 with model RRVAR in generation 6 of 10
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params mean {'0': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 0.5, 'first_value_only': False, 'threshold': 10, 'threshold_method': 'max'}, '1': {}, '2': {'rows': 1, 'lag': 84, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': None, 'threshold_method': 'max'}} with error IndexError('single positional indexer is out-of-bounds')") in model 742 in generation 6: RRVAR
Model Number: 743 with model AverageValueNaive in generation 6 of 10
New Generation: 7 of 10
Model Number: 744 with model AverageValueNaive in generation 7 of 10
Model Number: 745 with model WindowRegression in generation 7 of 10
Template Eval Error: Modu



Model Number: 899 with model BasicLinearModel in generation 8 of 10
Model Number: 900 with model UnivariateMotif in generation 8 of 10
Model Number: 901 with model SeasonalNaive in generation 8 of 10
Model Number: 902 with model SeasonalityMotif in generation 8 of 10
Model Number: 903 with model AverageValueNaive in generation 8 of 10
Model Number: 904 with model AverageValueNaive in generation 8 of 10
Model Number: 905 with model AverageValueNaive in generation 8 of 10
Model Number: 906 with model GLS in generation 8 of 10
Model Number: 907 with model ConstantNaive in generation 8 of 10
Model Number: 908 with model RRVAR in generation 8 of 10
Model Number: 909 with model SeasonalNaive in generation 8 of 10
Model Number: 910 with model SeasonalityMotif in generation 8 of 10
Model Number: 911 with model GLM in generation 8 of 10




New Generation: 9 of 10
Model Number: 912 with model GLM in generation 9 of 10
Template Eval Error: ValueError('regression_type=user and no future_regressor passed') in model 912 in generation 9: GLM
Model Number: 913 with model ETS in generation 9 of 10
Model Number: 914 with model LastValueNaive in generation 9 of 10
Model Number: 915 with model WindowRegression in generation 9 of 10
Model Number: 916 with model UnivariateMotif in generation 9 of 10
Model Number: 917 with model GLM in generation 9 of 10
Model Number: 918 with model GLS in generation 9 of 10
Model Number: 919 with model SeasonalityMotif in generation 9 of 10
Model Number: 920 with model MetricMotif in generation 9 of 10
Model Number: 921 with model GLS in generation 9 of 10
Template Eval Error: Exception("Transformer HolidayTransformer failed on fit from params nearest {'0': {'threshold': 0.7, 'splash_threshold': None, 'use_dayofmonth_holidays': True, 'use_wkdom_holidays': True, 'use_wkdeom_holidays': False, 'use_luna

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 946 with model ARDL in generation 9 of 10
Model Number: 947 with model FFT in generation 9 of 10
Model Number: 948 with model ETS in generation 9 of 10
Model Number: 949 with model GLM in generation 9 of 10
Template Eval Error: ValueError('NaN, inf or invalid value detected in weights, estimation infeasible.') in model 949 in generation 9: GLM
Model Number: 950 with model RRVAR in generation 9 of 10


  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  return np.exp(z)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)


Model Number: 951 with model SeasonalityMotif in generation 9 of 10
Model Number: 952 with model FFT in generation 9 of 10
Model Number: 953 with model Cassandra in generation 9 of 10
Template Eval Error: ValueError('more than 1 year of data is required for holiday detection.') in model 953 in generation 9: Cassandra
Model Number: 954 with model SeasonalNaive in generation 9 of 10
Model Number: 955 with model SeasonalNaive in generation 9 of 10
Model Number: 956 with model GLM in generation 9 of 10


  return np.exp(z)
  endog * np.log(endog / mu) + (mu - endog))
  endog * np.log(endog / mu) + (mu - endog))
  endog * np.log(endog / mu) + (mu - endog))
  return np.sum(resid / self.family.variance(mu)) / self.df_resid
  return np.sum(resid / self.family.variance(mu)) / self.df_resid
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)


Template Eval Error: ValueError('NaN, inf or invalid value detected in weights, estimation infeasible.') in model 956 in generation 9: GLM
Model Number: 957 with model ARDL in generation 9 of 10
Model Number: 958 with model ARDL in generation 9 of 10
Model Number: 959 with model FFT in generation 9 of 10
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params nearest {'0': {'rows': 1, 'lag': 84, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': 10, 'threshold_method': 'max'}, '1': {'regression_model': {'model': 'ElasticNet', 'model_params': {'l1_ratio': 0.5, 'fit_intercept': True, 'selection': 'cyclic', 'max_iter': 1000}}, 'datepart_method': ['dayofweek', [365.25, 4]], 'polynomial_degree': None, 'transform_dict': {'fillna': 'ffill_mean_biased', 'transformations': {'0': 'AlignLastValue'}, 'transformation_params': {'0': {'rows': 84, 'lag': 1, 'method': 'additive', 'strength': 1.0, 'first_value_only': False, 'threshold': 3, 'thresh

  endog_mu = self._clean(endog / mu)
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
  endog_mu = self._clean(endog / mu)
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))


Model Number: 992 with model GLM in generation 9 of 10
Template Eval Error: Exception("Transformer HolidayTransformer failed on fit from params ffill {'0': {}, '1': {'output_distribution': 'uniform', 'n_quantiles': 17}, '2': {'threshold': 0.8, 'splash_threshold': None, 'use_dayofmonth_holidays': True, 'use_wkdom_holidays': True, 'use_wkdeom_holidays': False, 'use_lunar_holidays': False, 'use_lunar_weekday': False, 'use_islamic_holidays': False, 'use_hebrew_holidays': False, 'use_hindu_holidays': False, 'anomaly_detector_params': {'method': 'rolling_zscore', 'method_params': {'distribution': 'uniform', 'alpha': 0.01, 'rolling_periods': 300, 'center': True}, 'fillna': 'linear', 'transform_dict': None, 'isolated_only': False, 'on_inverse': False}, 'remove_excess_anomalies': True, 'impact': 'datepart_regression', 'regression_params': {'regression_model': {'model': 'ElasticNet', 'model_params': {'l1_ratio': 0.5, 'fit_intercept': True, 'selection': 'cyclic', 'max_iter': 1000}}, 'datepart_met

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 1213 with model Ensemble in generation 11 of Ensembles
Model Number: 1227 with model Ensemble in generation 11 of Ensembles
Model Number: 1241 with model Ensemble in generation 11 of Ensembles
Model Number: 1255 with model Ensemble in generation 11 of Ensembles
Model Number: 1269 with model Ensemble in generation 11 of Ensembles
Validation Round: 1
Model Number: 1 of 191 with model Ensemble for Validation 1
prediction too long for indepedent=False, falling back on indepedent=True
📈 1 - Ensemble with avg smape 118.05: 
2 - Ensemble with avg smape 118.05: 
📈 3 - Ensemble with avg smape 0.0: 
4 - Ensemble with avg smape 131.51: 
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 0.7, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'max'}, '1': {'lag_1': 7, 'method': 20}, '2': {'rows': 1, 'lag': 28, 'method': 'multiplicative', 'strength': 1.0, 'first_value_only

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


41 - Ensemble with avg smape 123.71: 
42 - Ensemble with avg smape 123.64: 
43 - Ensemble with avg smape 0.0: 
44 - Ensemble with avg smape 133.16: 
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 0.7, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'max'}, '1': {'lag_1': 7, 'method': 20}, '2': {'rows': 1, 'lag': 28, 'method': 'multiplicative', 'strength': 1.0, 'first_value_only': False, 'threshold': None, 'threshold_method': 'mean'}} with error IndexError('single positional indexer is out-of-bounds')") in model 44 in generation 0: Ensemble
Model Number: 45 of 191 with model SeasonalityMotif for Validation 1
prediction too long for indepedent=False, falling back on indepedent=True
45 - SeasonalityMotif with avg smape 118.05: 
Model Number: 46 of 191 with model SeasonalityMotif for Validation 1
prediction too long for indepedent=False, falling back on indepedent=True
4

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


99 - ETS with avg smape 118.05: 
Model Number: 100 of 191 with model ETS for Validation 1
100 - ETS with avg smape 118.27: 
Model Number: 101 of 191 with model ETS for Validation 1
101 - ETS with avg smape 119.49: 
Model Number: 102 of 191 with model ETS for Validation 1
102 - ETS with avg smape 132.0: 
Model Number: 103 of 191 with model ETS for Validation 1
103 - ETS with avg smape 118.05: 
Model Number: 104 of 191 with model ETS for Validation 1
104 - ETS with avg smape 118.05: 
Model Number: 105 of 191 with model ETS for Validation 1
105 - ETS with avg smape 118.05: 
Model Number: 106 of 191 with model SeasonalNaive for Validation 1
106 - SeasonalNaive with avg smape 119.53: 
Model Number: 107 of 191 with model SeasonalNaive for Validation 1
107 - SeasonalNaive with avg smape 128.61: 
Model Number: 108 of 191 with model WindowRegression for Validation 1
108 - WindowRegression with avg smape 118.03: 
Model Number: 109 of 191 with model ConstantNaive for Validation 1
109 - ConstantNa

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


152 - ARDL with avg smape 130.43: 
Model Number: 153 of 191 with model GLM for Validation 1
153 - GLM with avg smape 130.47: 
Model Number: 154 of 191 with model GLM for Validation 1
154 - GLM with avg smape 130.47: 
Model Number: 155 of 191 with model ARDL for Validation 1
155 - ARDL with avg smape 126.83: 
Model Number: 156 of 191 with model GLM for Validation 1
156 - GLM with avg smape 133.12: 
Model Number: 157 of 191 with model ARDL for Validation 1
157 - ARDL with avg smape 159.56: 
Model Number: 158 of 191 with model ARDL for Validation 1
158 - ARDL with avg smape 133.63: 
Model Number: 159 of 191 with model BasicLinearModel for Validation 1
159 - BasicLinearModel with avg smape 127.01: 
Model Number: 160 of 191 with model DatepartRegression for Validation 1
160 - DatepartRegression with avg smape 141.51: 
Model Number: 161 of 191 with model DatepartRegression for Validation 1
161 - DatepartRegression with avg smape 130.31: 
Model Number: 162 of 191 with model SectionalMotif for



194 - BasicLinearModel with avg smape 140.1: 
Model Number: 195 of 191 with model BasicLinearModel for Validation 1
195 - BasicLinearModel with avg smape 134.15: 
Model Number: 196 of 191 with model FFT for Validation 1
196 - FFT with avg smape 134.55: 
Model Number: 197 of 191 with model BasicLinearModel for Validation 1
197 - BasicLinearModel with avg smape 136.08: 
Model Number: 198 of 191 with model MetricMotif for Validation 1
Template Eval Error: ValueError('kth(=14) out of bounds (7)') in model 198 in generation 0: MetricMotif
Model Number: 199 of 191 with model FFT for Validation 1
199 - FFT with avg smape 132.26: 
Model Number: 200 of 191 with model BasicLinearModel for Validation 1
200 - BasicLinearModel with avg smape 136.77: 
Model Number: 201 of 191 with model FFT for Validation 1
201 - FFT with avg smape 136.45: 
Model Number: 202 of 191 with model BasicLinearModel for Validation 1
202 - BasicLinearModel with avg smape 142.48: 
Model Number: 203 of 191 with model BasicLin

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


Model Number: 1633 with model Ensemble in generation 12 of Ensembles
Model Number: 1647 with model Ensemble in generation 12 of Ensembles
Model Number: 1661 with model Ensemble in generation 12 of Ensembles
Model Number: 1675 with model Ensemble in generation 12 of Ensembles
Model Number: 1689 with model Ensemble in generation 12 of Ensembles
Model Number: 1703 with model Ensemble in generation 12 of Ensembles
Model Number: 1717 with model Ensemble in generation 12 of Ensembles
Validation Round: 1
Model Number: 1 of 16 with model Ensemble for Validation 1
prediction too long for indepedent=False, falling back on indepedent=True
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/autots/evaluator/auto_model.py", line 1430, in model_forecast
    df_forecast = model_forecast(
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/autots/evaluator/auto_model.py", line 1512, in model_forecast
    raise ValueError(

  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)
  t = np.exp(-z)


27 - Ensemble with avg smape 117.97: 
28 - Ensemble with avg smape 117.97: 
29 - Ensemble with avg smape 0.0: 
30 - Ensemble with avg smape 131.51: 
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 0.7, 'first_value_only': False, 'threshold': 1, 'threshold_method': 'max'}, '1': {'lag_1': 7, 'method': 20}, '2': {'rows': 1, 'lag': 28, 'method': 'multiplicative', 'strength': 1.0, 'first_value_only': False, 'threshold': None, 'threshold_method': 'mean'}} with error IndexError('single positional indexer is out-of-bounds')") in model 30 in generation 0: Ensemble
Model Number: 31 of 16 with model Ensemble for Validation 1
31 - Ensemble with avg smape 119.36: 
32 - Ensemble with avg smape 119.33: 
33 - Ensemble with avg smape 0.0: 
34 - Ensemble with avg smape 131.56: 
Template Eval Error: Exception("Transformer AlignLastValue failed on fit from params ffill {'0': {'rows': 1, 'lag': 1, 'meth

In [64]:
evaluation_results_w

[{'Profile': '02170cc6ad8d4839d4b103387977def165bea9631fd28de5379a59339bc6b942',
  'MAE_AutoTS_w_365': 25146.949999999997,
  'MSE_AutoTS_w_365': 632369094.3025},
 {'Profile': '0225ee6b651c06a7b02cf995cb466c63dc02b861b04fc5294457e5cd17c67267',
  'MAE_AutoTS_w_365': 7480.900000000001,
  'MSE_AutoTS_w_365': 55963864.81000002},
 {'Profile': '077e1d3b46ac249ee51fdb604c82dfbdf350f46a48fe2ca20ea3bc11614cec64',
  'MAE_AutoTS_w_365': 245818.30769230778,
  'MSE_AutoTS_w_365': 60451406331.76925},
 {'Profile': '0c997f518f0769c39126acb6538cae0f9c945346abecc99705370a49c377467e',
  'MAE_AutoTS_w_365': 75094.75,
  'MSE_AutoTS_w_365': 5639221477.5625},
 {'Profile': '100b80f30629b949b2b3015e944f81dbdee219520fecab758468d3e0c433e8f1',
  'MAE_AutoTS_w_365': 172708.2653846154,
  'MSE_AutoTS_w_365': 387728760899.6302},
 {'Profile': '10bcfb1593e1c348b04c1aa2aeb4d1add22a9ffe71ba7340e9649574b01b1dbe',
  'MAE_AutoTS_w_365': 1130.8769230769228,
  'MSE_AutoTS_w_365': 1423005.0563461534},
 {'Profile': '13ee2c9dc0ad

## 300-365

In [None]:
# Convert date column to datetime
between_365_300_data["date"] = pd.to_datetime(between_365_300_data["date"])

# Extract year and week number
between_365_300_data["year"] = between_365_300_data["date"].dt.year
between_365_300_data["week"] = between_365_300_data["date"].dt.isocalendar().week  # ISO week number

# Aggregate: Keep last follower count per week for each profile
weekly_df = (
    between_365_300_data.sort_values(["profile_id", "date"])
    .groupby(["profile_id", "year", "week"])
    .last()
    .reset_index()
)

# Convert back to a proper date format (set to Monday of the given week)
weekly_df["date"] = pd.to_datetime(weekly_df["year"].astype(str) + weekly_df["week"].astype(str) + '1', format='%G%V%w')

# Drop unnecessary columns
weekly_df = weekly_df[["date", "profile_id", "followers_num"]]

# Set a random seed for reproducibility
np.random.seed(42)  # You can use any number as the seed

# Extract unique IDs from the DataFrame
unique_ids = weekly_df['profile_id'].unique()

# Randomly select 60 IDs
selected_ids = np.random.choice(unique_ids, size=60, replace=False)

# Create a complete date range for each profile
full_date_range = pd.date_range(start="2023-01-02", end="2024-12-30", freq="W-MON")
full_index = pd.MultiIndex.from_product([selected_ids, full_date_range], names=["profile_id", "date"])

# Reindex the DataFrame
weekly_df = weekly_df.set_index(["profile_id", "date"]).reindex(full_index).reset_index()

# Filter the DataFrame to include only the selected IDs
weekly_df = weekly_df[weekly_df['profile_id'].isin(selected_ids)]

weekly_df.fillna(method='ffill', inplace=True)
weekly_df.fillna(method='bfill', inplace=True)
weekly_df.fillna(0, inplace=True)

# Split the data into training and testing sets
train_df = weekly_df[weekly_df['date'] < '2024-07-01']
test_df = weekly_df[weekly_df['date'] >= '2024-07-01']

# Pivot and prepare for AutoTS
def prepare_for_autots(df):
    df = df.pivot(index='date', columns='profile_id', values='followers_num')
    df = df.fillna(method='ffill')  # Forward-fill missing values
    df.index = df.index.to_period('W').to_timestamp('W')  # Convert index to week-end format
    df = df.fillna(method='ffill')
    return df

train_df = prepare_for_autots(train_df)
test_df = prepare_for_autots(test_df)

# Ensure no missing values
train_df.fillna(method='ffill', inplace=True)
train_df.fillna(method='bfill', inplace=True)
train_df.fillna(0, inplace=True)

test_df.fillna(method='ffill', inplace=True)
test_df.fillna(method='bfill', inplace=True)
test_df.fillna(0, inplace=True)

evaluation_results_w_300 = []

# ----- AUTOTS MODEL -----
# Initialize and train the AutoTS model
model = AutoTS(
    forecast_length=len(test_df),  
    frequency='W',  # Changed frequency to weekly
    ensemble='simple',
    max_generations=10,
    num_validations=3,
    validation_method='backwards'
)

# Train the model
model = model.fit(train_df)

# Generate predictions
predictions = model.predict(prediction_interval=0.95, fail_on_forecast_nan=False)
forecast_df = predictions.forecast.fillna(0)

# Align test data with predictions
test_df_aligned = test_df.loc[forecast_df.index]

# Evaluate AutoTS model
for profile_id in test_df_aligned.columns:
    true_values = test_df_aligned[profile_id]
    predicted_values = forecast_df[profile_id]
    mae_autots = mean_absolute_error(true_values, predicted_values)
    mse_autots = mean_squared_error(true_values, predicted_values)
    evaluation_results_w_300.append({'Profile': profile_id, 'MAE_AutoTS_w_300': mae_autots, 'MSE_AutoTS_w_300': mse_autots})

# Convert to DataFrame
evaluation_df_w_300 = pd.DataFrame(evaluation_results_w_300)

In [None]:
evaluation_results_w_300

## 90-299

In [76]:
# get the ids of the accounts that have 50 to 299 records
between_299_90_ids = profile_counts[
    (profile_counts < 300) & (profile_counts >= 90)
].index
# filter the data to include only the accounts that have 50 to 299 records
between_299_90_data = data[data["profile_id"].isin(between_299_90_ids)]

In [None]:
# Convert date column to datetime
between_299_90_data["date"] = pd.to_datetime(between_299_90_data["date"])

# Extract year and week number
between_299_90_data["year"] = between_299_90_data["date"].dt.year
between_299_90_data["week"] = between_299_90_data["date"].dt.isocalendar().week  # ISO week number

# Aggregate: Keep last follower count per week for each profile
weekly_df = (
    between_299_90_data.sort_values(["profile_id", "date"])
    .groupby(["profile_id", "year", "week"])
    .last()
    .reset_index()
)

# Convert back to a proper date format (set to Monday of the given week)
weekly_df["date"] = pd.to_datetime(weekly_df["year"].astype(str) + weekly_df["week"].astype(str) + '1', format='%G%V%w')

# Drop unnecessary columns
weekly_df = weekly_df[["date", "profile_id", "followers_num"]]

# Set a random seed for reproducibility
np.random.seed(42)  # You can use any number as the seed

# Extract unique IDs from the DataFrame
unique_ids = weekly_df['profile_id'].unique()

# Randomly select 60 IDs
selected_ids = np.random.choice(unique_ids, size=60, replace=False)

# Create a complete date range for each profile
full_date_range = pd.date_range(start="2023-01-02", end="2024-12-30", freq="W-MON")
full_index = pd.MultiIndex.from_product([selected_ids, full_date_range], names=["profile_id", "date"])

# Reindex the DataFrame
weekly_df = weekly_df.set_index(["profile_id", "date"]).reindex(full_index).reset_index()

# Filter the DataFrame to include only the selected IDs
weekly_df = weekly_df[weekly_df['profile_id'].isin(selected_ids)]

weekly_df.fillna(method='ffill', inplace=True)
weekly_df.fillna(method='bfill', inplace=True)
weekly_df.fillna(0, inplace=True)

# Split the data into training and testing sets
train_df = weekly_df[weekly_df['date'] < '2024-07-01']
test_df = weekly_df[weekly_df['date'] >= '2024-07-01']

# Pivot and prepare for AutoTS
def prepare_for_autots(df):
    df = df.pivot(index='date', columns='profile_id', values='followers_num')
    df = df.fillna(method='ffill')  # Forward-fill missing values
    df.index = df.index.to_period('W').to_timestamp('W')  # Convert index to week-end format
    df = df.fillna(method='ffill')
    return df

train_df = prepare_for_autots(train_df)
test_df = prepare_for_autots(test_df)

# Ensure no missing values
train_df.fillna(method='ffill', inplace=True)
train_df.fillna(method='bfill', inplace=True)
train_df.fillna(0, inplace=True)

test_df.fillna(method='ffill', inplace=True)
test_df.fillna(method='bfill', inplace=True)
test_df.fillna(0, inplace=True)

evaluation_results_w_90 = []

# ----- AUTOTS MODEL -----
# Initialize and train the AutoTS model
model = AutoTS(
    forecast_length=len(test_df),  
    frequency='W',  # Changed frequency to weekly
    ensemble='simple',
    max_generations=10,
    num_validations=3,
    validation_method='backwards'
)

# Train the model
model = model.fit(train_df)

# Generate predictions
predictions = model.predict(prediction_interval=0.95, fail_on_forecast_nan=False)
forecast_df = predictions.forecast.fillna(0)

# Align test data with predictions
test_df_aligned = test_df.loc[forecast_df.index]

# Evaluate AutoTS model
for profile_id in test_df_aligned.columns:
    true_values = test_df_aligned[profile_id]
    predicted_values = forecast_df[profile_id]
    mae_autots = mean_absolute_error(true_values, predicted_values)
    mse_autots = mean_squared_error(true_values, predicted_values)
    evaluation_results_w_90.append({'Profile': profile_id, 'MAE_AutoTS_w_50': mae_autots, 'MSE_AutoTS_w_50': mse_autots})

# Convert to DataFrame
evaluation_df_w_90 = pd.DataFrame(evaluation_results_w_90)

In [None]:
evaluation_results_w_90

In [80]:
evaluation_df_w_90

Unnamed: 0,Profile,MAE_AutoTS_w_50,MSE_AutoTS_w_50
0,00ed31934cb3ec073edcc77b23fd93bada5ee0047ddd31...,65423.78,4280460000.0
1,0967bebbf092787a3d3400ce1c1a012d88cbc1af75b7f7...,6883.733,111512200.0
2,0d40472f6bca0211ad17770e9990344d16cf05b1f0fcd5...,3341.385,15490670.0
3,103a0b7815fdec7b7920309d62b5bfb1d36bed9b909c68...,83994.06,7055003000.0
4,14c9357c4e4621e88d078764f978b2d0afed460175dc69...,0.0,0.0
5,1672870e984cf7cbc3d10e576d66acf1b25b5a029b2282...,1177063.0,2002506000000.0
6,167d0013957e4cbf435c12d3fd63e3001392dc37d7b90f...,1.515825e-13,3.446586e-26
7,1aa839d0b39346e243f7e5499b1afcc020ebf829307dda...,62.3,3881.29
8,1ea02215d567605cc3ae010a3783a1b3c4a98e283fb335...,41388.19,1713311000.0
9,1ec8fb36c578c721e00d4143c6d2c2f66bbffb4d7fdb58...,5772.4,33320600.0
