<a href="https://colab.research.google.com/github/hxm0707/Raw_Material_Price_Prediction/blob/main/LASSO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np

In [None]:
# Colab-only step: mount Google Drive so the feature CSVs stored under
# /content/drive/MyDrive can be read by the data-loading cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Ten consecutive half-year evaluation windows running from 2019-01-01 up to
# 2024-01-01. Each entry is a (window start, window end) pair of ISO date
# strings; calculate_mape_* treats the start as inclusive and the end as
# exclusive.
test_periods = [
    (f"{year}-{opens}-01", f"{year + rollover}-{closes}-01")
    for year in range(2019, 2024)
    for opens, closes, rollover in (("01", "07", 0), ("07", "01", 1))
]

In [None]:
def calculate_mape_1(test_start, test_end, alpha_low, data=None, cv=5):
  """Score the 1-month-horizon LASSO model on one test window.

  A LASSO regression is fitted on every row dated after 2015-12-31 and strictly
  before ``test_start``; its penalty is chosen by a randomized search, and the
  fitted model is scored on the rows in ``[test_start, test_end)``. No lag
  columns are removed in this variant.

  Args:
    test_start: Inclusive start of the test window, comparable with the
      datetime ``Time`` column (e.g. ``'2019-01-01'``).
    test_end: Exclusive end of the test window.
    alpha_low: Lower bound of the ``alpha`` grid; 3000 evenly spaced values in
      ``[alpha_low, 1]`` are generated and 300 of them are sampled.
    data: Feature table to use. When omitted, the notebook-level ``feature_df``
      is read, which is what three-argument calls have always done.
    cv: Cross-validation setting forwarded to ``RandomizedSearchCV``. The
      default ``5`` selects scikit-learn's plain K-fold splitting; because the
      rows are time-ordered, a time-aware splitter (for example
      ``sklearn.model_selection.TimeSeriesSplit``) can be passed instead.

  Returns:
    The test-window MAPE in percent. The MAPE fraction is rounded to three
    decimals before being multiplied by 100.

  Raises:
    ValueError: If either the training or the test window contains no rows.
  """
  # Keep the historical behaviour (read the notebook global) unless a table is
  # passed explicitly.
  frame = feature_df if data is None else data

  # Expanding-window split on the 'Time' column.
  time_col = frame['Time']
  train_df = frame[(time_col > '2015-12-31') & (time_col < test_start)]
  test_df = frame[(time_col >= test_start) & (time_col < test_end)]
  if train_df.empty or test_df.empty:
    # Fail here with the window spelled out rather than deep inside scikit-learn.
    raise ValueError(
        f"No rows to train/test on for window {test_start} to {test_end}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows")

  ## ------ This part needs to be changed for each forecasting horizon
  # Create X, y: everything except the bookkeeping columns and the target is a
  # predictor.
  non_feature_cols = ['Time', 'Group Description', 'Year', 'Month', 'Average_price']
  X_train = train_df.drop(columns=non_feature_cols)
  X_test = test_df.drop(columns=non_feature_cols)

  y_train = train_df['Average_price'].values
  y_test = test_df['Average_price'].values
  ## ------ This part needs to be changed for each forecasting horizon

  # Standardisation: both scalers are fitted on the training window only, so no
  # statistics from the test window reach the model.
  scaler_x = StandardScaler()
  X_train_scaled = scaler_x.fit_transform(X_train)
  X_test_scaled = scaler_x.transform(X_test)
  scaler_y = StandardScaler()
  y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

  # Randomized search over the LASSO penalty (fixed seed for reproducibility).
  param_grid = {'alpha': np.linspace(alpha_low, 1, 3000)}
  random_search = RandomizedSearchCV(estimator=Lasso(),
                                     param_distributions=param_grid,
                                     n_iter=300,
                                     cv=cv,
                                     random_state=42)
  random_search.fit(X_train_scaled, y_train_scaled)
  assert random_search.n_features_in_ == len(X_train.columns)

  # Predict with the refitted best model, then undo the target standardisation
  # so the error is measured on the scale of 'Average_price'.
  # NOTE(review): only the standardisation is inverted; no log-transform is
  # applied or undone in this function. If 'Average_price' is stored in logs
  # upstream, the MAPE below is on that log scale — confirm.
  best_lasso_model = random_search.best_estimator_
  y_pred_test = best_lasso_model.predict(X_test_scaled)
  y_pred_test_inverse = scaler_y.inverse_transform(y_pred_test.reshape(-1, 1))
  mape = round(mean_absolute_percentage_error(y_test, y_pred_test_inverse), 3)
  return mape * 100

In [None]:
def calculate_mape_3(test_start, test_end, alpha_low, data=None, cv=5):
  """Score the 3-month-horizon LASSO model on one test window.

  A LASSO regression is fitted on every row dated after 2015-12-31 and strictly
  before ``test_start``; its penalty is chosen by a randomized search, and the
  fitted model is scored on the rows in ``[test_start, test_end)``. Columns
  whose names end in ``_1`` or ``_2`` are excluded from the predictors.

  Args:
    test_start: Inclusive start of the test window, comparable with the
      datetime ``Time`` column (e.g. ``'2019-01-01'``).
    test_end: Exclusive end of the test window.
    alpha_low: Lower bound of the ``alpha`` grid; 3000 evenly spaced values in
      ``[alpha_low, 1]`` are generated and 300 of them are sampled.
    data: Feature table to use. When omitted, the notebook-level ``feature_df``
      is read, which is what three-argument calls have always done.
    cv: Cross-validation setting forwarded to ``RandomizedSearchCV``. The
      default ``5`` selects scikit-learn's plain K-fold splitting; because the
      rows are time-ordered, a time-aware splitter (for example
      ``sklearn.model_selection.TimeSeriesSplit``) can be passed instead.

  Returns:
    The test-window MAPE in percent. The MAPE fraction is rounded to three
    decimals before being multiplied by 100.

  Raises:
    ValueError: If either the training or the test window contains no rows.
  """
  # Keep the historical behaviour (read the notebook global) unless a table is
  # passed explicitly.
  frame = feature_df if data is None else data

  # Expanding-window split on the 'Time' column.
  time_col = frame['Time']
  train_df = frame[(time_col > '2015-12-31') & (time_col < test_start)]
  test_df = frame[(time_col >= test_start) & (time_col < test_end)]
  if train_df.empty or test_df.empty:
    # Fail here with the window spelled out rather than deep inside scikit-learn.
    raise ValueError(
        f"No rows to train/test on for window {test_start} to {test_end}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows")

  ## ------ This part needs to be changed for each forecasting horizon
  # Create X, y: drop the bookkeeping columns and the target, then drop the
  # columns suffixed _1 / _2 (presumably the 1- and 2-month lags, which would
  # not be available when forecasting 3 months ahead — TODO confirm).
  non_feature_cols = ['Time', 'Group Description', 'Year', 'Month', 'Average_price']
  X_train = train_df.drop(columns=non_feature_cols)
  X_train = X_train.drop(columns=X_train.filter(regex='_1$|_2$').columns)
  # Select the test predictors by the training column list so both matrices are
  # guaranteed to share the same columns in the same order.
  X_test = test_df[X_train.columns]

  y_train = train_df['Average_price'].values
  y_test = test_df['Average_price'].values
  ## ------ This part needs to be changed for each forecasting horizon

  # Standardisation: both scalers are fitted on the training window only, so no
  # statistics from the test window reach the model.
  scaler_x = StandardScaler()
  X_train_scaled = scaler_x.fit_transform(X_train)
  X_test_scaled = scaler_x.transform(X_test)
  scaler_y = StandardScaler()
  y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

  # Randomized search over the LASSO penalty (fixed seed for reproducibility).
  param_grid = {'alpha': np.linspace(alpha_low, 1, 3000)}
  random_search = RandomizedSearchCV(estimator=Lasso(),
                                     param_distributions=param_grid,
                                     n_iter=300,
                                     cv=cv,
                                     random_state=42)
  random_search.fit(X_train_scaled, y_train_scaled)
  assert random_search.n_features_in_ == len(X_train.columns)

  # Predict with the refitted best model, then undo the target standardisation
  # so the error is measured on the scale of 'Average_price'.
  # NOTE(review): only the standardisation is inverted; no log-transform is
  # applied or undone in this function. If 'Average_price' is stored in logs
  # upstream, the MAPE below is on that log scale — confirm.
  best_lasso_model = random_search.best_estimator_
  y_pred_test = best_lasso_model.predict(X_test_scaled)
  y_pred_test_inverse = scaler_y.inverse_transform(y_pred_test.reshape(-1, 1))
  mape = round(mean_absolute_percentage_error(y_test, y_pred_test_inverse), 3)
  return mape * 100

In [None]:
def calculate_mape_6(test_start, test_end, alpha_low, data=None, cv=5):
  """Score the 6-month-horizon LASSO model on one test window.

  A LASSO regression is fitted on every row dated after 2015-12-31 and strictly
  before ``test_start``; its penalty is chosen by a randomized search, and the
  fitted model is scored on the rows in ``[test_start, test_end)``. Columns
  whose names end in ``_1`` through ``_5`` are excluded from the predictors.

  Args:
    test_start: Inclusive start of the test window, comparable with the
      datetime ``Time`` column (e.g. ``'2019-01-01'``).
    test_end: Exclusive end of the test window.
    alpha_low: Lower bound of the ``alpha`` grid; 3000 evenly spaced values in
      ``[alpha_low, 1]`` are generated and 300 of them are sampled.
    data: Feature table to use. When omitted, the notebook-level ``feature_df``
      is read, which is what three-argument calls have always done.
    cv: Cross-validation setting forwarded to ``RandomizedSearchCV``. The
      default ``5`` selects scikit-learn's plain K-fold splitting; because the
      rows are time-ordered, a time-aware splitter (for example
      ``sklearn.model_selection.TimeSeriesSplit``) can be passed instead.

  Returns:
    The test-window MAPE in percent. The MAPE fraction is rounded to three
    decimals before being multiplied by 100.

  Raises:
    ValueError: If either the training or the test window contains no rows.
  """
  # Keep the historical behaviour (read the notebook global) unless a table is
  # passed explicitly.
  frame = feature_df if data is None else data

  # Expanding-window split on the 'Time' column.
  time_col = frame['Time']
  train_df = frame[(time_col > '2015-12-31') & (time_col < test_start)]
  test_df = frame[(time_col >= test_start) & (time_col < test_end)]
  if train_df.empty or test_df.empty:
    # Fail here with the window spelled out rather than deep inside scikit-learn.
    raise ValueError(
        f"No rows to train/test on for window {test_start} to {test_end}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows")

  ## ------ This part needs to be changed for each forecasting horizon
  # Create X, y: drop the bookkeeping columns and the target, then drop the
  # columns suffixed _1 ... _5 (presumably the 1- to 5-month lags, which would
  # not be available when forecasting 6 months ahead — TODO confirm).
  non_feature_cols = ['Time', 'Group Description', 'Year', 'Month', 'Average_price']
  X_train = train_df.drop(columns=non_feature_cols)
  X_train = X_train.drop(columns=X_train.filter(regex='_1$|_2$|_3$|_4$|_5$').columns)
  # Select the test predictors by the training column list so both matrices are
  # guaranteed to share the same columns in the same order.
  X_test = test_df[X_train.columns]

  y_train = train_df['Average_price'].values
  y_test = test_df['Average_price'].values
  ## ------ This part needs to be changed for each forecasting horizon

  # Standardisation: both scalers are fitted on the training window only, so no
  # statistics from the test window reach the model.
  scaler_x = StandardScaler()
  X_train_scaled = scaler_x.fit_transform(X_train)
  X_test_scaled = scaler_x.transform(X_test)
  scaler_y = StandardScaler()
  y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

  # Randomized search over the LASSO penalty (fixed seed for reproducibility).
  param_grid = {'alpha': np.linspace(alpha_low, 1, 3000)}
  random_search = RandomizedSearchCV(estimator=Lasso(),
                                     param_distributions=param_grid,
                                     n_iter=300,
                                     cv=cv,
                                     random_state=42)
  random_search.fit(X_train_scaled, y_train_scaled)
  assert random_search.n_features_in_ == len(X_train.columns)

  # Predict with the refitted best model, then undo the target standardisation
  # so the error is measured on the scale of 'Average_price'.
  # NOTE(review): only the standardisation is inverted; no log-transform is
  # applied or undone in this function. If 'Average_price' is stored in logs
  # upstream, the MAPE below is on that log scale — confirm.
  best_lasso_model = random_search.best_estimator_
  y_pred_test = best_lasso_model.predict(X_test_scaled)
  y_pred_test_inverse = scaler_y.inverse_transform(y_pred_test.reshape(-1, 1))
  mape = round(mean_absolute_percentage_error(y_test, y_pred_test_inverse), 3)
  return mape * 100

# Alkalis

In [None]:
# Load the alkalis feature table; the first CSV column becomes the row index.
# 'Time' is parsed to datetime because calculate_mape_* slices the train/test
# windows by comparing this column against date strings.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/alkalis_feature.csv',index_col=0)
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM02/0001  RM02/0002  Year  Month       Time Group Description  \
24       False       True  2016      1 2016-01-31           alkalis   
25        True      False  2016      1 2016-01-31           alkalis   
26       False       True  2016      2 2016-02-29           alkalis   
27        True      False  2016      2 2016-02-29           alkalis   
28       False       True  2016      3 2016-03-31           alkalis   
..         ...        ...   ...    ...        ...               ...   
211       True      False  2023     10 2023-10-31           alkalis   
212      False       True  2023     11 2023-11-30           alkalis   
213       True      False  2023     11 2023-11-30           alkalis   
214      False       True  2023     12 2023-12-20           alkalis   
215       True      False  2023     12 2023-12-31           alkalis   

     Average_price  PNGASEUUSDM_1  Electricity_1  PNGASEUUSDM_2  ...  \
24        0.409500       5.810000          48.38       5.870000  ...   
25 

In [None]:
# Rows of the alkalis table whose 'RM02/0001' indicator column is set.
# NOTE: DataFrame.info() prints its report itself and returns None, so the
# surrounding print() also emits a trailing "None".
df_0001 = feature_df[feature_df['RM02/0001'] == 1]
print(df_0001.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 25 to 215
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM02/0001          96 non-null     bool          
 1   RM02/0002          96 non-null     bool          
 2   Year               96 non-null     int64         
 3   Month              96 non-null     int64         
 4   Time               96 non-null     datetime64[ns]
 5   Group Description  96 non-null     object        
 6   Average_price      96 non-null     float64       
 7   PNGASEUUSDM_1      96 non-null     float64       
 8   Electricity_1      96 non-null     float64       
 9   PNGASEUUSDM_2      96 non-null     float64       
 10  Electricity_2      96 non-null     float64       
 11  PNGASEUUSDM_3      96 non-null     float64       
 12  Electricity_3      96 non-null     float64       
 13  PNGASEUUSDM_4      96 non-null     float64       
 14  Electricity_4  

In [None]:
# Rows of the alkalis table whose 'RM02/0002' indicator column is set.
df_0002 = feature_df[feature_df['RM02/0002'] == 1]
print(df_0002.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 24 to 214
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM02/0001          96 non-null     bool          
 1   RM02/0002          96 non-null     bool          
 2   Year               96 non-null     int64         
 3   Month              96 non-null     int64         
 4   Time               96 non-null     datetime64[ns]
 5   Group Description  96 non-null     object        
 6   Average_price      96 non-null     float64       
 7   PNGASEUUSDM_1      96 non-null     float64       
 8   Electricity_1      96 non-null     float64       
 9   PNGASEUUSDM_2      96 non-null     float64       
 10  Electricity_2      96 non-null     float64       
 11  PNGASEUUSDM_3      96 non-null     float64       
 12  Electricity_3      96 non-null     float64       
 13  PNGASEUUSDM_4      96 non-null     float64       
 14  Electricity_4  

## 0001

In [None]:
# Rebind the notebook-level feature_df (read by calculate_mape_*) to the
# alkalis RM02/0001 subset; every evaluation cell until the next rebinding uses it.
feature_df = df_0001

In [None]:
# Remove the material indicator columns so they do not end up among the
# predictors built inside calculate_mape_*.
# NOTE: this is a substring match — any column whose name contains 'RM' is dropped.
# NOTE: DataFrame.info() returns None, so print() also emits a trailing "None".
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
print(feature_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 25 to 215
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   Electricity_1      96 non-null     float64       
 7   PNGASEUUSDM_2      96 non-null     float64       
 8   Electricity_2      96 non-null     float64       
 9   PNGASEUUSDM_3      96 non-null     float64       
 10  Electricity_3      96 non-null     float64       
 11  PNGASEUUSDM_4      96 non-null     float64       
 12  Electricity_4      96 non-null     float64       
 13  PNGASEUUSDM_5      96 non-null     float64       
 14  Electricity_5  

### 1-month prediction

In [None]:
# Alkalis RM02/0001, 1-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.1. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 2.6
MAPE from 2019-07-01 to 2020-01-01: 3.8
MAPE from 2020-01-01 to 2020-07-01: 6.1
MAPE from 2020-07-01 to 2021-01-01: 7.8
MAPE from 2021-01-01 to 2021-07-01: 6.5
MAPE from 2021-07-01 to 2022-01-01: 7.7
MAPE from 2022-01-01 to 2022-07-01: 10.0
MAPE from 2022-07-01 to 2023-01-01: 12.8
MAPE from 2023-01-01 to 2023-07-01: 39.8
MAPE from 2023-07-01 to 2024-01-01: 3.3


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.0


### 3-month prediction


In [None]:
# Alkalis RM02/0001, 3-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.2. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.1
MAPE from 2019-07-01 to 2020-01-01: 10.6
MAPE from 2020-01-01 to 2020-07-01: 12.7
MAPE from 2020-07-01 to 2021-01-01: 15.7
MAPE from 2021-01-01 to 2021-07-01: 18.2
MAPE from 2021-07-01 to 2022-01-01: 12.8
MAPE from 2022-01-01 to 2022-07-01: 17.9
MAPE from 2022-07-01 to 2023-01-01: 26.2
MAPE from 2023-01-01 to 2023-07-01: 59.8
MAPE from 2023-07-01 to 2024-01-01: 5.2


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 18.4


### 6-month prediction

In [None]:
# Alkalis RM02/0001, 6-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.6. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.6)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 7.5
MAPE from 2019-07-01 to 2020-01-01: 11.9
MAPE from 2020-01-01 to 2020-07-01: 17.4
MAPE from 2020-07-01 to 2021-01-01: 24.2
MAPE from 2021-01-01 to 2021-07-01: 36.2
MAPE from 2021-07-01 to 2022-01-01: 14.3
MAPE from 2022-01-01 to 2022-07-01: 43.3
MAPE from 2022-07-01 to 2023-01-01: 55.3
MAPE from 2023-01-01 to 2023-07-01: 27.9
MAPE from 2023-07-01 to 2024-01-01: 18.3


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 25.6


## 0002

In [None]:
# Rebind the notebook-level feature_df (read by calculate_mape_*) to the
# alkalis RM02/0002 subset; every evaluation cell until the next rebinding uses it.
feature_df = df_0002

In [None]:
# Remove the material indicator columns so they do not end up among the
# predictors built inside calculate_mape_*.
# NOTE: this is a substring match — any column whose name contains 'RM' is dropped.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
print(feature_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 24 to 214
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   Electricity_1      96 non-null     float64       
 7   PNGASEUUSDM_2      96 non-null     float64       
 8   Electricity_2      96 non-null     float64       
 9   PNGASEUUSDM_3      96 non-null     float64       
 10  Electricity_3      96 non-null     float64       
 11  PNGASEUUSDM_4      96 non-null     float64       
 12  Electricity_4      96 non-null     float64       
 13  PNGASEUUSDM_5      96 non-null     float64       
 14  Electricity_5  

### 1-month prediction

In [None]:
# Alkalis RM02/0002, 1-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.1. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 4.5
MAPE from 2019-07-01 to 2020-01-01: 7.8
MAPE from 2020-01-01 to 2020-07-01: 10.9
MAPE from 2020-07-01 to 2021-01-01: 3.4
MAPE from 2021-01-01 to 2021-07-01: 4.0
MAPE from 2021-07-01 to 2022-01-01: 12.1
MAPE from 2022-01-01 to 2022-07-01: 17.5
MAPE from 2022-07-01 to 2023-01-01: 10.7
MAPE from 2023-01-01 to 2023-07-01: 16.0
MAPE from 2023-07-01 to 2024-01-01: 6.0


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 9.3


### 3-month prediction


In [None]:
# Alkalis RM02/0002, 3-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.3. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 7.0
MAPE from 2019-07-01 to 2020-01-01: 8.7
MAPE from 2020-01-01 to 2020-07-01: 13.4
MAPE from 2020-07-01 to 2021-01-01: 4.4
MAPE from 2021-01-01 to 2021-07-01: 4.7
MAPE from 2021-07-01 to 2022-01-01: 14.9
MAPE from 2022-01-01 to 2022-07-01: 40.7
MAPE from 2022-07-01 to 2023-01-01: 18.5
MAPE from 2023-01-01 to 2023-07-01: 23.1
MAPE from 2023-07-01 to 2024-01-01: 6.2


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 14.2


### 6-month prediction

In [None]:
# Alkalis RM02/0002, 6-month horizon: one LASSO fit per half-year test window,
# with the alpha search grid starting at 0.5. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.5)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.9
MAPE from 2019-07-01 to 2020-01-01: 8.3
MAPE from 2020-01-01 to 2020-07-01: 15.0
MAPE from 2020-07-01 to 2021-01-01: 7.0
MAPE from 2021-01-01 to 2021-07-01: 3.9
MAPE from 2021-07-01 to 2022-01-01: 15.5
MAPE from 2022-01-01 to 2022-07-01: 58.3
MAPE from 2022-07-01 to 2023-01-01: 54.5
MAPE from 2023-01-01 to 2023-07-01: 14.6
MAPE from 2023-07-01 to 2024-01-01: 7.0


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 19.6


# Bleaching Agent

In [None]:
# Load the bleaching-agent feature table, replacing the previously loaded table
# in feature_df; the first CSV column becomes the row index.
# 'Time' is parsed to datetime because calculate_mape_* slices the train/test
# windows by comparing this column against date strings.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/bleaching agent_feature.csv',index_col=0)
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM04/0001  RM04/0006  Year  Month       Time Group Description  \
12       False       True  2016      1 2016-01-31   bleaching agent   
13       False       True  2016      2 2016-02-29   bleaching agent   
14       False       True  2016      3 2016-03-31   bleaching agent   
15       False       True  2016      4 2016-04-30   bleaching agent   
16       False       True  2016      5 2016-05-31   bleaching agent   
..         ...        ...   ...    ...        ...               ...   
213       True      False  2023      8 2023-08-31   bleaching agent   
214       True      False  2023      9 2023-09-30   bleaching agent   
215       True      False  2023     10 2023-10-31   bleaching agent   
216       True      False  2023     11 2023-11-30   bleaching agent   
217       True      False  2023     12 2023-12-31   bleaching agent   

     Average_price  PNGASEUUSDM_1  Electricity_1  PNGASEUUSDM_2  ...  \
12        0.130089       5.810000          48.38       5.870000  ...   
13 

In [None]:
# Rows of the bleaching-agent table whose 'RM04/0001' indicator column is set.
# NOTE: this rebinds df_0001, which earlier held the alkalis RM02/0001 subset.
# NOTE: DataFrame.info() returns None, so print() also emits a trailing "None".
df_0001 = feature_df[feature_df['RM04/0001'] == 1]
print(df_0001.info())

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 123 to 217
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM04/0001          93 non-null     bool          
 1   RM04/0006          93 non-null     bool          
 2   Year               93 non-null     int64         
 3   Month              93 non-null     int64         
 4   Time               93 non-null     datetime64[ns]
 5   Group Description  93 non-null     object        
 6   Average_price      93 non-null     float64       
 7   PNGASEUUSDM_1      93 non-null     float64       
 8   Electricity_1      93 non-null     float64       
 9   PNGASEUUSDM_2      93 non-null     float64       
 10  Electricity_2      93 non-null     float64       
 11  PNGASEUUSDM_3      93 non-null     float64       
 12  Electricity_3      93 non-null     float64       
 13  PNGASEUUSDM_4      93 non-null     float64       
 14  Electricity_4 

In [None]:
# Rows of the bleaching-agent table whose 'RM04/0006' indicator column is set.
df_0006 = feature_df[feature_df['RM04/0006'] == 1]
print(df_0006.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 12 to 107
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM04/0001          96 non-null     bool          
 1   RM04/0006          96 non-null     bool          
 2   Year               96 non-null     int64         
 3   Month              96 non-null     int64         
 4   Time               96 non-null     datetime64[ns]
 5   Group Description  96 non-null     object        
 6   Average_price      96 non-null     float64       
 7   PNGASEUUSDM_1      96 non-null     float64       
 8   Electricity_1      96 non-null     float64       
 9   PNGASEUUSDM_2      96 non-null     float64       
 10  Electricity_2      96 non-null     float64       
 11  PNGASEUUSDM_3      96 non-null     float64       
 12  Electricity_3      96 non-null     float64       
 13  PNGASEUUSDM_4      96 non-null     float64       
 14  Electricity_4  

## 0001

In [None]:
# Rebind the notebook-level feature_df (read by calculate_mape_*) to the
# bleaching-agent RM04/0001 subset; every evaluation cell until the next rebinding uses it.
feature_df = df_0001

In [None]:
# Remove the material indicator columns so they do not end up among the
# predictors built inside calculate_mape_*.
# NOTE: this is a substring match — any column whose name contains 'RM' is dropped.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
print(feature_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 123 to 217
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               93 non-null     int64         
 1   Month              93 non-null     int64         
 2   Time               93 non-null     datetime64[ns]
 3   Group Description  93 non-null     object        
 4   Average_price      93 non-null     float64       
 5   PNGASEUUSDM_1      93 non-null     float64       
 6   Electricity_1      93 non-null     float64       
 7   PNGASEUUSDM_2      93 non-null     float64       
 8   Electricity_2      93 non-null     float64       
 9   PNGASEUUSDM_3      93 non-null     float64       
 10  Electricity_3      93 non-null     float64       
 11  PNGASEUUSDM_4      93 non-null     float64       
 12  Electricity_4      93 non-null     float64       
 13  PNGASEUUSDM_5      93 non-null     float64       
 14  Electricity_5 

### 1-month prediction

In [None]:
# Bleaching agent RM04/0001, 1-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.2. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.7
MAPE from 2019-07-01 to 2020-01-01: 7.2
MAPE from 2020-01-01 to 2020-07-01: 7.1
MAPE from 2020-07-01 to 2021-01-01: 2.2
MAPE from 2021-01-01 to 2021-07-01: 6.0
MAPE from 2021-07-01 to 2022-01-01: 8.5
MAPE from 2022-01-01 to 2022-07-01: 22.7
MAPE from 2022-07-01 to 2023-01-01: 18.5
MAPE from 2023-01-01 to 2023-07-01: 15.8
MAPE from 2023-07-01 to 2024-01-01: 8.3


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.3


### 3-month prediction


In [None]:
# Bleaching agent RM04/0001, 3-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.5. Results are MAPE in percent.
# NOTE(review): for this material the recorded MAPEs of the first two windows
# are identical at the 1-, 3- and 6-month horizons. That would be consistent
# with the penalty shrinking every coefficient to zero (a constant prediction)
# — worth checking with a smaller alpha lower bound.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.5)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.7
MAPE from 2019-07-01 to 2020-01-01: 7.2
MAPE from 2020-01-01 to 2020-07-01: 10.1
MAPE from 2020-07-01 to 2021-01-01: 6.0
MAPE from 2021-01-01 to 2021-07-01: 8.5
MAPE from 2021-07-01 to 2022-01-01: 11.7
MAPE from 2022-01-01 to 2022-07-01: 34.5
MAPE from 2022-07-01 to 2023-01-01: 28.1
MAPE from 2023-01-01 to 2023-07-01: 17.4
MAPE from 2023-07-01 to 2024-01-01: 16.2


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 14.6


### 6-month prediction

In [None]:
# Bleaching agent RM04/0001, 6-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.3. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.7
MAPE from 2019-07-01 to 2020-01-01: 7.2
MAPE from 2020-01-01 to 2020-07-01: 10.1
MAPE from 2020-07-01 to 2021-01-01: 3.1
MAPE from 2021-01-01 to 2021-07-01: 7.0
MAPE from 2021-07-01 to 2022-01-01: 10.0
MAPE from 2022-01-01 to 2022-07-01: 32.2
MAPE from 2022-07-01 to 2023-01-01: 26.8
MAPE from 2023-01-01 to 2023-07-01: 40.0
MAPE from 2023-07-01 to 2024-01-01: 10.7


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 15.4


## 0006

In [None]:
# Rebind the notebook-level feature_df (read by calculate_mape_*) to the
# bleaching-agent RM04/0006 subset; every evaluation cell until the next rebinding uses it.
feature_df = df_0006

In [None]:
# Remove the material indicator columns so they do not end up among the
# predictors built inside calculate_mape_*.
# NOTE: this is a substring match — any column whose name contains 'RM' is dropped.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
print(feature_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 12 to 107
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   Electricity_1      96 non-null     float64       
 7   PNGASEUUSDM_2      96 non-null     float64       
 8   Electricity_2      96 non-null     float64       
 9   PNGASEUUSDM_3      96 non-null     float64       
 10  Electricity_3      96 non-null     float64       
 11  PNGASEUUSDM_4      96 non-null     float64       
 12  Electricity_4      96 non-null     float64       
 13  PNGASEUUSDM_5      96 non-null     float64       
 14  Electricity_5  

### 1-month prediction

In [None]:
# Bleaching agent RM04/0006, 1-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.2. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.7
MAPE from 2019-07-01 to 2020-01-01: 6.1
MAPE from 2020-01-01 to 2020-07-01: 22.7
MAPE from 2020-07-01 to 2021-01-01: 10.9
MAPE from 2021-01-01 to 2021-07-01: 2.7
MAPE from 2021-07-01 to 2022-01-01: 7.7
MAPE from 2022-01-01 to 2022-07-01: 18.0
MAPE from 2022-07-01 to 2023-01-01: 12.9
MAPE from 2023-01-01 to 2023-07-01: 6.4
MAPE from 2023-07-01 to 2024-01-01: 8.9


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.8


### 3-month prediction


In [None]:
# Bleaching agent RM04/0006, 3-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.2. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.7
MAPE from 2019-07-01 to 2020-01-01: 6.2
MAPE from 2020-01-01 to 2020-07-01: 23.4
MAPE from 2020-07-01 to 2021-01-01: 11.8
MAPE from 2021-01-01 to 2021-07-01: 5.3
MAPE from 2021-07-01 to 2022-01-01: 7.5
MAPE from 2022-01-01 to 2022-07-01: 20.2
MAPE from 2022-07-01 to 2023-01-01: 15.0
MAPE from 2023-01-01 to 2023-07-01: 34.7
MAPE from 2023-07-01 to 2024-01-01: 10.7


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 14.7


### 6-month prediction

In [None]:
# Bleaching agent RM04/0006, 6-month horizon: one LASSO fit per half-year test
# window, with the alpha search grid starting at 0.3. Results are MAPE in percent.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.7
MAPE from 2019-07-01 to 2020-01-01: 9.7
MAPE from 2020-01-01 to 2020-07-01: 25.8
MAPE from 2020-07-01 to 2021-01-01: 16.0
MAPE from 2021-01-01 to 2021-07-01: 4.6
MAPE from 2021-07-01 to 2022-01-01: 9.4
MAPE from 2022-01-01 to 2022-07-01: 25.6
MAPE from 2022-07-01 to 2023-01-01: 28.5
MAPE from 2023-01-01 to 2023-07-01: 30.7
MAPE from 2023-07-01 to 2024-01-01: 6.4


In [None]:
# Unweighted mean of the per-window MAPEs (percent) held in mape_values;
# every window counts equally regardless of how many rows it contains.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 16.8


# Acid

In [None]:
# Load the acid feature table, replacing the previously loaded table in
# feature_df; the first CSV column becomes the row index.
# 'Time' is parsed to datetime because calculate_mape_* slices the train/test
# windows by comparing this column against date strings.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/acid_feature.csv',index_col=0)
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM01/0001  RM01/0004  RM01/0006  RM01/0007  Year  Month       Time  \
2        False       True      False      False  2016      1 2016-01-31   
3         True      False      False      False  2016      1 2016-01-31   
4        False       True      False      False  2016      2 2016-02-29   
5         True      False      False      False  2016      2 2016-02-29   
6        False       True      False      False  2016      3 2016-03-31   
..         ...        ...        ...        ...   ...    ...        ...   
400      False      False       True      False  2023      8 2023-08-31   
401      False      False       True      False  2023      9 2023-09-28   
402      False      False       True      False  2023     10 2023-10-30   
403      False      False       True      False  2023     11 2023-11-30   
404      False      False       True      False  2023     12 2023-12-22   

    Group Description  Average_price  PNGASEUUSDM_1  ...      AR_3      AR_4  \
2                ac

In [None]:
# Rows of the acid table whose 'RM01/0001' indicator column is set.
# NOTE: this rebinds df_0001, which earlier held subsets of other material groups.
# NOTE: DataFrame.info() returns None, so print() also emits a trailing "None".
df_0001 = feature_df[feature_df['RM01/0001'] == 1]
print(df_0001.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 3 to 193
Data columns (total 69 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM01/0001          96 non-null     bool          
 1   RM01/0004          96 non-null     bool          
 2   RM01/0006          96 non-null     bool          
 3   RM01/0007          96 non-null     bool          
 4   Year               96 non-null     int64         
 5   Month              96 non-null     int64         
 6   Time               96 non-null     datetime64[ns]
 7   Group Description  96 non-null     object        
 8   Average_price      96 non-null     float64       
 9   PNGASEUUSDM_1      96 non-null     float64       
 10  PWHEAMTUSDM_1      96 non-null     float64       
 11  WPU0652013A_1      96 non-null     float64       
 12  Electricity_1      96 non-null     float64       
 13  PNGASEUUSDM_2      96 non-null     float64       
 14  PWHEAMTUSDM_2   

In [None]:
# Rows of the acid table whose 'RM01/0004' indicator column is set.
df_0004 = feature_df[feature_df['RM01/0004'] == 1]
print(df_0004.info())

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 2 to 192
Data columns (total 69 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM01/0001          96 non-null     bool          
 1   RM01/0004          96 non-null     bool          
 2   RM01/0006          96 non-null     bool          
 3   RM01/0007          96 non-null     bool          
 4   Year               96 non-null     int64         
 5   Month              96 non-null     int64         
 6   Time               96 non-null     datetime64[ns]
 7   Group Description  96 non-null     object        
 8   Average_price      96 non-null     float64       
 9   PNGASEUUSDM_1      96 non-null     float64       
 10  PWHEAMTUSDM_1      96 non-null     float64       
 11  WPU0652013A_1      96 non-null     float64       
 12  Electricity_1      96 non-null     float64       
 13  PNGASEUUSDM_2      96 non-null     float64       
 14  PWHEAMTUSDM_2   

In [None]:
# Rows of the acid table whose 'RM01/0006' indicator column is set.
# NOTE: this rebinds df_0006, which earlier held the bleaching-agent RM04/0006 subset.
df_0006 = feature_df[feature_df['RM01/0006'] == 1]
print(df_0006.info())

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 309 to 404
Data columns (total 69 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM01/0001          95 non-null     bool          
 1   RM01/0004          95 non-null     bool          
 2   RM01/0006          95 non-null     bool          
 3   RM01/0007          95 non-null     bool          
 4   Year               95 non-null     int64         
 5   Month              95 non-null     int64         
 6   Time               95 non-null     datetime64[ns]
 7   Group Description  95 non-null     object        
 8   Average_price      95 non-null     float64       
 9   PNGASEUUSDM_1      95 non-null     float64       
 10  PWHEAMTUSDM_1      95 non-null     float64       
 11  WPU0652013A_1      95 non-null     float64       
 12  Electricity_1      95 non-null     float64       
 13  PNGASEUUSDM_2      95 non-null     float64       
 14  PWHEAMTUSDM_2 

In [None]:
# Select the rows for raw material RM01/0007 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0007 = feature_df[feature_df['RM01/0007'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0007.info()

<class 'pandas.core.frame.DataFrame'>
Index: 75 entries, 213 to 307
Data columns (total 69 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM01/0001          75 non-null     bool          
 1   RM01/0004          75 non-null     bool          
 2   RM01/0006          75 non-null     bool          
 3   RM01/0007          75 non-null     bool          
 4   Year               75 non-null     int64         
 5   Month              75 non-null     int64         
 6   Time               75 non-null     datetime64[ns]
 7   Group Description  75 non-null     object        
 8   Average_price      75 non-null     float64       
 9   PNGASEUUSDM_1      75 non-null     float64       
 10  PWHEAMTUSDM_1      75 non-null     float64       
 11  WPU0652013A_1      75 non-null     float64       
 12  Electricity_1      75 non-null     float64       
 13  PNGASEUUSDM_2      75 non-null     float64       
 14  PWHEAMTUSDM_2 

## 0001

In [None]:
# Make the df_0001 subset (built in an earlier cell) the active frame:
# calculate_mape_1 reads the global `feature_df`
# (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0001

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 3 to 193
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   PWHEAMTUSDM_1      96 non-null     float64       
 7   WPU0652013A_1      96 non-null     float64       
 8   Electricity_1      96 non-null     float64       
 9   PNGASEUUSDM_2      96 non-null     float64       
 10  PWHEAMTUSDM_2      96 non-null     float64       
 11  WPU0652013A_2      96 non-null     float64       
 12  Electricity_2      96 non-null     float64       
 13  PNGASEUUSDM_3      96 non-null     float64       
 14  PWHEAMTUSDM_3   

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.1 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.8
MAPE from 2019-07-01 to 2020-01-01: 2.3
MAPE from 2020-01-01 to 2020-07-01: 1.4
MAPE from 2020-07-01 to 2021-01-01: 1.7
MAPE from 2021-01-01 to 2021-07-01: 14.5
MAPE from 2021-07-01 to 2022-01-01: 5.9
MAPE from 2022-01-01 to 2022-07-01: 3.4
MAPE from 2022-07-01 to 2023-01-01: 1.9
MAPE from 2023-01-01 to 2023-07-01: 6.3
MAPE from 2023-07-01 to 2024-01-01: 7.6


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 5.2


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.01 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.01)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 15.5
MAPE from 2019-07-01 to 2020-01-01: 6.0
MAPE from 2020-01-01 to 2020-07-01: 3.1
MAPE from 2020-07-01 to 2021-01-01: 3.2
MAPE from 2021-01-01 to 2021-07-01: 28.2
MAPE from 2021-07-01 to 2022-01-01: 20.6
MAPE from 2022-01-01 to 2022-07-01: 13.8
MAPE from 2022-07-01 to 2023-01-01: 3.5
MAPE from 2023-01-01 to 2023-07-01: 6.4
MAPE from 2023-07-01 to 2024-01-01: 12.9


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.3


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.05 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.05)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 12.4
MAPE from 2019-07-01 to 2020-01-01: 10.4
MAPE from 2020-01-01 to 2020-07-01: 5.6
MAPE from 2020-07-01 to 2021-01-01: 5.5
MAPE from 2021-01-01 to 2021-07-01: 31.2
MAPE from 2021-07-01 to 2022-01-01: 41.3
MAPE from 2022-01-01 to 2022-07-01: 12.2
MAPE from 2022-07-01 to 2023-01-01: 17.3
MAPE from 2023-01-01 to 2023-07-01: 8.1
MAPE from 2023-07-01 to 2024-01-01: 18.3


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 16.2


## 0004

In [None]:
# Make the RM01/0004 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0004

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 2 to 192
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   PWHEAMTUSDM_1      96 non-null     float64       
 7   WPU0652013A_1      96 non-null     float64       
 8   Electricity_1      96 non-null     float64       
 9   PNGASEUUSDM_2      96 non-null     float64       
 10  PWHEAMTUSDM_2      96 non-null     float64       
 11  WPU0652013A_2      96 non-null     float64       
 12  Electricity_2      96 non-null     float64       
 13  PNGASEUUSDM_3      96 non-null     float64       
 14  PWHEAMTUSDM_3   

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.1 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 7.1
MAPE from 2019-07-01 to 2020-01-01: 12.0
MAPE from 2020-01-01 to 2020-07-01: 12.3
MAPE from 2020-07-01 to 2021-01-01: 18.9
MAPE from 2021-01-01 to 2021-07-01: 11.6
MAPE from 2021-07-01 to 2022-01-01: 8.6
MAPE from 2022-01-01 to 2022-07-01: 10.3
MAPE from 2022-07-01 to 2023-01-01: 8.5
MAPE from 2023-01-01 to 2023-07-01: 7.3
MAPE from 2023-07-01 to 2024-01-01: 3.2


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.0


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.3
MAPE from 2019-07-01 to 2020-01-01: 17.6
MAPE from 2020-01-01 to 2020-07-01: 23.2
MAPE from 2020-07-01 to 2021-01-01: 17.7
MAPE from 2021-01-01 to 2021-07-01: 17.0
MAPE from 2021-07-01 to 2022-01-01: 23.3
MAPE from 2022-01-01 to 2022-07-01: 10.3
MAPE from 2022-07-01 to 2023-01-01: 18.5
MAPE from 2023-01-01 to 2023-07-01: 15.5
MAPE from 2023-07-01 to 2024-01-01: 3.6


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 15.8


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.3 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 9.7
MAPE from 2019-07-01 to 2020-01-01: 26.2
MAPE from 2020-01-01 to 2020-07-01: 29.9
MAPE from 2020-07-01 to 2021-01-01: 35.2
MAPE from 2021-01-01 to 2021-07-01: 19.9
MAPE from 2021-07-01 to 2022-01-01: 37.2
MAPE from 2022-01-01 to 2022-07-01: 26.7
MAPE from 2022-07-01 to 2023-01-01: 9.5
MAPE from 2023-01-01 to 2023-07-01: 49.5
MAPE from 2023-07-01 to 2024-01-01: 4.5


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 24.8


## 0006

In [None]:
# Make the RM01/0006 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0006

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 309 to 404
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               95 non-null     int64         
 1   Month              95 non-null     int64         
 2   Time               95 non-null     datetime64[ns]
 3   Group Description  95 non-null     object        
 4   Average_price      95 non-null     float64       
 5   PNGASEUUSDM_1      95 non-null     float64       
 6   PWHEAMTUSDM_1      95 non-null     float64       
 7   WPU0652013A_1      95 non-null     float64       
 8   Electricity_1      95 non-null     float64       
 9   PNGASEUUSDM_2      95 non-null     float64       
 10  PWHEAMTUSDM_2      95 non-null     float64       
 11  WPU0652013A_2      95 non-null     float64       
 12  Electricity_2      95 non-null     float64       
 13  PNGASEUUSDM_3      95 non-null     float64       
 14  PWHEAMTUSDM_3 

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.1 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.3
MAPE from 2019-07-01 to 2020-01-01: 5.2
MAPE from 2020-01-01 to 2020-07-01: 7.5
MAPE from 2020-07-01 to 2021-01-01: 4.7
MAPE from 2021-01-01 to 2021-07-01: 6.5
MAPE from 2021-07-01 to 2022-01-01: 15.0
MAPE from 2022-01-01 to 2022-07-01: 27.7
MAPE from 2022-07-01 to 2023-01-01: 25.5
MAPE from 2023-01-01 to 2023-07-01: 6.7
MAPE from 2023-07-01 to 2024-01-01: 11.9


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.6


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.3
MAPE from 2019-07-01 to 2020-01-01: 4.8
MAPE from 2020-01-01 to 2020-07-01: 6.4
MAPE from 2020-07-01 to 2021-01-01: 4.2
MAPE from 2021-01-01 to 2021-07-01: 5.5
MAPE from 2021-07-01 to 2022-01-01: 16.3
MAPE from 2022-01-01 to 2022-07-01: 54.0
MAPE from 2022-07-01 to 2023-01-01: 23.6
MAPE from 2023-01-01 to 2023-07-01: 6.3
MAPE from 2023-07-01 to 2024-01-01: 12.0


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.8


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.4 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 3.2
MAPE from 2019-07-01 to 2020-01-01: 3.7
MAPE from 2020-01-01 to 2020-07-01: 7.7
MAPE from 2020-07-01 to 2021-01-01: 4.8
MAPE from 2021-01-01 to 2021-07-01: 5.0
MAPE from 2021-07-01 to 2022-01-01: 20.4
MAPE from 2022-01-01 to 2022-07-01: 57.1
MAPE from 2022-07-01 to 2023-01-01: 44.1
MAPE from 2023-01-01 to 2023-07-01: 17.0
MAPE from 2023-07-01 to 2024-01-01: 12.8


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 17.6


## 0007

In [None]:
# Make the RM01/0007 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0007

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 75 entries, 213 to 307
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               75 non-null     int64         
 1   Month              75 non-null     int64         
 2   Time               75 non-null     datetime64[ns]
 3   Group Description  75 non-null     object        
 4   Average_price      75 non-null     float64       
 5   PNGASEUUSDM_1      75 non-null     float64       
 6   PWHEAMTUSDM_1      75 non-null     float64       
 7   WPU0652013A_1      75 non-null     float64       
 8   Electricity_1      75 non-null     float64       
 9   PNGASEUUSDM_2      75 non-null     float64       
 10  PWHEAMTUSDM_2      75 non-null     float64       
 11  WPU0652013A_2      75 non-null     float64       
 12  Electricity_2      75 non-null     float64       
 13  PNGASEUUSDM_3      75 non-null     float64       
 14  PWHEAMTUSDM_3 

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.01 is passed as calculate_mape_1's `alpha_low` parameter.
# NOTE(review): the saved output of this cell shows a warning emitted from
# sklearn's coordinate-descent solver for some windows — likely a convergence
# warning at this small alpha bound; confirm and consider raising max_iter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.01)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

  model = cd_fast.enet_coordinate_descent(


MAPE from 2019-01-01 to 2019-07-01: 40.4
MAPE from 2019-07-01 to 2020-01-01: 13.9
MAPE from 2020-01-01 to 2020-07-01: 26.3
MAPE from 2020-07-01 to 2021-01-01: 9.1
MAPE from 2021-01-01 to 2021-07-01: 17.5


  model = cd_fast.enet_coordinate_descent(


MAPE from 2021-07-01 to 2022-01-01: 21.8
MAPE from 2022-01-01 to 2022-07-01: 51.1
MAPE from 2022-07-01 to 2023-01-01: 47.7
MAPE from 2023-01-01 to 2023-07-01: 5.1
MAPE from 2023-07-01 to 2024-01-01: 6.1


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 23.9


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.1 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 40.4
MAPE from 2019-07-01 to 2020-01-01: 13.5
MAPE from 2020-01-01 to 2020-07-01: 32.0
MAPE from 2020-07-01 to 2021-01-01: 12.3
MAPE from 2021-01-01 to 2021-07-01: 20.5
MAPE from 2021-07-01 to 2022-01-01: 26.6
MAPE from 2022-01-01 to 2022-07-01: 43.9
MAPE from 2022-07-01 to 2023-01-01: 44.7
MAPE from 2023-01-01 to 2023-07-01: 17.9
MAPE from 2023-07-01 to 2024-01-01: 12.4


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 26.4


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.4 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 40.4
MAPE from 2019-07-01 to 2020-01-01: 18.1
MAPE from 2020-01-01 to 2020-07-01: 33.8
MAPE from 2020-07-01 to 2021-01-01: 10.1
MAPE from 2021-01-01 to 2021-07-01: 12.8
MAPE from 2021-07-01 to 2022-01-01: 26.6
MAPE from 2022-01-01 to 2022-07-01: 51.1
MAPE from 2022-07-01 to 2023-01-01: 57.3
MAPE from 2023-01-01 to 2023-07-01: 16.6
MAPE from 2023-07-01 to 2024-01-01: 8.5


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 27.5


# Non-ionic Surfactant

In [None]:
# Load the non-ionic surfactant feature table (first CSV column becomes the
# index) and convert its `Time` column to datetimes in one chained expression.
# This rebinds the global `feature_df` to the full multi-material table.
feature_df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/non-ionic surfactant_feature.csv',
    index_col=0,
).assign(Time=lambda frame: pd.to_datetime(frame['Time']))
print(feature_df)

     RM12/0002  RM12/0003  RM12/0005  RM12/0008  RM12/0012  Year  Month  \
2        False      False       True      False      False  2016      1   
3        False       True      False      False      False  2016      1   
4        False      False       True      False      False  2016      2   
5        False       True      False      False      False  2016      2   
6        False      False       True      False      False  2016      3   
..         ...        ...        ...        ...        ...   ...    ...   
488       True      False      False      False      False  2023      8   
489       True      False      False      False      False  2023      9   
490       True      False      False      False      False  2023     10   
491       True      False      False      False      False  2023     11   
492       True      False      False      False      False  2023     12   

          Time     Group Description  Average_price  ...      AR_3      AR_4  \
2   2016-01-29  non

In [None]:
# Select the rows for raw material RM12/0002 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0002 = feature_df[feature_df['RM12/0002'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0002.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 397 to 492
Data columns (total 82 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM12/0002          96 non-null     bool          
 1   RM12/0003          96 non-null     bool          
 2   RM12/0005          96 non-null     bool          
 3   RM12/0008          96 non-null     bool          
 4   RM12/0012          96 non-null     bool          
 5   Year               96 non-null     int64         
 6   Month              96 non-null     int64         
 7   Time               96 non-null     datetime64[ns]
 8   Group Description  96 non-null     object        
 9   Average_price      96 non-null     float64       
 10  PNGASEUUSDM_1      96 non-null     float64       
 11  POILBREUSDM_1      96 non-null     float64       
 12  PPOILUSDM_1        96 non-null     float64       
 13  WPU0652013A_1      96 non-null     float64       
 14  Electricity_1 

In [None]:
# Select the rows for raw material RM12/0003 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0003 = feature_df[feature_df['RM12/0003'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0003.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 3 to 193
Data columns (total 82 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM12/0002          96 non-null     bool          
 1   RM12/0003          96 non-null     bool          
 2   RM12/0005          96 non-null     bool          
 3   RM12/0008          96 non-null     bool          
 4   RM12/0012          96 non-null     bool          
 5   Year               96 non-null     int64         
 6   Month              96 non-null     int64         
 7   Time               96 non-null     datetime64[ns]
 8   Group Description  96 non-null     object        
 9   Average_price      96 non-null     float64       
 10  PNGASEUUSDM_1      96 non-null     float64       
 11  POILBREUSDM_1      96 non-null     float64       
 12  PPOILUSDM_1        96 non-null     float64       
 13  WPU0652013A_1      96 non-null     float64       
 14  Electricity_1   

In [None]:
# Select the rows for raw material RM12/0005 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0005 = feature_df[feature_df['RM12/0005'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0005.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 2 to 192
Data columns (total 82 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM12/0002          96 non-null     bool          
 1   RM12/0003          96 non-null     bool          
 2   RM12/0005          96 non-null     bool          
 3   RM12/0008          96 non-null     bool          
 4   RM12/0012          96 non-null     bool          
 5   Year               96 non-null     int64         
 6   Month              96 non-null     int64         
 7   Time               96 non-null     datetime64[ns]
 8   Group Description  96 non-null     object        
 9   Average_price      96 non-null     float64       
 10  PNGASEUUSDM_1      96 non-null     float64       
 11  POILBREUSDM_1      96 non-null     float64       
 12  PPOILUSDM_1        96 non-null     float64       
 13  WPU0652013A_1      96 non-null     float64       
 14  Electricity_1   

In [None]:
# Select the rows for raw material RM12/0008 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0008 = feature_df[feature_df['RM12/0008'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0008.info()

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 299 to 394
Data columns (total 82 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM12/0002          94 non-null     bool          
 1   RM12/0003          94 non-null     bool          
 2   RM12/0005          94 non-null     bool          
 3   RM12/0008          94 non-null     bool          
 4   RM12/0012          94 non-null     bool          
 5   Year               94 non-null     int64         
 6   Month              94 non-null     int64         
 7   Time               94 non-null     datetime64[ns]
 8   Group Description  94 non-null     object        
 9   Average_price      94 non-null     float64       
 10  PNGASEUUSDM_1      94 non-null     float64       
 11  POILBREUSDM_1      94 non-null     float64       
 12  PPOILUSDM_1        94 non-null     float64       
 13  WPU0652013A_1      94 non-null     float64       
 14  Electricity_1 

In [None]:
# Select the rows for raw material RM12/0012 via its indicator column.
# Run this before `feature_df` is rebound to a single-material frame further
# down, because that rebinding drops the RM* columns used here.
df_0012 = feature_df[feature_df['RM12/0012'] == 1]
# DataFrame.info() prints the summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df_0012.info()

<class 'pandas.core.frame.DataFrame'>
Index: 87 entries, 202 to 297
Data columns (total 82 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM12/0002          87 non-null     bool          
 1   RM12/0003          87 non-null     bool          
 2   RM12/0005          87 non-null     bool          
 3   RM12/0008          87 non-null     bool          
 4   RM12/0012          87 non-null     bool          
 5   Year               87 non-null     int64         
 6   Month              87 non-null     int64         
 7   Time               87 non-null     datetime64[ns]
 8   Group Description  87 non-null     object        
 9   Average_price      87 non-null     float64       
 10  PNGASEUUSDM_1      87 non-null     float64       
 11  POILBREUSDM_1      87 non-null     float64       
 12  PPOILUSDM_1        87 non-null     float64       
 13  WPU0652013A_1      87 non-null     float64       
 14  Electricity_1 

## 0002

In [None]:
# Make the RM12/0002 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0002

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 397 to 492
Data columns (total 77 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   POILBREUSDM_1      96 non-null     float64       
 7   PPOILUSDM_1        96 non-null     float64       
 8   WPU0652013A_1      96 non-null     float64       
 9   Electricity_1      96 non-null     float64       
 10  PNGASEUUSDM_2      96 non-null     float64       
 11  POILBREUSDM_2      96 non-null     float64       
 12  PPOILUSDM_2        96 non-null     float64       
 13  WPU0652013A_2      96 non-null     float64       
 14  Electricity_2 

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.2 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.5
MAPE from 2019-07-01 to 2020-01-01: 10.6
MAPE from 2020-01-01 to 2020-07-01: 9.6
MAPE from 2020-07-01 to 2021-01-01: 9.3
MAPE from 2021-01-01 to 2021-07-01: 23.4
MAPE from 2021-07-01 to 2022-01-01: 20.0
MAPE from 2022-01-01 to 2022-07-01: 10.8
MAPE from 2022-07-01 to 2023-01-01: 6.6
MAPE from 2023-01-01 to 2023-07-01: 14.0
MAPE from 2023-07-01 to 2024-01-01: 7.3


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.8


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.5
MAPE from 2019-07-01 to 2020-01-01: 10.6
MAPE from 2020-01-01 to 2020-07-01: 9.8
MAPE from 2020-07-01 to 2021-01-01: 9.4
MAPE from 2021-01-01 to 2021-07-01: 23.0
MAPE from 2021-07-01 to 2022-01-01: 25.7
MAPE from 2022-01-01 to 2022-07-01: 11.2
MAPE from 2022-07-01 to 2023-01-01: 9.0
MAPE from 2023-01-01 to 2023-07-01: 12.7
MAPE from 2023-07-01 to 2024-01-01: 8.3


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.6


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.3 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 6.1
MAPE from 2019-07-01 to 2020-01-01: 10.1
MAPE from 2020-01-01 to 2020-07-01: 10.1
MAPE from 2020-07-01 to 2021-01-01: 9.8
MAPE from 2021-01-01 to 2021-07-01: 23.5
MAPE from 2021-07-01 to 2022-01-01: 28.0
MAPE from 2022-01-01 to 2022-07-01: 18.9
MAPE from 2022-07-01 to 2023-01-01: 4.0
MAPE from 2023-01-01 to 2023-07-01: 11.7
MAPE from 2023-07-01 to 2024-01-01: 8.7


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.1


## 0003

In [None]:
# Make the RM12/0003 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0003

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 3 to 193
Data columns (total 77 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   POILBREUSDM_1      96 non-null     float64       
 7   PPOILUSDM_1        96 non-null     float64       
 8   WPU0652013A_1      96 non-null     float64       
 9   Electricity_1      96 non-null     float64       
 10  PNGASEUUSDM_2      96 non-null     float64       
 11  POILBREUSDM_2      96 non-null     float64       
 12  PPOILUSDM_2        96 non-null     float64       
 13  WPU0652013A_2      96 non-null     float64       
 14  Electricity_2   

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.2 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 8.0
MAPE from 2019-07-01 to 2020-01-01: 6.7
MAPE from 2020-01-01 to 2020-07-01: 9.4
MAPE from 2020-07-01 to 2021-01-01: 11.3
MAPE from 2021-01-01 to 2021-07-01: 7.0
MAPE from 2021-07-01 to 2022-01-01: 7.2
MAPE from 2022-01-01 to 2022-07-01: 26.3
MAPE from 2022-07-01 to 2023-01-01: 5.1
MAPE from 2023-01-01 to 2023-07-01: 6.0
MAPE from 2023-07-01 to 2024-01-01: 5.2


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 9.2


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 7.2
MAPE from 2019-07-01 to 2020-01-01: 6.4
MAPE from 2020-01-01 to 2020-07-01: 10.8
MAPE from 2020-07-01 to 2021-01-01: 10.6
MAPE from 2021-01-01 to 2021-07-01: 7.2
MAPE from 2021-07-01 to 2022-01-01: 7.3
MAPE from 2022-01-01 to 2022-07-01: 27.2
MAPE from 2022-07-01 to 2023-01-01: 6.1
MAPE from 2023-01-01 to 2023-07-01: 7.4
MAPE from 2023-07-01 to 2024-01-01: 6.3


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 9.7


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.1 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 7.3
MAPE from 2019-07-01 to 2020-01-01: 7.4
MAPE from 2020-01-01 to 2020-07-01: 10.9
MAPE from 2020-07-01 to 2021-01-01: 12.6
MAPE from 2021-01-01 to 2021-07-01: 7.5
MAPE from 2021-07-01 to 2022-01-01: 7.9
MAPE from 2022-01-01 to 2022-07-01: 34.4
MAPE from 2022-07-01 to 2023-01-01: 6.2
MAPE from 2023-01-01 to 2023-07-01: 11.3
MAPE from 2023-07-01 to 2024-01-01: 6.6


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.2


## 0005

In [None]:
# Make the RM12/0005 subset the active frame: calculate_mape_1 reads the global
# `feature_df` (calculate_mape_3/_6 presumably do as well — confirm).
feature_df = df_0005

In [None]:
# Remove the material indicator columns (names beginning with 'RM') so they do
# not end up in the feature matrix that calculate_mape_1 builds from
# `feature_df`. A prefix test is used instead of a substring test so that an
# unrelated feature whose name merely contains "RM" is never dropped by accident.
columns_to_drop = [column for column in feature_df.columns if column.startswith('RM')]
feature_df = feature_df.drop(columns=columns_to_drop)
# info() prints directly and returns None, so no print() wrapper is needed.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 2 to 192
Data columns (total 77 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   POILBREUSDM_1      96 non-null     float64       
 7   PPOILUSDM_1        96 non-null     float64       
 8   WPU0652013A_1      96 non-null     float64       
 9   Electricity_1      96 non-null     float64       
 10  PNGASEUUSDM_2      96 non-null     float64       
 11  POILBREUSDM_2      96 non-null     float64       
 12  PPOILUSDM_2        96 non-null     float64       
 13  WPU0652013A_2      96 non-null     float64       
 14  Electricity_2   

### 1-month prediction

In [None]:
# Score the 1-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# 0.2 is passed as calculate_mape_1's `alpha_low` parameter.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 8.4
MAPE from 2019-07-01 to 2020-01-01: 9.6
MAPE from 2020-01-01 to 2020-07-01: 7.2
MAPE from 2020-07-01 to 2021-01-01: 13.2
MAPE from 2021-01-01 to 2021-07-01: 8.6
MAPE from 2021-07-01 to 2022-01-01: 5.7
MAPE from 2022-01-01 to 2022-07-01: 21.9
MAPE from 2022-07-01 to 2023-01-01: 6.2
MAPE from 2023-01-01 to 2023-07-01: 21.1
MAPE from 2023-07-01 to 2024-01-01: 4.3


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.6


### 3-month prediction


In [None]:
# Score the 3-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_3's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 8.4
MAPE from 2019-07-01 to 2020-01-01: 9.6
MAPE from 2020-01-01 to 2020-07-01: 7.2
MAPE from 2020-07-01 to 2021-01-01: 13.1
MAPE from 2021-01-01 to 2021-07-01: 8.6
MAPE from 2021-07-01 to 2022-01-01: 5.9
MAPE from 2022-01-01 to 2022-07-01: 21.9
MAPE from 2022-07-01 to 2023-01-01: 5.1
MAPE from 2023-01-01 to 2023-07-01: 23.2
MAPE from 2023-07-01 to 2024-01-01: 4.0


In [None]:
# Mean of the per-window MAPE values gathered by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.7


### 6-month prediction

In [None]:
# Score the 6-month model on every back-test window in `test_periods`,
# collecting one MAPE per window in `mape_values`.
# NOTE(review): 0.2 is presumably the same `alpha_low` bound that
# calculate_mape_1 takes — confirm against calculate_mape_6's signature.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 8.9
MAPE from 2019-07-01 to 2020-01-01: 11.7
MAPE from 2020-01-01 to 2020-07-01: 7.2
MAPE from 2020-07-01 to 2021-01-01: 13.6
MAPE from 2021-01-01 to 2021-07-01: 8.6
MAPE from 2021-07-01 to 2022-01-01: 5.6
MAPE from 2022-01-01 to 2022-07-01: 21.9
MAPE from 2022-07-01 to 2023-01-01: 3.7
MAPE from 2023-01-01 to 2023-07-01: 25.4
MAPE from 2023-07-01 to 2024-01-01: 5.3


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.2


## 0008

In [None]:
# Select material 0008: calculate_mape_1 reads the global feature_df (the _3/_6
# variants presumably do too), so the evaluation cells below score this subset.
feature_df = df_0008

In [None]:
# Drop every column whose name contains 'RM' (presumably the per-material indicator
# flags -- confirm). calculate_mape_1 uses all columns except Time, Group Description,
# Year, Month and Average_price as features, so these must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0008 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 299 to 394
Data columns (total 77 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               94 non-null     int64         
 1   Month              94 non-null     int64         
 2   Time               94 non-null     datetime64[ns]
 3   Group Description  94 non-null     object        
 4   Average_price      94 non-null     float64       
 5   PNGASEUUSDM_1      94 non-null     float64       
 6   POILBREUSDM_1      94 non-null     float64       
 7   PPOILUSDM_1        94 non-null     float64       
 8   WPU0652013A_1      94 non-null     float64       
 9   Electricity_1      94 non-null     float64       
 10  PNGASEUUSDM_2      94 non-null     float64       
 11  POILBREUSDM_2      94 non-null     float64       
 12  PPOILUSDM_2        94 non-null     float64       
 13  WPU0652013A_2      94 non-null     float64       
 14  Electricity_2 

### 1-month prediction

In [None]:
# 1-month horizon for material 0008: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.3 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 17.8
MAPE from 2019-07-01 to 2020-01-01: 13.7
MAPE from 2020-01-01 to 2020-07-01: 15.6
MAPE from 2020-07-01 to 2021-01-01: 18.3
MAPE from 2021-01-01 to 2021-07-01: 14.9
MAPE from 2021-07-01 to 2022-01-01: 12.1
MAPE from 2022-01-01 to 2022-07-01: 7.3
MAPE from 2022-07-01 to 2023-01-01: 4.8
MAPE from 2023-01-01 to 2023-07-01: 3.7
MAPE from 2023-07-01 to 2024-01-01: 7.4


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.6


### 3-month prediction


In [None]:
# 3-month horizon for material 0008: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.3 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 17.8
MAPE from 2019-07-01 to 2020-01-01: 13.4
MAPE from 2020-01-01 to 2020-07-01: 15.6
MAPE from 2020-07-01 to 2021-01-01: 18.3
MAPE from 2021-01-01 to 2021-07-01: 14.9
MAPE from 2021-07-01 to 2022-01-01: 12.1
MAPE from 2022-01-01 to 2022-07-01: 7.3
MAPE from 2022-07-01 to 2023-01-01: 4.5
MAPE from 2023-01-01 to 2023-07-01: 3.8
MAPE from 2023-07-01 to 2024-01-01: 7.7


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.5


### 6-month prediction

In [None]:
# 6-month horizon for material 0008: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.3 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 17.8
MAPE from 2019-07-01 to 2020-01-01: 13.9
MAPE from 2020-01-01 to 2020-07-01: 15.7
MAPE from 2020-07-01 to 2021-01-01: 18.9
MAPE from 2021-01-01 to 2021-07-01: 14.9
MAPE from 2021-07-01 to 2022-01-01: 12.1
MAPE from 2022-01-01 to 2022-07-01: 13.5
MAPE from 2022-07-01 to 2023-01-01: 7.0
MAPE from 2023-01-01 to 2023-07-01: 4.7
MAPE from 2023-07-01 to 2024-01-01: 7.2


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.6


## 0012

In [None]:
# Select material 0012: calculate_mape_1 reads the global feature_df (the _3/_6
# variants presumably do too), so the evaluation cells below score this subset.
feature_df = df_0012

In [None]:
# Drop every column whose name contains 'RM' (presumably the per-material indicator
# flags -- confirm). calculate_mape_1 uses all columns except Time, Group Description,
# Year, Month and Average_price as features, so these must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0012 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 87 entries, 202 to 297
Data columns (total 77 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               87 non-null     int64         
 1   Month              87 non-null     int64         
 2   Time               87 non-null     datetime64[ns]
 3   Group Description  87 non-null     object        
 4   Average_price      87 non-null     float64       
 5   PNGASEUUSDM_1      87 non-null     float64       
 6   POILBREUSDM_1      87 non-null     float64       
 7   PPOILUSDM_1        87 non-null     float64       
 8   WPU0652013A_1      87 non-null     float64       
 9   Electricity_1      87 non-null     float64       
 10  PNGASEUUSDM_2      87 non-null     float64       
 11  POILBREUSDM_2      87 non-null     float64       
 12  PPOILUSDM_2        87 non-null     float64       
 13  WPU0652013A_2      87 non-null     float64       
 14  Electricity_2 

### 1-month prediction

In [None]:
# 1-month horizon for material 0012: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.4 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 4.9
MAPE from 2019-07-01 to 2020-01-01: 6.7
MAPE from 2020-01-01 to 2020-07-01: 4.6
MAPE from 2020-07-01 to 2021-01-01: 11.8
MAPE from 2021-01-01 to 2021-07-01: 7.2
MAPE from 2021-07-01 to 2022-01-01: 2.8
MAPE from 2022-01-01 to 2022-07-01: 33.8
MAPE from 2022-07-01 to 2023-01-01: 28.2
MAPE from 2023-01-01 to 2023-07-01: 13.4
MAPE from 2023-07-01 to 2024-01-01: 15.8


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.9


### 3-month prediction


In [None]:
# 3-month horizon for material 0012: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.4 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.0
MAPE from 2019-07-01 to 2020-01-01: 6.6
MAPE from 2020-01-01 to 2020-07-01: 4.6
MAPE from 2020-07-01 to 2021-01-01: 12.5
MAPE from 2021-01-01 to 2021-07-01: 7.5
MAPE from 2021-07-01 to 2022-01-01: 2.8
MAPE from 2022-01-01 to 2022-07-01: 35.4
MAPE from 2022-07-01 to 2023-01-01: 28.2
MAPE from 2023-01-01 to 2023-07-01: 13.4
MAPE from 2023-07-01 to 2024-01-01: 16.2


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.2


### 6-month prediction

In [None]:
# 6-month horizon for material 0012: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.4 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.2
MAPE from 2019-07-01 to 2020-01-01: 6.1
MAPE from 2020-01-01 to 2020-07-01: 4.6
MAPE from 2020-07-01 to 2021-01-01: 12.5
MAPE from 2021-01-01 to 2021-07-01: 7.0
MAPE from 2021-07-01 to 2022-01-01: 2.8
MAPE from 2022-01-01 to 2022-07-01: 35.4
MAPE from 2022-07-01 to 2023-01-01: 35.5
MAPE from 2023-01-01 to 2023-07-01: 14.1
MAPE from 2023-07-01 to 2024-01-01: 12.9


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.6


# Builder

In [None]:
# Load the builder feature table from the mounted Drive; the first CSV column
# becomes the index. This rebinds the global feature_df used by earlier sections.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/builder_feature.csv',index_col=0)
# Convert Time to datetime64 so the date-range filters on feature_df['Time'] in
# calculate_mape_1 compare timestamps.
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM20/0001  RM20/0015  RM20/0020  Year  Month       Time  \
14       False      False       True  2016      1 2016-01-31   
15       False      False       True  2016      2 2016-02-29   
16       False      False       True  2016      3 2016-03-31   
17       False      False       True  2016      4 2016-04-30   
18       False      False       True  2016      5 2016-05-31   
..         ...        ...        ...   ...    ...        ...   
323       True      False      False  2023      8 2023-08-25   
324       True      False      False  2023      9 2023-09-30   
325       True      False      False  2023     10 2023-10-31   
326       True      False      False  2023     11 2023-11-30   
327       True      False      False  2023     12 2023-12-29   

    Group Description  Average_price  WPU01130232_1  PMAIZMTUSDM_1  ...  \
14            builder       0.725000        142.400     164.004402  ...   
15            builder       0.806851        150.600     161.098877  ...   
16    

In [None]:
# Rows of the builder table flagged as material RM20/0001 (boolean indicator column).
# NOTE: the name df_0001 is rebound again in the Anionic Surfactant section.
df_0001 = feature_df[feature_df['RM20/0001'] == 1]
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
df_0001.info()

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 232 to 327
Data columns (total 68 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM20/0001          94 non-null     bool          
 1   RM20/0015          94 non-null     bool          
 2   RM20/0020          94 non-null     bool          
 3   Year               94 non-null     int64         
 4   Month              94 non-null     int64         
 5   Time               94 non-null     datetime64[ns]
 6   Group Description  94 non-null     object        
 7   Average_price      94 non-null     float64       
 8   WPU01130232_1      94 non-null     float64       
 9   PMAIZMTUSDM_1      94 non-null     float64       
 10  WPU07110224_1      94 non-null     float64       
 11  WPU065202_1        94 non-null     float64       
 12  WPU01130232_2      94 non-null     float64       
 13  PMAIZMTUSDM_2      94 non-null     float64       
 14  WPU07110224_2 

In [None]:
# Rows of the builder table flagged as material RM20/0015 (boolean indicator column).
df_0015 = feature_df[feature_df['RM20/0015'] == 1]
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
df_0015.info()

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 122 to 217
Data columns (total 68 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM20/0001          93 non-null     bool          
 1   RM20/0015          93 non-null     bool          
 2   RM20/0020          93 non-null     bool          
 3   Year               93 non-null     int64         
 4   Month              93 non-null     int64         
 5   Time               93 non-null     datetime64[ns]
 6   Group Description  93 non-null     object        
 7   Average_price      93 non-null     float64       
 8   WPU01130232_1      93 non-null     float64       
 9   PMAIZMTUSDM_1      93 non-null     float64       
 10  WPU07110224_1      93 non-null     float64       
 11  WPU065202_1        93 non-null     float64       
 12  WPU01130232_2      93 non-null     float64       
 13  PMAIZMTUSDM_2      93 non-null     float64       
 14  WPU07110224_2 

In [None]:
# Rows of the builder table flagged as material RM20/0020 (boolean indicator column).
df_0020 = feature_df[feature_df['RM20/0020'] == 1]
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
df_0020.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 14 to 109
Data columns (total 68 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM20/0001          95 non-null     bool          
 1   RM20/0015          95 non-null     bool          
 2   RM20/0020          95 non-null     bool          
 3   Year               95 non-null     int64         
 4   Month              95 non-null     int64         
 5   Time               95 non-null     datetime64[ns]
 6   Group Description  95 non-null     object        
 7   Average_price      95 non-null     float64       
 8   WPU01130232_1      95 non-null     float64       
 9   PMAIZMTUSDM_1      95 non-null     float64       
 10  WPU07110224_1      95 non-null     float64       
 11  WPU065202_1        95 non-null     float64       
 12  WPU01130232_2      95 non-null     float64       
 13  PMAIZMTUSDM_2      95 non-null     float64       
 14  WPU07110224_2  

## 0001

In [None]:
# Select builder material RM20/0001: calculate_mape_1 reads the global feature_df
# (the _3/_6 variants presumably do too). After this rebinding the full builder
# table is no longer reachable through feature_df.
feature_df = df_0001

In [None]:
# Drop the material indicator columns (RM20/0001, RM20/0015, RM20/0020 all contain
# 'RM'). calculate_mape_1 uses all columns except Time, Group Description, Year,
# Month and Average_price as features, so the flags must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0001 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 94 entries, 232 to 327
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               94 non-null     int64         
 1   Month              94 non-null     int64         
 2   Time               94 non-null     datetime64[ns]
 3   Group Description  94 non-null     object        
 4   Average_price      94 non-null     float64       
 5   WPU01130232_1      94 non-null     float64       
 6   PMAIZMTUSDM_1      94 non-null     float64       
 7   WPU07110224_1      94 non-null     float64       
 8   WPU065202_1        94 non-null     float64       
 9   WPU01130232_2      94 non-null     float64       
 10  PMAIZMTUSDM_2      94 non-null     float64       
 11  WPU07110224_2      94 non-null     float64       
 12  WPU065202_2        94 non-null     float64       
 13  WPU01130232_3      94 non-null     float64       
 14  PMAIZMTUSDM_3 

### 1-month prediction

In [None]:
# 1-month horizon for builder RM20/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.2 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 9.7
MAPE from 2019-07-01 to 2020-01-01: 11.0
MAPE from 2020-01-01 to 2020-07-01: 7.0
MAPE from 2020-07-01 to 2021-01-01: 7.6
MAPE from 2021-01-01 to 2021-07-01: 6.4
MAPE from 2021-07-01 to 2022-01-01: 19.3
MAPE from 2022-01-01 to 2022-07-01: 32.7
MAPE from 2022-07-01 to 2023-01-01: 8.8
MAPE from 2023-01-01 to 2023-07-01: 30.2
MAPE from 2023-07-01 to 2024-01-01: 6.1


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.9


### 3-month prediction


In [None]:
# 3-month horizon for builder RM20/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.4 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
# Note this differs from the 0.2 used for the 1-month run of the same material.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.8
MAPE from 2019-07-01 to 2020-01-01: 13.2
MAPE from 2020-01-01 to 2020-07-01: 7.8
MAPE from 2020-07-01 to 2021-01-01: 6.8
MAPE from 2021-01-01 to 2021-07-01: 4.9
MAPE from 2021-07-01 to 2022-01-01: 25.4
MAPE from 2022-01-01 to 2022-07-01: 54.7
MAPE from 2022-07-01 to 2023-01-01: 13.7
MAPE from 2023-01-01 to 2023-07-01: 30.0
MAPE from 2023-07-01 to 2024-01-01: 6.8


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 17.5


### 6-month prediction

In [None]:
# 6-month horizon for builder RM20/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.4 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 12.9
MAPE from 2019-07-01 to 2020-01-01: 9.6
MAPE from 2020-01-01 to 2020-07-01: 11.0
MAPE from 2020-07-01 to 2021-01-01: 7.3
MAPE from 2021-01-01 to 2021-07-01: 5.6
MAPE from 2021-07-01 to 2022-01-01: 27.2
MAPE from 2022-01-01 to 2022-07-01: 58.3
MAPE from 2022-07-01 to 2023-01-01: 13.8
MAPE from 2023-01-01 to 2023-07-01: 37.4
MAPE from 2023-07-01 to 2024-01-01: 9.8


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 19.3


## 0015

In [None]:
# Select builder material RM20/0015: calculate_mape_1 reads the global feature_df
# (the _3/_6 variants presumably do too), so the cells below score this subset.
feature_df = df_0015

In [None]:
# Drop the material indicator columns (RM20/0001, RM20/0015, RM20/0020 all contain
# 'RM'). calculate_mape_1 uses all columns except Time, Group Description, Year,
# Month and Average_price as features, so the flags must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0015 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 122 to 217
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               93 non-null     int64         
 1   Month              93 non-null     int64         
 2   Time               93 non-null     datetime64[ns]
 3   Group Description  93 non-null     object        
 4   Average_price      93 non-null     float64       
 5   WPU01130232_1      93 non-null     float64       
 6   PMAIZMTUSDM_1      93 non-null     float64       
 7   WPU07110224_1      93 non-null     float64       
 8   WPU065202_1        93 non-null     float64       
 9   WPU01130232_2      93 non-null     float64       
 10  PMAIZMTUSDM_2      93 non-null     float64       
 11  WPU07110224_2      93 non-null     float64       
 12  WPU065202_2        93 non-null     float64       
 13  WPU01130232_3      93 non-null     float64       
 14  PMAIZMTUSDM_3 

### 1-month prediction

In [None]:
# 1-month horizon for builder RM20/0015: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.3 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.1
MAPE from 2019-07-01 to 2020-01-01: 6.0
MAPE from 2020-01-01 to 2020-07-01: 15.7
MAPE from 2020-07-01 to 2021-01-01: 3.5
MAPE from 2021-01-01 to 2021-07-01: 9.1
MAPE from 2021-07-01 to 2022-01-01: 10.7
MAPE from 2022-01-01 to 2022-07-01: 24.9
MAPE from 2022-07-01 to 2023-01-01: 29.6
MAPE from 2023-01-01 to 2023-07-01: 10.5
MAPE from 2023-07-01 to 2024-01-01: 6.0


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.1


### 3-month prediction


In [None]:
# 3-month horizon for builder RM20/0015: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.3 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.1
MAPE from 2019-07-01 to 2020-01-01: 6.0
MAPE from 2020-01-01 to 2020-07-01: 15.7
MAPE from 2020-07-01 to 2021-01-01: 3.5
MAPE from 2021-01-01 to 2021-07-01: 9.1
MAPE from 2021-07-01 to 2022-01-01: 10.7
MAPE from 2022-01-01 to 2022-07-01: 25.3
MAPE from 2022-07-01 to 2023-01-01: 28.4
MAPE from 2023-01-01 to 2023-07-01: 8.3
MAPE from 2023-07-01 to 2024-01-01: 6.1


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.8


### 6-month prediction

In [None]:
# 6-month horizon for builder RM20/0015: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.2 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
# Note this differs from the 0.3 used for the 1- and 3-month runs of this material.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.1
MAPE from 2019-07-01 to 2020-01-01: 6.0
MAPE from 2020-01-01 to 2020-07-01: 15.7
MAPE from 2020-07-01 to 2021-01-01: 3.5
MAPE from 2021-01-01 to 2021-07-01: 9.1
MAPE from 2021-07-01 to 2022-01-01: 10.7
MAPE from 2022-01-01 to 2022-07-01: 26.3
MAPE from 2022-07-01 to 2023-01-01: 29.1
MAPE from 2023-01-01 to 2023-07-01: 7.7
MAPE from 2023-07-01 to 2024-01-01: 7.0


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.0


## 0020

In [None]:
# Select builder material RM20/0020: calculate_mape_1 reads the global feature_df
# (the _3/_6 variants presumably do too), so the cells below score this subset.
feature_df = df_0020

In [None]:
# Drop the material indicator columns (RM20/0001, RM20/0015, RM20/0020 all contain
# 'RM'). calculate_mape_1 uses all columns except Time, Group Description, Year,
# Month and Average_price as features, so the flags must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0020 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 14 to 109
Data columns (total 65 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               95 non-null     int64         
 1   Month              95 non-null     int64         
 2   Time               95 non-null     datetime64[ns]
 3   Group Description  95 non-null     object        
 4   Average_price      95 non-null     float64       
 5   WPU01130232_1      95 non-null     float64       
 6   PMAIZMTUSDM_1      95 non-null     float64       
 7   WPU07110224_1      95 non-null     float64       
 8   WPU065202_1        95 non-null     float64       
 9   WPU01130232_2      95 non-null     float64       
 10  PMAIZMTUSDM_2      95 non-null     float64       
 11  WPU07110224_2      95 non-null     float64       
 12  WPU065202_2        95 non-null     float64       
 13  WPU01130232_3      95 non-null     float64       
 14  PMAIZMTUSDM_3  

### 1-month prediction

In [None]:
# 1-month horizon for builder RM20/0020: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.1 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 9.2
MAPE from 2019-07-01 to 2020-01-01: 10.9
MAPE from 2020-01-01 to 2020-07-01: 8.6
MAPE from 2020-07-01 to 2021-01-01: 10.6
MAPE from 2021-01-01 to 2021-07-01: 10.5
MAPE from 2021-07-01 to 2022-01-01: 9.4
MAPE from 2022-01-01 to 2022-07-01: 16.1
MAPE from 2022-07-01 to 2023-01-01: 8.3
MAPE from 2023-01-01 to 2023-07-01: 9.8
MAPE from 2023-07-01 to 2024-01-01: 13.4


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.7


### 3-month prediction


In [None]:
# 3-month horizon for builder RM20/0020: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.1 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 8.9
MAPE from 2019-07-01 to 2020-01-01: 10.9
MAPE from 2020-01-01 to 2020-07-01: 9.1
MAPE from 2020-07-01 to 2021-01-01: 11.6
MAPE from 2021-01-01 to 2021-07-01: 12.1
MAPE from 2021-07-01 to 2022-01-01: 13.6
MAPE from 2022-01-01 to 2022-07-01: 18.2
MAPE from 2022-07-01 to 2023-01-01: 6.1
MAPE from 2023-01-01 to 2023-07-01: 18.5
MAPE from 2023-07-01 to 2024-01-01: 23.0


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.2


### 6-month prediction

In [None]:
# 6-month horizon for builder RM20/0020: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.1 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.1
MAPE from 2019-07-01 to 2020-01-01: 7.1
MAPE from 2020-01-01 to 2020-07-01: 11.5
MAPE from 2020-07-01 to 2021-01-01: 13.2
MAPE from 2021-01-01 to 2021-07-01: 15.2
MAPE from 2021-07-01 to 2022-01-01: 17.8
MAPE from 2022-01-01 to 2022-07-01: 24.6
MAPE from 2022-07-01 to 2023-01-01: 14.2
MAPE from 2023-01-01 to 2023-07-01: 29.0
MAPE from 2023-07-01 to 2024-01-01: 41.8


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 18.6


# Anionic Surfactant

In [None]:
# Load the anionic-surfactant feature table from the mounted Drive; the first CSV
# column becomes the index. This rebinds the global feature_df used by earlier sections.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/anionic surfactant_feature.csv',index_col=0)
# Convert Time to datetime64 so the date-range filters on feature_df['Time'] in
# calculate_mape_1 compare timestamps.
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM10/0001  RM10/0002  Year  Month       Time   Group Description  \
12       False       True  2016      1 2016-01-31  anionic surfactant   
13       False       True  2016      2 2016-02-29  anionic surfactant   
14       False       True  2016      3 2016-03-31  anionic surfactant   
15       False       True  2016      4 2016-04-30  anionic surfactant   
16       False       True  2016      5 2016-05-31  anionic surfactant   
..         ...        ...   ...    ...        ...                 ...   
216       True      False  2023      8 2023-08-31  anionic surfactant   
217       True      False  2023      9 2023-09-30  anionic surfactant   
218       True      False  2023     10 2023-10-31  anionic surfactant   
219       True      False  2023     11 2023-11-30  anionic surfactant   
220       True      False  2023     12 2023-12-31  anionic surfactant   

     Average_price  PNGASEUUSDM_1  PPOILUSDM_1  Electricity_1  ...      AR_3  \
12        1.235000       5.810000   520.602

In [None]:
# Rows of the anionic-surfactant table flagged as material RM10/0001.
# NOTE: this rebinds df_0001, which the Builder section used for RM20/0001.
df_0001 = feature_df[feature_df['RM10/0001'] == 1]
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
df_0001.info()

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 125 to 220
Data columns (total 55 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM10/0001          93 non-null     bool          
 1   RM10/0002          93 non-null     bool          
 2   Year               93 non-null     int64         
 3   Month              93 non-null     int64         
 4   Time               93 non-null     datetime64[ns]
 5   Group Description  93 non-null     object        
 6   Average_price      93 non-null     float64       
 7   PNGASEUUSDM_1      93 non-null     float64       
 8   PPOILUSDM_1        93 non-null     float64       
 9   Electricity_1      93 non-null     float64       
 10  PNGASEUUSDM_2      93 non-null     float64       
 11  PPOILUSDM_2        93 non-null     float64       
 12  Electricity_2      93 non-null     float64       
 13  PNGASEUUSDM_3      93 non-null     float64       
 14  PPOILUSDM_3   

In [None]:
# Rows of the anionic-surfactant table flagged as material RM10/0002.
df_0002 = feature_df[feature_df['RM10/0002'] == 1]
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
df_0002.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 12 to 107
Data columns (total 55 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM10/0001          96 non-null     bool          
 1   RM10/0002          96 non-null     bool          
 2   Year               96 non-null     int64         
 3   Month              96 non-null     int64         
 4   Time               96 non-null     datetime64[ns]
 5   Group Description  96 non-null     object        
 6   Average_price      96 non-null     float64       
 7   PNGASEUUSDM_1      96 non-null     float64       
 8   PPOILUSDM_1        96 non-null     float64       
 9   Electricity_1      96 non-null     float64       
 10  PNGASEUUSDM_2      96 non-null     float64       
 11  PPOILUSDM_2        96 non-null     float64       
 12  Electricity_2      96 non-null     float64       
 13  PNGASEUUSDM_3      96 non-null     float64       
 14  PPOILUSDM_3    

## 0001

In [None]:
# Select anionic-surfactant material RM10/0001: calculate_mape_1 reads the global
# feature_df (the _3/_6 variants presumably do too). After this rebinding the full
# anionic-surfactant table is no longer reachable through feature_df.
feature_df = df_0001

In [None]:
# Drop the material indicator columns (RM10/0001 and RM10/0002 both contain 'RM').
# calculate_mape_1 uses all columns except Time, Group Description, Year, Month and
# Average_price as features, so the flags must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0001 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 93 entries, 125 to 220
Data columns (total 53 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               93 non-null     int64         
 1   Month              93 non-null     int64         
 2   Time               93 non-null     datetime64[ns]
 3   Group Description  93 non-null     object        
 4   Average_price      93 non-null     float64       
 5   PNGASEUUSDM_1      93 non-null     float64       
 6   PPOILUSDM_1        93 non-null     float64       
 7   Electricity_1      93 non-null     float64       
 8   PNGASEUUSDM_2      93 non-null     float64       
 9   PPOILUSDM_2        93 non-null     float64       
 10  Electricity_2      93 non-null     float64       
 11  PNGASEUUSDM_3      93 non-null     float64       
 12  PPOILUSDM_3        93 non-null     float64       
 13  Electricity_3      93 non-null     float64       
 14  PNGASEUUSDM_4 

### 1-month prediction

In [None]:
# 1-month horizon for anionic RM10/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.2 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 15.2
MAPE from 2019-07-01 to 2020-01-01: 12.0
MAPE from 2020-01-01 to 2020-07-01: 8.5
MAPE from 2020-07-01 to 2021-01-01: 16.0
MAPE from 2021-01-01 to 2021-07-01: 14.9
MAPE from 2021-07-01 to 2022-01-01: 3.6
MAPE from 2022-01-01 to 2022-07-01: 15.8
MAPE from 2022-07-01 to 2023-01-01: 4.1
MAPE from 2023-01-01 to 2023-07-01: 14.8
MAPE from 2023-07-01 to 2024-01-01: 8.2


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.3


### 3-month prediction


In [None]:
# 3-month horizon for anionic RM10/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.2 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 15.2
MAPE from 2019-07-01 to 2020-01-01: 12.0
MAPE from 2020-01-01 to 2020-07-01: 10.0
MAPE from 2020-07-01 to 2021-01-01: 16.4
MAPE from 2021-01-01 to 2021-07-01: 13.3
MAPE from 2021-07-01 to 2022-01-01: 3.2
MAPE from 2022-01-01 to 2022-07-01: 18.1
MAPE from 2022-07-01 to 2023-01-01: 5.0
MAPE from 2023-01-01 to 2023-07-01: 18.9
MAPE from 2023-07-01 to 2024-01-01: 9.9


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.2


### 6-month prediction

In [None]:
# 6-month horizon for anionic RM10/0001: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.3 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
# Note this differs from the 0.2 used for the 1- and 3-month runs of this material.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 15.2
MAPE from 2019-07-01 to 2020-01-01: 13.6
MAPE from 2020-01-01 to 2020-07-01: 11.1
MAPE from 2020-07-01 to 2021-01-01: 16.7
MAPE from 2021-01-01 to 2021-07-01: 6.7
MAPE from 2021-07-01 to 2022-01-01: 3.8
MAPE from 2022-01-01 to 2022-07-01: 21.3
MAPE from 2022-07-01 to 2023-01-01: 6.4
MAPE from 2023-01-01 to 2023-07-01: 16.6
MAPE from 2023-07-01 to 2024-01-01: 9.8


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 12.1


## 0002

In [None]:
# Select anionic-surfactant material RM10/0002: calculate_mape_1 reads the global
# feature_df (the _3/_6 variants presumably do too), so the cells below score this subset.
feature_df = df_0002

In [None]:
# Drop the material indicator columns (RM10/0001 and RM10/0002 both contain 'RM').
# calculate_mape_1 uses all columns except Time, Group Description, Year, Month and
# Average_price as features, so the flags must be removed first.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
# drop() returns a new frame, so df_0002 itself keeps its 'RM' columns.
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 12 to 107
Data columns (total 53 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PNGASEUUSDM_1      96 non-null     float64       
 6   PPOILUSDM_1        96 non-null     float64       
 7   Electricity_1      96 non-null     float64       
 8   PNGASEUUSDM_2      96 non-null     float64       
 9   PPOILUSDM_2        96 non-null     float64       
 10  Electricity_2      96 non-null     float64       
 11  PNGASEUUSDM_3      96 non-null     float64       
 12  PPOILUSDM_3        96 non-null     float64       
 13  Electricity_3      96 non-null     float64       
 14  PNGASEUUSDM_4  

### 1-month prediction

In [None]:
# 1-month horizon for anionic RM10/0002: score each (start, end) window in test_periods
# and collect the per-window MAPE; 0.1 is passed as calculate_mape_1's alpha_low.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 4.2
MAPE from 2019-07-01 to 2020-01-01: 5.1
MAPE from 2020-01-01 to 2020-07-01: 6.4
MAPE from 2020-07-01 to 2021-01-01: 7.9
MAPE from 2021-01-01 to 2021-07-01: 9.5
MAPE from 2021-07-01 to 2022-01-01: 5.5
MAPE from 2022-01-01 to 2022-07-01: 13.2
MAPE from 2022-07-01 to 2023-01-01: 6.5
MAPE from 2023-01-01 to 2023-07-01: 15.7
MAPE from 2023-07-01 to 2024-01-01: 6.7


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 1-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 8.1


### 3-month prediction


In [None]:
# 3-month horizon for anionic RM10/0002: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.1 is the third argument of calculate_mape_3
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_3(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 3.6
MAPE from 2019-07-01 to 2020-01-01: 4.3
MAPE from 2020-01-01 to 2020-07-01: 6.3
MAPE from 2020-07-01 to 2021-01-01: 8.5
MAPE from 2021-01-01 to 2021-07-01: 11.6
MAPE from 2021-07-01 to 2022-01-01: 5.7
MAPE from 2022-01-01 to 2022-07-01: 13.8
MAPE from 2022-07-01 to 2023-01-01: 6.8
MAPE from 2023-01-01 to 2023-07-01: 16.2
MAPE from 2023-07-01 to 2024-01-01: 7.2


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 3-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 8.4


### 6-month prediction

In [None]:
# 6-month horizon for anionic RM10/0002: score each (start, end) window in test_periods
# and collect the per-window MAPE. 0.2 is the third argument of calculate_mape_6
# (presumably alpha_low, as in calculate_mape_1 -- confirm against its definition).
# Note this differs from the 0.1 used for the 1- and 3-month runs of this material.
mape_values = []
for start, end in test_periods:
    mape = calculate_mape_6(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 4.7
MAPE from 2019-07-01 to 2020-01-01: 4.6
MAPE from 2020-01-01 to 2020-07-01: 6.4
MAPE from 2020-07-01 to 2021-01-01: 9.7
MAPE from 2021-01-01 to 2021-07-01: 15.8
MAPE from 2021-07-01 to 2022-01-01: 10.8
MAPE from 2022-01-01 to 2022-07-01: 18.9
MAPE from 2022-07-01 to 2023-01-01: 10.9
MAPE from 2023-01-01 to 2023-07-01: 13.1
MAPE from 2023-07-01 to 2024-01-01: 7.6


In [None]:
# Unweighted mean of the per-window MAPEs; reads whatever mape_values currently
# holds, so run this right after the 6-month loop cell above.
average_mape = np.mean(mape_values)
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 10.2


# Solvents

In [None]:
# Load the solvent feature table from the mounted Drive; the first CSV column
# becomes the index. This rebinds the global feature_df used by earlier sections.
feature_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/solvent_feature.csv',index_col=0)
# Convert Time to datetime64 so the date-range filters on feature_df['Time'] in
# calculate_mape_1 compare timestamps.
feature_df['Time'] = pd.to_datetime(feature_df['Time'])
print(feature_df)

     RM08/0010  RM08/0017  Year  Month       Time Group Description  \
13       False       True  2016      1 2016-01-31           solvent   
14       False       True  2016      2 2016-02-29           solvent   
15       False       True  2016      3 2016-03-31           solvent   
16       False       True  2016      4 2016-04-30           solvent   
17       False       True  2016      5 2016-05-31           solvent   
..         ...        ...   ...    ...        ...               ...   
215       True      False  2023      8 2023-08-31           solvent   
216       True      False  2023      9 2023-09-30           solvent   
217       True      False  2023     10 2023-10-24           solvent   
218       True      False  2023     11 2023-11-30           solvent   
219       True      False  2023     12 2023-12-20           solvent   

     Average_price  PPOILUSDM_1  PPOILUSDM_2  PPOILUSDM_3  ...      AR_3  \
13        1.031000   520.602568   503.163620   530.246640  ...  1.07000

In [None]:
# Keep only the rows whose 'RM08/0010' indicator column is set.
df_0010 = feature_df[feature_df['RM08/0010'] == 1]
# DataFrame.info() prints the summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
df_0010.info()

<class 'pandas.core.frame.DataFrame'>
Index: 91 entries, 124 to 219
Data columns (total 31 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM08/0010          91 non-null     bool          
 1   RM08/0017          91 non-null     bool          
 2   Year               91 non-null     int64         
 3   Month              91 non-null     int64         
 4   Time               91 non-null     datetime64[ns]
 5   Group Description  91 non-null     object        
 6   Average_price      91 non-null     float64       
 7   PPOILUSDM_1        91 non-null     float64       
 8   PPOILUSDM_2        91 non-null     float64       
 9   PPOILUSDM_3        91 non-null     float64       
 10  PPOILUSDM_4        91 non-null     float64       
 11  PPOILUSDM_5        91 non-null     float64       
 12  PPOILUSDM_6        91 non-null     float64       
 13  PPOILUSDM_7        91 non-null     float64       
 14  PPOILUSDM_8   

In [None]:
# Keep only the rows whose 'RM08/0017' indicator column is set.
df_0017 = feature_df[feature_df['RM08/0017'] == 1]
# DataFrame.info() prints the summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
df_0017.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 13 to 108
Data columns (total 31 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RM08/0010          95 non-null     bool          
 1   RM08/0017          95 non-null     bool          
 2   Year               95 non-null     int64         
 3   Month              95 non-null     int64         
 4   Time               95 non-null     datetime64[ns]
 5   Group Description  95 non-null     object        
 6   Average_price      95 non-null     float64       
 7   PPOILUSDM_1        95 non-null     float64       
 8   PPOILUSDM_2        95 non-null     float64       
 9   PPOILUSDM_3        95 non-null     float64       
 10  PPOILUSDM_4        95 non-null     float64       
 11  PPOILUSDM_5        95 non-null     float64       
 12  PPOILUSDM_6        95 non-null     float64       
 13  PPOILUSDM_7        95 non-null     float64       
 14  PPOILUSDM_8    

## RM08/0010

In [None]:
# Point the global `feature_df` at the RM08/0010 subset: calculate_mape_1 reads
# `feature_df` as a global rather than taking it as an argument
# (calculate_mape_3 / calculate_mape_6 presumably do the same -- TODO confirm).
feature_df = df_0010

In [None]:
# Remove the material-code indicator columns (every column whose name contains
# 'RM', e.g. 'RM08/0010') so that they are not passed to the model as features.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 91 entries, 124 to 219
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               91 non-null     int64         
 1   Month              91 non-null     int64         
 2   Time               91 non-null     datetime64[ns]
 3   Group Description  91 non-null     object        
 4   Average_price      91 non-null     float64       
 5   PPOILUSDM_1        91 non-null     float64       
 6   PPOILUSDM_2        91 non-null     float64       
 7   PPOILUSDM_3        91 non-null     float64       
 8   PPOILUSDM_4        91 non-null     float64       
 9   PPOILUSDM_5        91 non-null     float64       
 10  PPOILUSDM_6        91 non-null     float64       
 11  PPOILUSDM_7        91 non-null     float64       
 12  PPOILUSDM_8        91 non-null     float64       
 13  PPOILUSDM_9        91 non-null     float64       
 14  PPOILUSDM_10  

### 1-month prediction

In [None]:
# 1-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# 0.3 is passed as calculate_mape_1's `alpha_low` argument.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_1(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 13.5
MAPE from 2019-07-01 to 2020-01-01: 23.2
MAPE from 2020-01-01 to 2020-07-01: 18.6
MAPE from 2020-07-01 to 2021-01-01: 14.6
MAPE from 2021-01-01 to 2021-07-01: 13.3
MAPE from 2021-07-01 to 2022-01-01: 24.8
MAPE from 2022-01-01 to 2022-07-01: 31.6
MAPE from 2022-07-01 to 2023-01-01: 38.7
MAPE from 2023-01-01 to 2023-07-01: 30.8
MAPE from 2023-07-01 to 2024-01-01: 61.2


In [None]:
# Mean of the 1-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 27.0


### 3-month prediction


In [None]:
# 3-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.3) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_3(start, end, 0.3)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 13.4
MAPE from 2019-07-01 to 2020-01-01: 23.2
MAPE from 2020-01-01 to 2020-07-01: 19.4
MAPE from 2020-07-01 to 2021-01-01: 13.4
MAPE from 2021-01-01 to 2021-07-01: 13.9
MAPE from 2021-07-01 to 2022-01-01: 24.0
MAPE from 2022-01-01 to 2022-07-01: 31.8
MAPE from 2022-07-01 to 2023-01-01: 38.9
MAPE from 2023-01-01 to 2023-07-01: 37.5
MAPE from 2023-07-01 to 2024-01-01: 64.0


In [None]:
# Mean of the 3-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 27.9


### 6-month prediction

In [None]:
# 6-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.4) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_6(start, end, 0.4)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 15.1
MAPE from 2019-07-01 to 2020-01-01: 20.9
MAPE from 2020-01-01 to 2020-07-01: 18.6
MAPE from 2020-07-01 to 2021-01-01: 13.5
MAPE from 2021-01-01 to 2021-07-01: 16.2
MAPE from 2021-07-01 to 2022-01-01: 32.6
MAPE from 2022-01-01 to 2022-07-01: 41.9
MAPE from 2022-07-01 to 2023-01-01: 36.6
MAPE from 2023-01-01 to 2023-07-01: 40.6
MAPE from 2023-07-01 to 2024-01-01: 65.6


In [None]:
# Mean of the 6-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 30.2


## RM08/0017

In [None]:
# Point the global `feature_df` at the RM08/0017 subset: calculate_mape_1 reads
# `feature_df` as a global rather than taking it as an argument
# (calculate_mape_3 / calculate_mape_6 presumably do the same -- TODO confirm).
feature_df = df_0017

In [None]:
# Remove the material-code indicator columns (every column whose name contains
# 'RM', e.g. 'RM08/0017') so that they are not passed to the model as features.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 13 to 108
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               95 non-null     int64         
 1   Month              95 non-null     int64         
 2   Time               95 non-null     datetime64[ns]
 3   Group Description  95 non-null     object        
 4   Average_price      95 non-null     float64       
 5   PPOILUSDM_1        95 non-null     float64       
 6   PPOILUSDM_2        95 non-null     float64       
 7   PPOILUSDM_3        95 non-null     float64       
 8   PPOILUSDM_4        95 non-null     float64       
 9   PPOILUSDM_5        95 non-null     float64       
 10  PPOILUSDM_6        95 non-null     float64       
 11  PPOILUSDM_7        95 non-null     float64       
 12  PPOILUSDM_8        95 non-null     float64       
 13  PPOILUSDM_9        95 non-null     float64       
 14  PPOILUSDM_10   

### 1-month prediction

In [None]:
# 1-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# 0.01 is passed as calculate_mape_1's `alpha_low` argument.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_1(start, end, 0.01)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 3.1
MAPE from 2019-07-01 to 2020-01-01: 2.0
MAPE from 2020-01-01 to 2020-07-01: 0.5
MAPE from 2020-07-01 to 2021-01-01: 2.3
MAPE from 2021-01-01 to 2021-07-01: 4.9
MAPE from 2021-07-01 to 2022-01-01: 6.0
MAPE from 2022-01-01 to 2022-07-01: 8.7
MAPE from 2022-07-01 to 2023-01-01: 17.2
MAPE from 2023-01-01 to 2023-07-01: 14.0
MAPE from 2023-07-01 to 2024-01-01: 4.3


In [None]:
# Mean of the 1-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 6.3


### 3-month prediction


In [None]:
# 3-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.01) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_3(start, end, 0.01)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 5.6
MAPE from 2019-07-01 to 2020-01-01: 5.5
MAPE from 2020-01-01 to 2020-07-01: 0.5
MAPE from 2020-07-01 to 2021-01-01: 3.7
MAPE from 2021-01-01 to 2021-07-01: 6.5
MAPE from 2021-07-01 to 2022-01-01: 18.8
MAPE from 2022-01-01 to 2022-07-01: 7.5
MAPE from 2022-07-01 to 2023-01-01: 28.0
MAPE from 2023-01-01 to 2023-07-01: 20.0
MAPE from 2023-07-01 to 2024-01-01: 14.4


In [None]:
# Mean of the 3-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.1


### 6-month prediction

In [None]:
# 6-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.01) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_6(start, end, 0.01)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 4.2
MAPE from 2019-07-01 to 2020-01-01: 7.0
MAPE from 2020-01-01 to 2020-07-01: 0.6
MAPE from 2020-07-01 to 2021-01-01: 3.6
MAPE from 2021-01-01 to 2021-07-01: 8.5
MAPE from 2021-07-01 to 2022-01-01: 33.9
MAPE from 2022-01-01 to 2022-07-01: 16.8
MAPE from 2022-07-01 to 2023-01-01: 31.3
MAPE from 2023-01-01 to 2023-07-01: 30.4
MAPE from 2023-07-01 to 2024-01-01: 45.1


In [None]:
# Mean of the 6-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 18.1


# Fatty acid

In [None]:
# Load the fatty-acid feature table (first CSV column becomes the index) and
# convert the 'Time' column to datetimes so it can be compared against the
# test-window boundaries.
feature_df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Thesis/dataframe/fatty acid_feature.csv',
    index_col=0,
).assign(Time=lambda frame: pd.to_datetime(frame['Time']))
print(feature_df)

     RM14/0001  Year  Month       Time Group Description  Average_price  \
14        True  2016      1 2016-01-29        fatty acid       0.650000   
15        True  2016      2 2016-02-29        fatty acid       0.749500   
16        True  2016      3 2016-03-31        fatty acid       0.904000   
17        True  2016      4 2016-04-30        fatty acid       0.980333   
18        True  2016      5 2016-05-31        fatty acid       0.939663   
..         ...   ...    ...        ...               ...            ...   
105       True  2023      8 2023-08-31        fatty acid       1.617296   
106       True  2023      9 2023-09-30        fatty acid       1.371400   
107       True  2023     10 2023-10-31        fatty acid       1.534868   
108       True  2023     11 2023-11-30        fatty acid       1.386338   
109       True  2023     12 2023-12-29        fatty acid       1.502477   

     PPOILUSDM_1  PPOILUSDM_2  PPOILUSDM_3  PPOILUSDM_4  ...      AR_3  \
14    520.602568   503.16

In [None]:
# Remove the material-code indicator columns (every column whose name contains
# 'RM', e.g. 'RM14/0001') so that they are not passed to the model as features.
columns_to_drop = [column for column in feature_df.columns if 'RM' in column]
feature_df = feature_df.drop(columns=columns_to_drop)
# DataFrame.info() prints the summary itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, 14 to 109
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Year               96 non-null     int64         
 1   Month              96 non-null     int64         
 2   Time               96 non-null     datetime64[ns]
 3   Group Description  96 non-null     object        
 4   Average_price      96 non-null     float64       
 5   PPOILUSDM_1        96 non-null     float64       
 6   PPOILUSDM_2        96 non-null     float64       
 7   PPOILUSDM_3        96 non-null     float64       
 8   PPOILUSDM_4        96 non-null     float64       
 9   PPOILUSDM_5        96 non-null     float64       
 10  PPOILUSDM_6        96 non-null     float64       
 11  PPOILUSDM_7        96 non-null     float64       
 12  PPOILUSDM_8        96 non-null     float64       
 13  PPOILUSDM_9        96 non-null     float64       
 14  PPOILUSDM_10   

### 1-month prediction

In [None]:
# 1-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# 0.1 is passed as calculate_mape_1's `alpha_low` argument.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_1(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 10.3
MAPE from 2019-07-01 to 2020-01-01: 14.2
MAPE from 2020-01-01 to 2020-07-01: 21.0
MAPE from 2020-07-01 to 2021-01-01: 22.6
MAPE from 2021-01-01 to 2021-07-01: 7.1
MAPE from 2021-07-01 to 2022-01-01: 11.3
MAPE from 2022-01-01 to 2022-07-01: 10.3
MAPE from 2022-07-01 to 2023-01-01: 8.7
MAPE from 2023-01-01 to 2023-07-01: 3.5
MAPE from 2023-07-01 to 2024-01-01: 9.2


In [None]:
# Mean of the 1-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 11.8


### 3-month prediction


In [None]:
# 3-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.2) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_3(start, end, 0.2)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 11.8
MAPE from 2019-07-01 to 2020-01-01: 11.1
MAPE from 2020-01-01 to 2020-07-01: 30.1
MAPE from 2020-07-01 to 2021-01-01: 23.3
MAPE from 2021-01-01 to 2021-07-01: 9.6
MAPE from 2021-07-01 to 2022-01-01: 8.5
MAPE from 2022-01-01 to 2022-07-01: 15.2
MAPE from 2022-07-01 to 2023-01-01: 12.8
MAPE from 2023-01-01 to 2023-07-01: 5.3
MAPE from 2023-07-01 to 2024-01-01: 7.3


In [None]:
# Mean of the 3-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.5


### 6-month prediction

In [None]:
# 6-month horizon: score every (start, end) test window listed in `test_periods`
# and keep the per-window MAPE for the averaging cell that follows.
# The third argument (0.1) is presumably alpha_low, as in calculate_mape_1 -- TODO confirm.
mape_values = list()
for window in test_periods:
    start, end = window
    mape = calculate_mape_6(start, end, 0.1)
    mape_values.append(mape)
    print(f"MAPE from {start} to {end}: {mape:.1f}")

MAPE from 2019-01-01 to 2019-07-01: 19.2
MAPE from 2019-07-01 to 2020-01-01: 12.3
MAPE from 2020-01-01 to 2020-07-01: 20.5
MAPE from 2020-07-01 to 2021-01-01: 29.9
MAPE from 2021-01-01 to 2021-07-01: 7.5
MAPE from 2021-07-01 to 2022-01-01: 14.5
MAPE from 2022-01-01 to 2022-07-01: 13.3
MAPE from 2022-07-01 to 2023-01-01: 6.9
MAPE from 2023-01-01 to 2023-07-01: 5.5
MAPE from 2023-07-01 to 2024-01-01: 8.0


In [None]:
# Mean of the 6-month-horizon MAPEs collected in `mape_values` by the loop above.
average_mape = np.asarray(mape_values).mean()
print(f"Average MAPE: {average_mape:.1f}")

Average MAPE: 13.8
