# ARIMAX MODEL

https://www.datacamp.com/tutorial/arima
https://365datascience.com/tutorials/python-tutorials/arimax/
https://laurenliz22.github.io/arima_modeling_and_train_test_split

In [168]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [57]:
file_path =  "./final with standardization/features_regression.csv"
df = pd.read_csv(file_path)

In [58]:
print(len(df))

1216


In [61]:
def corrected_mood(forecast):
    rounded = []
    
    for value in forecast:
        decimal_part = value - int(value)
        if decimal_part <= 0.5:
            rounded_value = int(value) 
        else:
            rounded_value = int(value) + 1  
        rounded.append(rounded_value)
    
    return rounded

In [103]:
file_path = "./final with standardization/features_regression.csv"
df = pd.read_csv(file_path)

### Arimax model -- general

In [111]:
df = pd.read_csv(file_path)
df['day'] = pd.to_datetime(df['day'])
df = df.set_index('day')
df = df.sort_index()

target = df['target']
variables = ['appCat.builtin', 'appCat.communication', 'activity',
             'appCat.utilities', 'appCat.entertainment', 'call',
             'appCat.unknown', 'mood', 'circumplex.valence', 'appCat.social',
             'sms', 'appCat.game', 'appCat.finance', 'appCat.weather',
             'circumplex.arousal', 'appCat.travel', 'appCat.office',
             'appCat.other', 'screen', 'wake_up', 'sleep', 'week_day', 'week_mood']
X = df[variables]
general_model = SARIMAX(endog=target, exog=X, order=(1, 1,3))
results = general_model.fit()
print(results.summary())

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                 1216
Model:               SARIMAX(1, 1, 3)   Log Likelihood               -1128.112
Date:                Sat, 19 Apr 2025   AIC                           2312.224
Time:                        19:21:43   BIC                           2455.094
Sample:                             0   HQIC                          2366.009
                               - 1216                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0104      0.024      0.440      0.660      -0.036       0.057
appCat.communication     0.0717      0.032      2.252      0.024       0.009       0.134
activity    

  return get_prediction_index(
  return get_prediction_index(


### MINIMIZE AIC

In [69]:
df = pd.read_csv(file_path)
df['day'] = pd.to_datetime(df['day'])
df = df.set_index('day')
df = df.sort_index()

In [70]:
target = df['target']
variables = ['appCat.builtin', 'appCat.communication', 'activity',
             'appCat.utilities', 'appCat.entertainment', 'call',
             'appCat.unknown', 'mood', 'circumplex.valence', 'appCat.social',
             'sms', 'appCat.game', 'appCat.finance', 'appCat.weather',
             'circumplex.arousal', 'appCat.travel', 'appCat.office',
             'appCat.other', 'screen', 'wake_up', 'sleep', 'week_day', 'week_mood']
X = df[variables]

In [71]:
model = auto_arima(target, exogenous=X, seasonal=False, trace=True, error_action='ignore', suppress_warnings=True)
print(model.summary())

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.55 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=8194.224, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=3481.516, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=6850.471, Time=0.08 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.06 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=2714.703, Time=0.36 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.47 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.56 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=5933.805, Time=0.15 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=2699.975, Time=1.00 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=2700.385, Time=0.11 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=2700.137, Time=0.06 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=2701.518, Time=0.37 sec
 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=2701.812, Time=0.29 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=2700.965, T

### FORECAST RESULTS

In [74]:
last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

Forecasted mood: 1216    7.081581
1217    7.042201
1218    7.011064
1219    7.116517
1220    7.303902
Name: predicted_mean, dtype: float64 [7, 7, 7, 7, 7]


  return get_prediction_index(
  return get_prediction_index(


## TRAIN AND TEST SPLIT RESULTS

In [142]:
file_path =  "./final with standardization/features_regression.csv"
df = pd.read_csv(file_path)

In [144]:
df = pd.read_csv(file_path)
df['day'] = pd.to_datetime(df['day'])
df = df.set_index('day')
df = df.sort_index()

In [146]:
target = df['target']
variables = ['appCat.builtin', 'appCat.communication', 'activity',
            'appCat.utilities', 'appCat.entertainment', 'call',
            'appCat.unknown', 'mood', 'circumplex.valence', 'appCat.social',
            'sms', 'appCat.game', 'appCat.finance', 'appCat.weather',
            'circumplex.arousal', 'appCat.travel', 'appCat.office',
            'appCat.other', 'screen', 'wake_up', 'sleep', 'week_day', 'week_mood']
X = df[variables]

features_train, features_leftover, target_train, target_leftover = train_test_split(X, target, test_size=0.3, shuffle=False, random_state=42)
features_valid, features_test, target_dev, target_dev = train_test_split(features_leftover, target_leftover, test_size=0.5, shuffle=False, random_state=42)

In [182]:
model = SARIMAX(endog=target_train, exog=features_train, order=(1, 1, 1))
results = model.fit()

dev_predictions = results.forecast(steps=len(target_valid), exog=features_valid)

dev_mse = mean_squared_error(target_valid, dev_predictions)
dev_mae = mean_absolute_error(target_valid, dev_predictions)

print(results.summary())
print(f"Dev MSE: {dev_mse}")
print(f"Dev MAE: {dev_mae}")

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                  851
Model:               SARIMAX(1, 1, 1)   Log Likelihood                -793.325
Date:                Sat, 19 Apr 2025   AIC                           1638.651
Time:                        20:01:19   BIC                           1762.027
Sample:                             0   HQIC                          1685.910
                                - 851                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0339      0.030      1.132      0.258      -0.025       0.093
appCat.communication     0.0760      0.038      2.007      0.045       0.002       0.150
activity    

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


### (0,0,0) model

In [184]:
model = SARIMAX(endog=y_train, exog=X_train, order=(0, 0, 0))
results = model.fit()

dev_predictions = results.forecast(steps=len(target_valid), exog=features_valid)

dev_mse = mean_squared_error(target_valid, dev_predictions)
dev_mae = mean_absolute_error(target_valid, dev_predictions)

print(results.summary())
print(f"Dev MSE: {dev_mse}")
print(f"Dev MAE: {dev_mae}")

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                  851
Model:                        SARIMAX   Log Likelihood                -807.405
Date:                Sat, 19 Apr 2025   AIC                           1662.810
Time:                        20:02:43   BIC                           1776.724
Sample:                             0   HQIC                          1706.442
                                - 851                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0340      0.031      1.108      0.268      -0.026       0.094
appCat.communication     0.0601      0.039      1.524      0.127      -0.017       0.137
activity    

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


### (1,1,1) model

In [186]:
model = SARIMAX(endog=y_train, exog=X_train, order=(1, 1, 1))
results = model.fit()

dev_predictions = results.forecast(steps=len(target_valid), exog=features_valid)

dev_mse = mean_squared_error(target_valid, dev_predictions)
dev_mae = mean_absolute_error(target_valid, dev_predictions)

print(results.summary())
print(f"Dev MSE: {dev_mse}")
print(f"Dev MAE: {dev_mae}")

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                  851
Model:               SARIMAX(1, 1, 1)   Log Likelihood                -793.325
Date:                Sat, 19 Apr 2025   AIC                           1638.651
Time:                        20:03:04   BIC                           1762.027
Sample:                             0   HQIC                          1685.910
                                - 851                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0339      0.030      1.132      0.258      -0.025       0.093
appCat.communication     0.0760      0.038      2.007      0.045       0.002       0.150
activity    

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


### (1,1,1) on test set

In [194]:
test_predictions = results.forecast(steps=len(target_test), exog=features_test)

test_mse = mean_squared_error(target_test, test_predictions)
test_mae = mean_absolute_error(target_test, test_predictions)

print(results.summary())
print("Mean Squared Error on Test Set:", test_mse)
print("Mean Absolute Error on Test Set:", test_mae)

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                  851
Model:               SARIMAX(1, 1, 1)   Log Likelihood                -793.325
Date:                Sat, 19 Apr 2025   AIC                           1638.651
Time:                        20:04:54   BIC                           1762.027
Sample:                             0   HQIC                          1685.910
                                - 851                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0339      0.030      1.132      0.258      -0.025       0.093
appCat.communication     0.0760      0.038      2.007      0.045       0.002       0.150
activity    

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


In [200]:
r2 = r2_score(target_test, test_predictions)
print("R²:", r2)

R²: 0.20177768959139308


### (1,0,3) model

In [202]:
model = SARIMAX(endog=y_train, exog=X_train, order=(1, 0, 3))
results = model.fit()

dev_predictions = results.forecast(steps=len(target_valid), exog=features_valid)

dev_mse = mean_squared_error(target_valid, dev_predictions)
dev_mae = mean_absolute_error(target_valid, dev_predictions)

print(results.summary())
print(f"Dev MSE: {dev_mse}")
print(f"Dev MAE: {dev_mae}")

last_data = X.tail(5)
forecast = results.forecast(steps=5, exog=last_data)
cor_mood = corrected_mood(forecast)
print("Forecasted mood:", forecast, cor_mood)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


                               SARIMAX Results                                
Dep. Variable:                 target   No. Observations:                  851
Model:               SARIMAX(1, 0, 3)   Log Likelihood                -803.013
Date:                Sat, 19 Apr 2025   AIC                           1662.025
Time:                        20:05:32   BIC                           1794.925
Sample:                             0   HQIC                          1712.930
                                - 851                                         
Covariance Type:                  opg                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
appCat.builtin           0.0371      0.031      1.208      0.227      -0.023       0.097
appCat.communication     0.0598      0.039      1.550      0.121      -0.016       0.135
activity    

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
