In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from itertools import combinations
from sklearn.metrics import r2_score

In [None]:
# Load the source spreadsheet into a DataFrame.
# NOTE(review): `path_to_file` is not assigned in any cell visible here; it has
# to be defined before this cell can run on a fresh kernel.
df = pd.read_excel(path_to_file)

In [None]:
# Split the frame into the dependent series and the exogenous regressors.
# The labels below must match the column headers of `df` exactly.
exog_columns = ['GDP', 'House_constr', 'Workforse']
y = df['Consumption']
exog = df[exog_columns]

In [None]:
# Augmented Dickey-Fuller test on the level series. Only positions 0, 1 and 4
# of the returned tuple are kept, under the labels listed below.
adf_test = adfuller(y)
adf_labels = ("ADF Statistic", "p-value", "Critical Values")
adf_result = dict(zip(adf_labels, (adf_test[0], adf_test[1], adf_test[4])))

In [None]:
# Display the ADF summary computed for the level series.
adf_result

In [None]:
# Line plot of the level series on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y, label="Consumption")
ax.set_title("Временной ряд: a_Consumption")
ax.legend()
plt.show()

In [None]:
# First difference of the series; NaN entries left by diff() are dropped.
y_diff1 = y.diff().dropna()

# Stationarity check (ADF test) on the first-differenced series.
adf_test_diff1 = adfuller(y_diff1)
adf_result_diff1 = {
    label: adf_test_diff1[position]
    for label, position in (
        ("ADF Statistic", 0),
        ("p-value", 1),
        ("Critical Values", 4),
    )
}
adf_result_diff1

In [None]:
# Line plot of the first-differenced series on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_diff1, label="Первое дифференцирование a_Consumption")
ax.set_title("Первое дифференцирование временного ряда")
ax.legend()
plt.show()

In [None]:
# Second difference: difference the first-differenced series once more and
# drop the NaN entries left by diff().
y_diff2 = y_diff1.diff().dropna()

# Stationarity check (ADF test) on the twice-differenced series.
adf_test_diff2 = adfuller(y_diff2)
stat_diff2, pvalue_diff2, crit_diff2 = (
    adf_test_diff2[0],
    adf_test_diff2[1],
    adf_test_diff2[4],
)
adf_result_diff2 = {
    "ADF Statistic": stat_diff2,
    "p-value": pvalue_diff2,
    "Critical Values": crit_diff2,
}

In [None]:
# Display the ADF summary computed for the twice-differenced series.
adf_result_diff2

In [None]:
# Line plot of the twice-differenced series on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_diff2, label="Второе дифференцирование a_Consumption")
ax.set_title("Второе дифференцирование временного ряда")
ax.legend()
plt.show()

In [None]:

# Difference the exogenous regressors twice, dropping NaN rows after each pass.
exog_diff1 = exog.diff().dropna()
exog_diff2 = exog_diff1.diff().dropna()

# Select the rows labelled by y_diff2.index so the regressors line up with the
# dependent series.
exog_diff2 = exog_diff2.loc[y_diff2.index]

In [None]:
# ARIMA(0, 0, 1) on the twice-differenced series, with the twice-differenced
# regressors supplied as exogenous variables.
arima_order = (0, 0, 1)
model = ARIMA(endog=y_diff2, exog=exog_diff2, order=arima_order)
fitted_model = model.fit()

In [None]:
# Print the text summary of the fitted model.
print(fitted_model.summary())

In [None]:
def library_models(y, exog, max_order=(0, 0, 0)):
    """Fit one ARIMA model for every non-empty subset of exogenous columns.

    Each combination of the columns of ``exog`` (from single columns up to
    all columns together) is used as the regressor set of an ``ARIMA`` model
    for ``y``; goodness-of-fit statistics of every fit are collected.

    Parameters
    ----------
    y : pandas.Series or array-like
        Dependent series, passed to ``ARIMA`` as the endogenous variable.
    exog : pandas.DataFrame
        Candidate regressors. Must expose ``.columns`` and support selection
        by a list of column labels.
    max_order : tuple, default (0, 0, 0)
        Passed unchanged as ``order`` to every model. Despite the name, no
        search over orders is performed.

    Returns
    -------
    list of dict
        One entry per combination, ordered by increasing subset size. Keys:
        ``"Factors"`` (tuple of column labels), ``"R2"``, ``"AIC"``,
        ``"BIC"`` and ``"Coef"`` (dict of fitted parameters). When fitting
        or scoring raises, ``"R2"``/``"AIC"``/``"BIC"`` are ``None`` and
        ``"Coef"`` holds the error message instead.
    """
    results = []

    for n_factors in range(1, len(exog.columns) + 1):
        for factors in combinations(exog.columns, n_factors):
            exog_subset = exog[list(factors)]

            try:
                fitted_model = ARIMA(y, order=max_order, exog=exog_subset).fit()

                # In-sample fit and R^2. The fitted values are used instead of
                # predict(start=y.index[0], end=y.index[-1], ...): integer
                # index labels passed as bounds are read by statsmodels as
                # positions, which shifts the window whenever the index does
                # not start at 0 (e.g. after diff().dropna()).
                y_pred = fitted_model.fittedvalues
                r2 = r2_score(y, y_pred)

                results.append({
                    "Factors": factors,
                    "R2": r2,
                    "AIC": fitted_model.aic,
                    "BIC": fitted_model.bic,
                    "Coef": fitted_model.params.to_dict(),
                })
            except Exception as e:
                # Deliberate best-effort: a failing combination is recorded
                # with its error message so the remaining ones are still tried.
                results.append({
                    "Factors": factors,
                    "R2": None,
                    "AIC": None,
                    "BIC": None,
                    "Coef": str(e),
                })
    return results

In [None]:
# Fit one model per combination of exogenous columns.
# NOTE(review): `y` and `exog` are passed in levels with the default order
# (0, 0, 0), whereas the single model fitted earlier uses y_diff2 / exog_diff2
# - confirm this is intended.
results = library_models(y, exog)

In [None]:
# Tabulate the per-combination results: one row per entry of `results`.
results_df = pd.DataFrame(results)

In [None]:
# Show the candidate models ordered by R2 (sort_values defaults apply).
results_df.sort_values(by="R2")