In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn. metrics import mean_absolute_error 
from ydata_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


class KolmogorovGaborPolynomial:
    """
    Class for constructing the Kolmogorov-Gabor polynomial.

    The model is a chain of linear regressions ("stages"). Stage 1 regresses
    Y on the raw features. Every later stage ``i`` regresses Y on a feature
    table that grows by one ``Y_pred^i`` column (the previous stage's
    prediction raised to the power ``i``) and, while unused input columns
    remain, by the square of the next input column.

    Attributes:
    ----------
    models_dict : dict
        Trained models keyed by the stage number as a string ('1', '2', ...).

    partial_polynomial_df : DataFrame
        Feature table built during the last ``fit`` call (includes the 'Y'
        column with the target values).

    stop : int
        Number of stages trained by the last ``fit`` call.
    """

    def __init__(self):
        """
        Initialize the KolmogorovGaborPolynomial class.
        """
        self.models_dict = {}  # Dictionary for storing models

    def _move_squared_column(self, remaining_X, features):
        """Add the square of the first column of remaining_X to features and drop it from remaining_X."""
        column = remaining_X.columns[0]
        # f-string instead of `column + '^2'` so non-string labels (e.g. ints) work too.
        features[f'{column}^2'] = remaining_X.iloc[:, 0] ** 2
        remaining_X.drop(columns=column, inplace=True)

    def _add_stage_features(self, features, remaining_X, predictions, degree):
        """Extend features in place with the columns required by stage `degree`.

        fit() and predict() both go through this helper, so the column set and
        column order seen by a stage model are the same at training and at
        prediction time.
        """
        features[f'Y_pred^{degree}'] = (predictions ** degree).flatten()

        # High powers may overflow to +/-inf; inf and NaN are replaced by 0.
        # This runs before the new squared column is added, as in training.
        features.replace([np.inf, -np.inf], np.nan, inplace=True)
        features.fillna(0, inplace=True)

        if not remaining_X.empty:
            self._move_squared_column(remaining_X, features)

    def fit(self, X, Y, stop=None):
        """
        Train the model based on input data.

        Parameters:
        ----------
        X : DataFrame
            Input data (features).
        Y : DataFrame or Series
            Target values.
        stop : int, optional
            Number of iterations for training the model (default is None, which means using all features).

        Returns:
        ----------
        model : LinearRegression
            The trained model at the last iteration.

        Raises:
        ----------
        ValueError
            If ``stop`` is smaller than 1.
        """
        if stop is None:
            stop = len(X.columns)
        if stop < 1:
            raise ValueError(f"stop must be at least 1, got {stop!r}")
        self.stop = stop

        # Drop stages left over from an earlier fit so that predict() can never
        # combine models trained on different data or with a different stop.
        self.models_dict = {}

        # Work on a copy: columns are removed from it as they are consumed
        local_X = X.copy()

        # Initial model (first iteration) on the raw features
        model = LinearRegression()
        model.fit(local_X, Y)
        predictions = model.predict(local_X)
        self.models_dict['1'] = model

        # Feature table for the later stages
        self.partial_polynomial_df = pd.DataFrame(index=Y.index)
        self.partial_polynomial_df['Y'] = Y.values.flatten()
        self.partial_polynomial_df['Y_pred'] = predictions.flatten()
        self._move_squared_column(local_X, self.partial_polynomial_df)

        for i in range(2, stop + 1):
            self._add_stage_features(self.partial_polynomial_df, local_X, predictions, i)

            # Train the stage model on everything except the target column
            model = LinearRegression()
            X_new = self.partial_polynomial_df.drop(columns='Y')
            model.fit(X_new, Y)
            predictions = model.predict(X_new)

            self.models_dict[str(i)] = model

        return self.models_dict[str(stop)]

    def predict(self, X, stop=None):
        """
        Make predictions based on the trained model.

        Parameters:
        ----------
        X : DataFrame
            Input data (features).
        stop : int, optional
            Number of iterations for prediction (default is None, which means using self.stop value).

        Returns:
        ----------
        predictions : ndarray
            Predicted values.

        Raises:
        ----------
        ValueError
            If ``stop`` is smaller than 1.
        KeyError
            If a stage up to ``stop`` has not been trained.
        """
        if stop is None:
            stop = self.stop
        if stop < 1:
            raise ValueError(f"stop must be at least 1, got {stop!r}")

        # Fail before doing any work if a required stage is missing
        missing = [key for key in map(str, range(1, stop + 1)) if key not in self.models_dict]
        if missing:
            raise KeyError(f"no trained model for stage(s): {', '.join(missing)}")

        # Work on a copy: columns are removed from it as they are consumed
        local_X = X.copy()

        # Initial predictions
        predictions = self.models_dict['1'].predict(local_X)
        if stop == 1:
            return predictions

        # Rebuild the same feature table that fit() used (without the 'Y' column)
        predict_polynomial_df = pd.DataFrame(index=X.index)
        predict_polynomial_df['Y_pred'] = predictions.flatten()
        self._move_squared_column(local_X, predict_polynomial_df)

        for i in range(2, stop + 1):
            self._add_stage_features(predict_polynomial_df, local_X, predictions, i)
            predictions = self.models_dict[str(i)].predict(predict_polynomial_df)

        return predictions


# Example usage of the class: evaluate every stage of the chain on a hold-out set
# Create an instance of the class
kpg = KolmogorovGaborPolynomial()

# Load the data
df = pd.read_excel(r'C:\Users\RobotComp.ru\PycharmProjects\fullPolynomialKolmagorovGabor\datasets\polynom_miroshnichenko.xlsx', sheet_name='data')
Y = df[['Металл и добыча (Y3)']]
X = df.drop(columns=['Металл и добыча (Y3)', 'Дата'])

# Split the data into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# One stage per feature column
n_stages = len(X_train.columns)

# Train the full chain once. Stage i depends only on the data and on stages
# 1..i-1, so refitting with stop=i for every i would rebuild the same models.
kpg.fit(X_train, Y_train, stop=n_stages)

# True test targets as a flat array, computed once for all metrics
y_true = Y_test.values.flatten()

# Lists for the error variance, MAE, MSE and R^2 of every stage
variances = []
maes = []
mses = []
r2_scores = []

# Evaluate the prediction of every stage
for i in range(1, n_stages + 1):
    # Predictions using the first i stages
    predictions = kpg.predict(X=X_test, stop=i).flatten()
    # Prediction errors
    errors = y_true - predictions
    # Sample variance of the errors
    sample_variance = errors.var(ddof=1)
    # MAE
    mae = mean_absolute_error(y_true, predictions)
    # MSE
    mse = mean_squared_error(y_true, predictions)
    # R^2
    r2 = r2_score(y_true, predictions)
    # Store the metrics of this stage
    variances.append(sample_variance)
    maes.append(mae)
    mses.append(mse)
    r2_scores.append(r2)

# Collect the metrics into a DataFrame
results_df = pd.DataFrame({
    'Iteration': range(1, n_stages + 1),
    'Variance': variances,
    'MAE': maes,
    'MSE': mses,
    'R^2': r2_scores
})

# Display the results table
results_df

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,Iteration,Variance,MAE,MSE,R^2
0,1,42041.55812,237.803301,88081.578756,0.746988
1,2,169559.177204,241.689002,162077.454328,0.534437
2,3,176764.026388,232.418884,159735.111198,0.541165
3,4,45613.1395,133.762527,36544.863175,0.895026
4,5,742245.503254,507.040731,628010.293242,-0.803942
5,6,22341.720634,130.503805,20189.808674,0.942005
6,7,158653.855392,259.770714,123204.143704,0.6461


In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn. metrics import mean_absolute_error 
from ydata_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


class KolmogorovGaborPolynomial:
    """
    Class for constructing the Kolmogorov-Gabor polynomial.

    The model is a chain of linear regressions ("stages"). Stage 1 regresses
    Y on the raw features. Every later stage ``i`` regresses Y on a feature
    table that grows by one ``Y_pred^i`` column (the previous stage's
    prediction raised to the power ``i``) and, while unused input columns
    remain, by the square of the next input column.

    Attributes:
    ----------
    models_dict : dict
        Trained models keyed by the stage number as a string ('1', '2', ...).

    partial_polynomial_df : DataFrame
        Feature table built during the last ``fit`` call (includes the 'Y'
        column with the target values).

    stop : int
        Number of stages trained by the last ``fit`` call.
    """

    def __init__(self):
        """
        Initialize the KolmogorovGaborPolynomial class.
        """
        self.models_dict = {}  # Dictionary for storing models

    def _move_squared_column(self, remaining_X, features):
        """Add the square of the first column of remaining_X to features and drop it from remaining_X."""
        column = remaining_X.columns[0]
        # f-string instead of `column + '^2'` so non-string labels (e.g. ints) work too.
        features[f'{column}^2'] = remaining_X.iloc[:, 0] ** 2
        remaining_X.drop(columns=column, inplace=True)

    def _add_stage_features(self, features, remaining_X, predictions, degree):
        """Extend features in place with the columns required by stage `degree`.

        fit() and predict() both go through this helper, so the column set and
        column order seen by a stage model are the same at training and at
        prediction time.
        """
        features[f'Y_pred^{degree}'] = (predictions ** degree).flatten()

        # High powers may overflow to +/-inf; inf and NaN are replaced by 0.
        # This runs before the new squared column is added, as in training.
        features.replace([np.inf, -np.inf], np.nan, inplace=True)
        features.fillna(0, inplace=True)

        if not remaining_X.empty:
            self._move_squared_column(remaining_X, features)

    def fit(self, X, Y, stop=None):
        """
        Train the model based on input data.

        Parameters:
        ----------
        X : DataFrame
            Input data (features).
        Y : DataFrame or Series
            Target values.
        stop : int, optional
            Number of iterations for training the model (default is None, which means using all features).

        Returns:
        ----------
        model : LinearRegression
            The trained model at the last iteration.

        Raises:
        ----------
        ValueError
            If ``stop`` is smaller than 1.
        """
        if stop is None:
            stop = len(X.columns)
        if stop < 1:
            raise ValueError(f"stop must be at least 1, got {stop!r}")
        self.stop = stop

        # Drop stages left over from an earlier fit so that predict() can never
        # combine models trained on different data or with a different stop.
        self.models_dict = {}

        # Work on a copy: columns are removed from it as they are consumed
        local_X = X.copy()

        # Initial model (first iteration) on the raw features
        model = LinearRegression()
        model.fit(local_X, Y)
        predictions = model.predict(local_X)
        self.models_dict['1'] = model

        # Feature table for the later stages
        self.partial_polynomial_df = pd.DataFrame(index=Y.index)
        self.partial_polynomial_df['Y'] = Y.values.flatten()
        self.partial_polynomial_df['Y_pred'] = predictions.flatten()
        self._move_squared_column(local_X, self.partial_polynomial_df)

        for i in range(2, stop + 1):
            self._add_stage_features(self.partial_polynomial_df, local_X, predictions, i)

            # Train the stage model on everything except the target column
            model = LinearRegression()
            X_new = self.partial_polynomial_df.drop(columns='Y')
            model.fit(X_new, Y)
            predictions = model.predict(X_new)

            self.models_dict[str(i)] = model

        return self.models_dict[str(stop)]

    def predict(self, X, stop=None):
        """
        Make predictions based on the trained model.

        Parameters:
        ----------
        X : DataFrame
            Input data (features).
        stop : int, optional
            Number of iterations for prediction (default is None, which means using self.stop value).

        Returns:
        ----------
        predictions : ndarray
            Predicted values.

        Raises:
        ----------
        ValueError
            If ``stop`` is smaller than 1.
        KeyError
            If a stage up to ``stop`` has not been trained.
        """
        if stop is None:
            stop = self.stop
        if stop < 1:
            raise ValueError(f"stop must be at least 1, got {stop!r}")

        # Fail before doing any work if a required stage is missing
        missing = [key for key in map(str, range(1, stop + 1)) if key not in self.models_dict]
        if missing:
            raise KeyError(f"no trained model for stage(s): {', '.join(missing)}")

        # Work on a copy: columns are removed from it as they are consumed
        local_X = X.copy()

        # Initial predictions
        predictions = self.models_dict['1'].predict(local_X)
        if stop == 1:
            return predictions

        # Rebuild the same feature table that fit() used (without the 'Y' column)
        predict_polynomial_df = pd.DataFrame(index=X.index)
        predict_polynomial_df['Y_pred'] = predictions.flatten()
        self._move_squared_column(local_X, predict_polynomial_df)

        for i in range(2, stop + 1):
            self._add_stage_features(predict_polynomial_df, local_X, predictions, i)
            predictions = self.models_dict[str(i)].predict(predict_polynomial_df)

        return predictions

# Example usage of the class: evaluate every stage of the chain on the training data
# Create an instance of the class
kpg = KolmogorovGaborPolynomial()

# Load the data
df = pd.read_excel(r'C:\Users\RobotComp.ru\PycharmProjects\kolmagorovGabor\data\polynom_miroshnichenko.xlsx', sheet_name='data')
Y = df[['Металл и добыча (Y3)']]
X = df.drop(columns=['Металл и добыча (Y3)', 'Дата'])

# No train/test split in this cell: the model is fitted and evaluated on the
# full dataset, so the metrics below are in-sample.

# One stage per feature column
n_stages = len(X.columns)

# Train the full chain once. Stage i depends only on the data and on stages
# 1..i-1, so refitting with stop=i for every i would rebuild the same models.
kpg.fit(X, Y, stop=n_stages)

# True targets as a flat array, computed once for all metrics
y_true = Y.values.flatten()

# Lists for the error variance, MAE, MSE and R^2 of every stage
variances = []
maes = []
mses = []
r2_scores = []

# Evaluate the prediction of every stage
for i in range(1, n_stages + 1):
    # Predictions using the first i stages
    predictions = kpg.predict(X=X, stop=i).flatten()
    # Prediction errors
    errors = y_true - predictions
    # Sample variance of the errors
    sample_variance = errors.var(ddof=1)
    # MAE
    mae = mean_absolute_error(y_true, predictions)
    # MSE
    mse = mean_squared_error(y_true, predictions)
    # R^2
    r2 = r2_score(y_true, predictions)
    # Store the metrics of this stage
    variances.append(sample_variance)
    maes.append(mae)
    mses.append(mse)
    r2_scores.append(r2)

# Collect the metrics into a DataFrame
results_df = pd.DataFrame({
    'Iteration': range(1, n_stages + 1),
    'Variance': variances,
    'MAE': maes,
    'MSE': mses,
    'R^2': r2_scores
})

# Display the results table
results_df

Unnamed: 0,Iteration,Variance,MAE,MSE,R^2
0,1,3996.548622,52.94955,3761.457527,0.977236
1,2,3353.772168,45.969039,3156.491452,0.980897
2,3,2271.32735,35.632307,2137.719859,0.987063
3,4,2011.071479,36.701471,1892.773157,0.988545
4,5,1486.503524,30.664164,1399.06214,0.991533
5,6,3693.897409,46.370132,3476.609326,0.97896
6,7,2040.199542,32.150861,1920.187804,0.988379
