In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from matplotlib import pyplot as plt
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split

In [83]:
df = pd.read_csv("../data/data.csv")

In [84]:
df.head(11)

Unnamed: 0,y,x1,x2,x3,x4
0,0.904,75.5,25.2,3343,77.0
1,0.922,78.5,21.8,3001,78.2
2,0.763,78.4,25.7,3101,68.0
3,0.923,77.7,17.8,3543,77.2
4,0.918,84.4,15.9,3237,77.2
5,0.906,75.9,22.4,3330,77.2
6,0.905,76.0,20.6,3808,75.7
7,0.545,67.5,25.2,2415,62.6
8,0.894,78.2,20.7,3295,78.0
9,0.9,78.1,17.5,3504,78.2


In [None]:
df.isna().any()

In [None]:
df.shape

In [None]:
df.nunique()

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
f, ax = plt.subplots(1, figsize=(8, 6))
sns.heatmap(df.corr(), annot=True, ax=ax)

# GMDH model

![GMDH](../assets/Structure-of-the-GMDH-algorithm.jpg)

In [None]:
class GMDH:
    def __init__(self, max_layers=10, stopping_threshold=0.01):
        self.max_layers = max_layers
        self.stopping_threshold = stopping_threshold
        self.layers = []
        self.w = []
        self.rs = []

    @staticmethod
    def func(X_train):
        return np.concatenate((np.ones((len(X_train), 1)), X_train), axis=1)

    def fit(self, X_train, y_train):
        curr_layer = 0
        layer_in = self.func(X_train)
        layer_out = y_train.reshape(-1, 1)
        while curr_layer < self.max_layers:
            w = np.linalg.lstsq(layer_in, layer_out, rcond=None)[0]
            pred = np.dot(layer_in, w)
            rs = np.sum(
                (pred - layer_out) ** 2 / np.sum((layer_out - np.mean(layer_out)) ** 2)
            )
            if len(self.rs) > 0 and rs - self.rs[-1] < self.stopping_threshold:
                break

            self.layers.append(layer_in)
            self.w.append(w)
            self.rs.append(rs)
            layer_in = np.concatenate((layer_in, layer_in**2), axis=1)
            layer_out = layer_out - pred
            curr_layer += 1

    def predict(self, X_test):
        layer_in = self.func(X_test)
        pred = np.zeros((len(X_test),))

        for _, w in zip(self.layers, self.w):
            pred += np.dot(layer_in, w).ravel()
            layer_in = np.concatenate((layer_in, layer_in**2), axis=1)
        return pred

# Data separating

In [None]:
df = df.to_numpy()
X = df[:, 1:]  # Features
y = df[:, 0]  # Target variable 'y'

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
len(X_train), len(X_test), len(y_train), len(y_test)

# Training

In [None]:
gmdh = GMDH()
gmdh.fit(X_train, y_train)

# Evaluate Model

In [None]:
def get_merged(actual, pred):
    d = {"actual": actual, "pred": pred}
    merged = pd.DataFrame(d)
    merged.reset_index(drop=True)
    print("\n", merged.head())

In [None]:
def average_approximation_error(actual, pred):
    average_approximation_error = np.mean(np.abs(actual - pred))
    print("Average Approximation Error:", average_approximation_error)

In [None]:
def prediction_plot(actual, pred, title=None, actual_name=None, pred_name=None):
    fig = make_subplots(rows=1, cols=1)
    tr1 = go.Scatter(
        x=np.arange(len(actual)),
        y=actual,
        mode="lines+markers",
        name=actual_name,
    )
    tr2 = go.Scatter(
        x=np.arange(len(pred)),
        y=pred,
        mode="lines+markers",
        name=pred_name,
    )
    fig.add_trace(tr1, row=1, col=1)
    fig.append_trace(tr2, row=1, col=1)
    fig.update_layout(title=title)
    fig.show()

In [None]:
def evaluate_model(X, y, title=None, actual_name=None, pred_name=None):
    y_pred = gmdh.predict(X)
    prediction_plot(
        y, y_pred, title=title, actual_name=actual_name, pred_name=pred_name
    )
    average_approximation_error(y, y_pred)
    get_merged(y, y_pred)

In [None]:
evaluate_model(X_train, y_train, "Train Data", "X_train", "y_train")

In [None]:
evaluate_model(X_test, y_test, "Test Data", "X_test", "y_test")

In [None]:
gmdh.fit(X, y)
evaluate_model(X, y, "Initial Data", "X", "y")