# Величины S

$$
\begin{align*}
    S_U(x;\sigma^{mid})&=\frac{1}{3}
\sum_{t\in\mathbb{F}_3}
\sum_{\sigma^{in}\in\{-1,1\}^{V^{in}}}
\chi([x;t]^T P^{in}(\sigma^{in};\sigma^{mid})[x;t])\\
S_k(x;\sigma^{mid})&=\sum_{\sigma^{in}\in\{-1,1\}^{V^{in}_k}}
    \chi(x^T P_k^{in}(\sigma^{in};\sigma^{mid})x)
\end{align*}
$$

Нужно выразить значения $S_U$ через $S_k$.

In [16]:
import os

import pandas as pd
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression


# Set seaborn's global plotting defaults: grid style, scaling context and color palette.
sns.set_style("darkgrid")
sns.set_context("notebook")
sns.set_palette("viridis")

## 1. Download

Скачаем все файлы с Google Диска (этот шаг можно пропустить и собрать данные самостоятельно).

In [2]:
# Directory that holds the dataset; it is created if it does not exist yet.
data_dir = "../assets/data"
os.makedirs(data_dir, exist_ok=True)

# Optional: uncomment the lines below to download the data folder into data_dir.

# !pip install gdown -q
# import gdown
# gdown.download_folder(
#     url="https://drive.google.com/drive/folders/1aWnVLwRHkxQDhAz86DHr9jipaShbzTHt?usp=drive_link",
#     output=data_dir
# )

## 2. Read data

In [3]:
# Load the table of S values from the data directory and preview its first rows.
s_values_path = os.path.join(data_dir, "s_values.csv")
data = pd.read_csv(s_values_path)
data.head()

Unnamed: 0,sigma_mid,x,S_U,S_A,S_B,S_C,S_D,S_E
0,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1]",-15 + 15*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I
1,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, 0]",-3/2 + 3*sqrt(3)*I/2,-1 + sqrt(3)*I,2 - 2*sqrt(3)*I,1/2 - sqrt(3)*I/2,2 - 2*sqrt(3)*I,-1 + sqrt(3)*I
2,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, 1]",3,2,-4,-1,-4,2
3,"[-1, -1, -1, -1, -1]","[-1, -1, -1, 0, -1]",-3/2 + 3*sqrt(3)*I/2,2 - 2*sqrt(3)*I,1/2 - sqrt(3)*I/2,2 - 2*sqrt(3)*I,-1 + sqrt(3)*I,-1 + sqrt(3)*I
4,"[-1, -1, -1, -1, -1]","[-1, -1, -1, 0, 0]",3 + 3*sqrt(3)*I,-1 - sqrt(3)*I,1/2 + sqrt(3)*I/2,1/2 + sqrt(3)*I/2,-1 - sqrt(3)*I,1/2 + sqrt(3)*I/2


In [4]:
# Convert every S column with sympy.nsimplify, printing the column name
# after each one as a progress marker.
s_columns = ["S_U", "S_A", "S_B", "S_C", "S_D", "S_E"]
for col in s_columns:
    parsed = data[col].apply(sp.nsimplify)
    data[col] = parsed
    print(col)

S_U
S_A
S_B
S_C
S_D
S_E


## 3. ML

In [12]:
# Keep one row per distinct (S_U, S_A, ..., S_E) combination, then show the
# resulting shape and a preview.
value_columns = ["S_U", "S_A", "S_B", "S_C", "S_D", "S_E"]
data = data.drop_duplicates(subset=value_columns)
print(data.shape)
data.head()

(2051, 8)


Unnamed: 0,sigma_mid,x,S_U,S_A,S_B,S_C,S_D,S_E
0,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1]",-15 + 15*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I,-4 + 4*sqrt(3)*I
1,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, 0]",-3/2 + 3*sqrt(3)*I/2,-1 + sqrt(3)*I,2 - 2*sqrt(3)*I,1/2 - sqrt(3)*I/2,2 - 2*sqrt(3)*I,-1 + sqrt(3)*I
2,"[-1, -1, -1, -1, -1]","[-1, -1, -1, -1, 1]",3,2,-4,-1,-4,2
3,"[-1, -1, -1, -1, -1]","[-1, -1, -1, 0, -1]",-3/2 + 3*sqrt(3)*I/2,2 - 2*sqrt(3)*I,1/2 - sqrt(3)*I/2,2 - 2*sqrt(3)*I,-1 + sqrt(3)*I,-1 + sqrt(3)*I
4,"[-1, -1, -1, -1, -1]","[-1, -1, -1, 0, 0]",3 + 3*sqrt(3)*I,-1 - sqrt(3)*I,1/2 + sqrt(3)*I/2,1/2 + sqrt(3)*I/2,-1 - sqrt(3)*I,1/2 + sqrt(3)*I/2


In [13]:
# Feature matrix: the five S_k columns cast to complex128.
# np.complex_ was removed in NumPy 2.0; np.complex128 is the same dtype and
# is available on every NumPy version.
X = np.array(data[["S_A", "S_B", "S_C", "S_D", "S_E"]], dtype=np.complex128)
X.dtype

dtype('complex128')

In [14]:
# Target vector: the S_U column cast to complex128.
# np.complex_ was removed in NumPy 2.0; np.complex128 is the same dtype and
# is available on every NumPy version.
y = np.array(data["S_U"], dtype=np.complex128)
y[:5]

array([-15. +25.98076211j,  -1.5 +2.59807621j,   3.  +0.j        ,
        -1.5 +2.59807621j,   3.  +5.19615242j])

In [15]:
def create_poly_features(X: np.ndarray, power: int = 2) -> np.ndarray:
    """Stack element-wise powers of ``X`` side by side.

    Only per-entry powers are produced (``X``, ``X**2``, ..., ``X**power``);
    no cross terms between different columns are generated.

    Args:
        X: Input feature array.
        power: Highest exponent to include. For values below 2 only ``X``
            itself is returned.

    Returns:
        The arrays ``X ** p`` for ``p = 1..power`` joined with ``np.hstack``.
    """
    stacked_powers = [X] + [X ** p for p in range(2, power + 1)]
    return np.hstack(stacked_powers)

# Shape check: with power=3 every column of X contributes three features (x, x**2, x**3).
create_poly_features(X, power=3).shape

(2051, 15)

In [18]:
def calc_loss(y_true, y_pred):
    """Return the mean absolute error, i.e. the average modulus of ``y_true - y_pred``."""
    residuals = y_true - y_pred
    return np.mean(np.abs(residuals))

### 1. Поделим на вещественную и мнимую части

In [20]:
def calc_experiment_sklearn(X, y, poly_power: int=5):
    """Fit separate real and imaginary linear regressions and return the loss.

    The real part of ``y`` is regressed on the element-wise powers of the real
    part of ``X``, and the imaginary part of ``y`` on those of the imaginary
    part of ``X``. Both models are scored on the data they were fitted on.

    Args:
        X: Complex feature array.
        y: Complex target array.
        poly_power: Highest power passed to ``create_poly_features``.

    Returns:
        ``calc_loss`` between ``y`` and the recombined complex prediction.
    """
    component_predictions = []
    for features, target in ((X.real, y.real), (X.imag, y.imag)):
        expanded = create_poly_features(features, poly_power)
        model = LinearRegression().fit(expanded, target)
        component_predictions.append(model.predict(expanded))

    real_pred, imag_pred = component_predictions
    combined = real_pred + imag_pred * 1j
    return calc_loss(y_true=y, y_pred=combined)


# Sweep the polynomial degree from 1 to 8 and report the training loss for each.
for poly_power in range(1, 9):
    loss = calc_experiment_sklearn(X, y, poly_power=poly_power)
    print(f"Poly: {poly_power}; loss: {loss}")

Poly: 1; loss: 8.115713787509007
Poly: 2; loss: 7.93856969131932
Poly: 3; loss: 7.762260703723982
Poly: 4; loss: 7.57064395814136
Poly: 5; loss: 6.902653756654313
Poly: 6; loss: 6.9026537566547095
Poly: 7; loss: 6.728744414571163
Poly: 8; loss: 6.728744414638822


### 2. Аналитическая формула

In [21]:
def complex_linear_regression(X, y, l2=0.0):
    """Closed-form (ridge) least squares for complex-valued data.

    Solves the regularized normal equations ``(X^H X + l2 * I) w = X^H y``,
    where ``X^H`` is the conjugate transpose of ``X``.

    Args:
        X: Feature matrix of shape (n_samples, n_features); may be complex.
        y: Target array with n_samples entries along its first axis.
        l2: Ridge coefficient added to the diagonal of ``X^H X``.

    Returns:
        The weight vector ``w``.

    Raises:
        numpy.linalg.LinAlgError: If the regularized Gram matrix is singular.
    """
    X_H = np.conj(X.T)
    gram = X_H @ X + l2 * np.eye(X.shape[1])
    # Solve the linear system directly instead of forming the explicit inverse:
    # it is cheaper and loses less precision when the Gram matrix is
    # ill-conditioned, which high element-wise powers easily cause.
    return np.linalg.solve(gram, X_H @ y)


def calc_experiment_analytical(X, y, poly_power, l2=0.0):
    """Fit the closed-form complex regression on power features and return the loss.

    Args:
        X: Complex feature array.
        y: Complex target array.
        poly_power: Highest power passed to ``create_poly_features``.
        l2: Ridge coefficient forwarded to ``complex_linear_regression``.

    Returns:
        ``calc_loss`` between ``y`` and the fitted prediction on the same data.
    """
    features = create_poly_features(X, poly_power)
    weights = complex_linear_regression(features, y, l2=l2)
    fitted = features @ weights
    return calc_loss(y_true=y, y_pred=fitted)


# Grid over polynomial degree and ridge coefficient; print the training loss for each pair.
poly_powers = range(1, 9)
l2_values = [0.0, 0.1, 0.5, 1, 10]
for poly_power in poly_powers:
    for l2 in l2_values:
        loss = calc_experiment_analytical(X, y, poly_power=poly_power, l2=l2)
        print(f"Poly: {poly_power}; l2: {l2}; loss: {loss}")

Poly: 1; l2: 0.0; loss: 8.143487967626172
Poly: 1; l2: 0.1; loss: 8.143488412676028
Poly: 1; l2: 0.5; loss: 8.143490192837076
Poly: 1; l2: 1; loss: 8.143492417952059
Poly: 1; l2: 10; loss: 8.143532453622413
Poly: 2; l2: 0.0; loss: 8.126466126585333
Poly: 2; l2: 0.1; loss: 8.126466531604855
Poly: 2; l2: 0.5; loss: 8.126468151653075
Poly: 2; l2: 1; loss: 8.126470176646134
Poly: 2; l2: 10; loss: 8.126506613750777
Poly: 3; l2: 0.0; loss: 8.119632309749479
Poly: 3; l2: 0.1; loss: 8.119632680008007
Poly: 3; l2: 0.5; loss: 8.119634161017066
Poly: 3; l2: 1; loss: 8.119636012222017
Poly: 3; l2: 10; loss: 8.119669323198428
Poly: 4; l2: 0.0; loss: 7.803914664687208
Poly: 4; l2: 0.1; loss: 7.803913950524711
Poly: 4; l2: 0.5; loss: 7.803911093987702
Poly: 4; l2: 1; loss: 7.803907523570626
Poly: 4; l2: 10; loss: 7.803843304319074
Poly: 5; l2: 0.0; loss: 7.782523560095234
Poly: 5; l2: 0.1; loss: 7.782522831665295
Poly: 5; l2: 0.5; loss: 7.782519918064146
Poly: 5; l2: 1; loss: 7.782516276329543
Poly: 

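Так тоже не получается: ошибка почти не уменьшается (около 7.8 при степени 5), то есть линейная модель на поэлементных степенях $S_k$ не восстанавливает $S_U$.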