### Maxwell's equation for the magnetic field strength. 
Parameter ranges: E from 300 to 3000 in steps of 10, sigma from 0.1 to 1.0 in steps of 0.1; H_R = 5000; tube radius R = 0.012 m.

We predict H(0) by linear regression and evaluate the quality of the model.

In [1]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
class Plasmatron():

    '''Magnetic field strength model: parameters, equation solver and dataset builder.

    The class stores the physical/numerical parameters, solves the three-point
    difference equation for H on a radial grid (``sweep``) and tabulates the
    solution over a grid of ``sigma`` and ``E`` values (``Computingdata``).
    '''

    def __init__(self, H_R=5000, R=0.012, N=200, h=0.0000038):
        '''Store the model parameters and build the sigma and E grids.

        Args:
            H_R: value imposed on the last radial node by ``sweep``.
            R: last node of the radial grid (tube radius).
            N: number of radial grid nodes.
            h: first node of the radial grid; also the step used for the
                half-node offsets and the right-hand side in ``sweep``.
        '''
        self.H_R = H_R
        self.R = R
        self.N = N
        self.h = h

        # The end values sit slightly past the last wanted point so that
        # 1.0 and 3000 are included in the half-open ranges below.
        self.sigma_start = 0.1
        self.sigma_end = 1.01
        self.sigma_step = 0.1

        self.E_start = 300
        self.E_end = 3001
        self.E_step = 10

        # Rounding removes the floating-point drift accumulated by np.arange.
        self.sigma = np.array([round(i, 1) for i in np.arange(self.sigma_start, self.sigma_end, self.sigma_step)])
        self.E = np.array(range(self.E_start, self.E_end, self.E_step))

    def sweep(self, sigma, N, R, h, E, H_R):
        '''Solve the three-point difference equation for H on the radial grid.

        The coefficients A, B, C, F are assembled for every node, the sweep
        coefficients ``alpha``/``beta`` are computed from the last node down
        to the first, and H is then recovered from the first node upwards.

        Args:
            sigma: scalar used as ``V = 1 / sigma`` and in the source term.
            N: number of grid nodes (the method indexes nodes 0 and 1, so
                N must be at least 2).
            R: last grid node.
            h: first grid node and step of the difference scheme.
            E: scalar entering the source term as ``2 * sigma * E ** 2``.
            H_R: value of H at the last node.

        Returns:
            Tuple ``(H, r)`` of two length-``N`` arrays: the solution and the
            radial grid it is defined on.
        '''
        r = np.linspace(h, R, N)
        V = 1 / sigma

        # NOTE(review): the spacing of ``r`` is (R - h) / (N - 1), which is
        # not equal to ``h`` for the default parameters - confirm that using
        # ``h`` for the half-node offsets and in F is intended.
        r_imh = r - h / 2
        r_iph = r + h / 2
        f = 2 * sigma * E ** 2

        # NOTE(review): A uses r[i] whereas C uses the half-node r_imh -
        # confirm against the intended discretisation.
        A = r * V
        B = r_iph * V
        C = r_imh * V + B
        F = -r * f * h ** 2

        H     = np.zeros(N)
        alpha = np.zeros(N)
        beta  = np.zeros(N)

        # With alpha[N - 1] == 0 the recurrence below yields H[N - 1] == H_R.
        beta[N - 1] = H_R

        for i in reversed(range(0, N - 1)):
            denom = C[i] - B[i] * alpha[i + 1]
            alpha[i] = A[i] / denom
            beta[i] = (B[i] * beta[i + 1] + F[i]) / denom

        H[0] = (beta[0] - alpha[0] * beta[1]) / (1 - alpha[0] * alpha[1])

        # Start at i = 0 so that H[1] is derived from H[0]; starting at 1
        # would leave H[1] at its initial zero and detach the rest of the
        # solution from the value computed for the first node.
        for i in range(0, N - 1):
            H[i + 1] = alpha[i + 1] * H[i] + beta[i + 1]
        return H, r

    def Computingdata(self):
        '''Return the normalised dataset, computing and caching it if needed.

        If ``data.xlsx`` exists in the current working directory it is read
        and returned unchanged. Otherwise ``sweep`` is solved once for every
        (sigma, E) pair, nodes 1..N-1 of each solution are tabulated, every
        column is divided by its maximum, rows with ``H <= 0`` are dropped
        and the result is written to ``data.xlsx``.

        Returns:
            DataFrame with columns ``H``, ``R``, ``E`` and ``sigma``.
        '''
        self.filename = 'data.xlsx'
        self.file_path = os.path.join(os.getcwd(), self.filename)

        if os.path.exists(self.file_path):
            return pd.read_excel(self.file_path)

        n_rows = self.N - 1  # node 0 is not tabulated

        H_parts = []
        R_parts = []
        E_parts = []
        sigma_parts = []

        for s in self.sigma:
            for e in self.E:
                # The solution depends only on (sigma, E), so it is solved
                # once per pair and all of its nodes are taken at once.
                H, r = self.sweep(s, self.N, self.R, self.h, e, self.H_R)
                H_parts.append(H[1:])
                R_parts.append(r[1:])
                E_parts.append(np.full(n_rows, e))
                sigma_parts.append(np.full(n_rows, s))

        columns = {
            'H': np.concatenate(H_parts),
            'R': np.concatenate(R_parts),
            'E': np.concatenate(E_parts),
            'sigma': np.concatenate(sigma_parts),
        }

        # Scale every column by its own maximum.
        df = pd.DataFrame({name: values / values.max() for name, values in columns.items()})
        df = df.loc[df['H'] > 0]
        df.to_excel(self.file_path, index=False)

        return df

In [3]:
class DataExploration():
    '''Exploratory plots of the relationships between the dataset columns.'''

    def __init__(self, df):
        '''Keep a reference to the DataFrame that will be plotted.'''
        self.df = df

    def charts(self):
        '''Draw a seaborn pair plot of the stored DataFrame with KDE diagonals.

        The data comes from the instance rather than from a module-level
        ``df``, so the plot always shows the frame passed to the constructor.
        '''
        # Legacy callers use ``DataExploration.charts(df)``, which binds the
        # DataFrame itself to ``self``; keep that form working.
        frame = self.df if isinstance(self, DataExploration) else self
        sns.pairplot(frame, diag_kind='kde')

In [6]:
class LRModel():
    '''Linear regression of the ``H`` column on the remaining columns.'''

    def __init__(self, df):
        '''Keep a reference to the DataFrame used for training and testing.'''
        self.df = df

    def model(self):
        '''Fit a linear regression on a 70/30 split and print MAE, MSE and RMSE.

        The target is the ``H`` column; every other column is a feature.
        The split uses ``random_state=101``, so repeated runs on the same
        data give the same metrics. The metrics are computed on the test
        part only. Nothing is returned.
        '''
        # Legacy callers use ``LRModel.model(df)``, which binds the DataFrame
        # itself to ``self``; keep that form working.
        frame = self.df if isinstance(self, LRModel) else self

        X = frame.drop('H', axis=1)
        y = frame['H']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

        regressor = LinearRegression()
        regressor.fit(X_train, y_train)

        test_predictions = regressor.predict(X_test)

        MAE = mean_absolute_error(y_test, test_predictions)
        MSE = mean_squared_error(y_test, test_predictions)
        RMSE = np.sqrt(MSE)

        print('MAE :', MAE, 'MSE :', MSE, 'RMSE :', RMSE)
        

In [4]:
# Build (or load from data.xlsx) the dataset, then explore it and fit the model.
Plasma_model = Plasmatron()
df = Plasma_model.Computingdata()
# Instantiate the helpers so that each method receives a real instance as
# ``self`` instead of the DataFrame.
DataExploration(df).charts()
LRModel(df).model()

### Metrics 

**Средняя абсолютная ошибка - Mean Absolute Error** (MAE) - усредняет абсолютные значения ошибок:

$$\frac 1n\sum_{i=1}^n|y_i-\hat{y}_i|$$

**Среднеквадратическая ошибка - Mean Squared Error** (MSE) - усредняет квадраты ошибок:

$$\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2$$

**Корень из среднеквадратической ошибки - Root Mean Squared Error** (RMSE) - квадратный корень из среднеквадратической ошибки:

$$\sqrt{\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2}$$

Сравнение метрик:

- **MAE**  - средняя ошибка.
- **MSE**  - популярнее, чем MAE, потому что MSE сильнее "наказывает" большие ошибки, и обычно это более полезно в прикладных задачах.
- **RMSE** - даже ещё более популярна, чем MSE, потому что RMSE измеряется в тех же единицах, что и "y".

Все эти метрики являются **функциями потерь (loss functions)**, поэтому нужно стремиться их уменьшать.