In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

In [4]:
A = np.array([10, 5, 8, 1000])
B = np.array([50, 100, 30, 0.1])
(A * B).sum()

np.float64(1340.0)

---

In [5]:
df = pd.read_csv("./data/Salary_data.csv", index_col=0)
df.head()

Unnamed: 0,YearsExperience,Salary
0,1.2,39344.0
1,1.4,46206.0
2,1.6,37732.0
3,2.1,43526.0
4,2.3,39892.0


In [6]:
X = df.iloc[:, 0]
y = df.iloc[:, 1]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=10)

base_lr = LinearRegression()
base_lr.fit(X_train.values.reshape(-1,1), y_train)
print(base_lr.coef_, base_lr.intercept_)


[9356.86299354] 25154.410333062755


In [8]:
base_pred = base_lr.predict(X_test.values.reshape(-1,1))
mae_base = np.round(mean_absolute_error(y_test, base_pred), 2)
print(f"Mean Absolute Error: {mae_base}")

base_r2 = np.round(r2_score(y_test, base_pred), 2)
print(f"R-squared: {base_r2}")

Mean Absolute Error: 2741.28
R-squared: 0.98


---

In [9]:
df_2 = pd.read_csv("./data/poly.csv")
df_2.head()

Unnamed: 0,X,Y
0,-3.292157,-53.468412
1,0.799528,-1.26256
2,-0.936214,-3.506193
3,-4.72268,-107.2232
4,-3.602674,-49.13195


In [10]:
X = df_2.iloc[:, 0]
y = df_2.iloc[:, 1]

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=10)

base_2_lr = LinearRegression()
base_2_lr.fit(X_train.values.reshape(-1, 1), y_train)

test_pred = base_2_lr.predict(X_test.values.reshape(-1, 1))
train_pred = base_2_lr.predict(X_train.values.reshape(-1, 1))

r2_train = np.round(r2_score(y_train, train_pred), 2)
r2_test = np.round(r2_score(y_test, test_pred), 2)


print(f"R-squared (Train): {r2_train}")
print(f"R-squared (Test): {r2_test}")

R-squared (Train): 0.67
R-squared (Test): -5.66


In [12]:
num_feats = [i for i in range(2, 8)]
test_error = []
for feat in num_feats:
    pipe = Pipeline([
        ("poly", PolynomialFeatures(degree=feat)),
        ("linear_regression", LinearRegression())
        ])
    pipe.fit(X_train.values.reshape(-1, 1), y_train)
    prediction = pipe.predict(X_test.values.reshape(-1, 1))
    r_2_test = np.round(r2_score(y_test, prediction), 2)
    
    test_error.append(r_2_test)
    
print(test_error[np.argmin(test_error)])
print(test_error)

-3.24
[np.float64(-3.24), np.float64(0.84), np.float64(0.94), np.float64(0.9), np.float64(-0.74), np.float64(-1.29)]


In [13]:
from sklearn.linear_model import Ridge

In [19]:
X_train = X_train.values.reshape(-1, 1)
X_test = X_test.values.reshape(-1, 1)

In [27]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
import numpy as np

# Диапазон значений alpha
alpha_values = np.linspace(0, 10, num=101)  # от 0 до 10 включительно с шагом 0.1
best_r2_test = -np.inf
best_alpha = None

# Для каждого значения alpha, строим модель и вычисляем метрики
for alpha in alpha_values:
    pipe = Pipeline([
        ("poly", PolynomialFeatures(degree=7)),
        ("ridge", Ridge(alpha=alpha))
    ])
    
    # Обучение модели
    pipe.fit(X_train, y_train)

    # Предсказания
    y_train_pred = pipe.predict(X_train)
    y_test_pred = pipe.predict(X_test)

    # Вычисление метрик R²
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)

    # Выводим метрики
    print(f"Alpha: {alpha:.2f}, R² Training: {r2_train:.3f}, R² Testing: {r2_test:.3f}")

    # Проверяем наилучший результат на тестовой выборке
    if r2_test > best_r2_test:
        best_r2_test = r2_test
        best_alpha = alpha

# Выводим наилучший результат
print(f"Best R² on test set: {best_r2_test:.3f} with alpha: {best_alpha:.2f}")

Alpha: 0.00, R² Training: 0.985, R² Testing: -1.287
Alpha: 0.10, R² Training: 0.985, R² Testing: -0.913
Alpha: 0.20, R² Training: 0.985, R² Testing: -0.602
Alpha: 0.30, R² Training: 0.985, R² Testing: -0.342
Alpha: 0.40, R² Training: 0.985, R² Testing: -0.123
Alpha: 0.50, R² Training: 0.985, R² Testing: 0.061
Alpha: 0.60, R² Training: 0.985, R² Testing: 0.215
Alpha: 0.70, R² Training: 0.985, R² Testing: 0.346
Alpha: 0.80, R² Training: 0.985, R² Testing: 0.455
Alpha: 0.90, R² Training: 0.985, R² Testing: 0.547
Alpha: 1.00, R² Training: 0.985, R² Testing: 0.624
Alpha: 1.10, R² Training: 0.985, R² Testing: 0.688
Alpha: 1.20, R² Training: 0.985, R² Testing: 0.741
Alpha: 1.30, R² Training: 0.985, R² Testing: 0.784
Alpha: 1.40, R² Training: 0.985, R² Testing: 0.819
Alpha: 1.50, R² Training: 0.985, R² Testing: 0.846
Alpha: 1.60, R² Training: 0.985, R² Testing: 0.867
Alpha: 1.70, R² Training: 0.985, R² Testing: 0.882
Alpha: 1.80, R² Training: 0.985, R² Testing: 0.893
Alpha: 1.90, R² Training: 

---

In [28]:
df_3 = pd.read_csv("./data/manufacturing.csv")
df_3

Unnamed: 0,Temperature (°C),Pressure (kPa),Temperature x Pressure,Material Fusion Metric,Material Transformation Metric,Quality Rating
0,209.762701,8.050855,1688.769167,44522.217074,9.229576e+06,99.999971
1,243.037873,15.812068,3842.931469,63020.764997,1.435537e+07,99.985703
2,220.552675,7.843130,1729.823314,49125.950249,1.072839e+07,99.999758
3,208.976637,23.786089,4970.736918,57128.881547,9.125702e+06,99.999975
4,184.730960,15.797812,2918.345014,38068.201283,6.303792e+06,100.000000
...,...,...,...,...,...,...
3952,156.811578,21.794290,3417.596965,34941.963896,3.855501e+06,100.000000
3953,197.850406,8.291704,1640.516924,39714.857236,7.744742e+06,99.999997
3954,241.357144,16.391910,3956.304672,62657.690952,1.405957e+07,99.989318
3955,209.040239,23.809936,4977.234763,57195.985528,9.134036e+06,99.999975


In [None]:
X = df_3.drop("Quality Rating", axis=1)
y = df_3["Quality Rating"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=10)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

y_pred = linear_model.predict(X_test)
y_pred_train = linear_model.predict(X_train)

r_2_test = np.round(r2_score(y_test, y_pred), 2)
r_2_train = np.round(r2_score(y_train, y_pred_train), 2)

print(r_2_test, r_2_train)


0.53 0.5


In [39]:
pipe = Pipeline([
    ("poly", PolynomialFeatures(degree=2)),
    ("model", LinearRegression())
])


pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)
y_pred_train = pipe.predict(X_train)

r_2_test = np.round(r2_score(y_test, y_pred), 2)
r_2_train = np.round(r2_score(y_train, y_pred_train), 2)

print(r_2_test, r_2_train)

0.86 0.86


In [40]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Создаем Pipeline с полиномиальными признаками до 3 степени и линейной моделью
pipe = Pipeline([
    ("poly", PolynomialFeatures(degree=3)),
    ("linear", LinearRegression())
])

# Обучаем модель на тренировочных данных
pipe.fit(X_train, y_train)

# Делаем предсказания на тренировочных и тестовых данных
y_train_pred = pipe.predict(X_train)
y_test_pred = pipe.predict(X_test)

# Вычисляем метрики R²
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

# Выводим результаты
print(f"R² Training: {r2_train:.3f}")
print(f"R² Testing: {r2_test:.3f}")

R² Training: 0.956
R² Testing: 0.956
