# Построение модели для прогнозирования модуля упругости при растяжении

In [30]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score

In [31]:
#df.index = df.index.astype('int')

In [32]:
df = pd.read_csv('data_model.csv', index_col = 0)
df.sample(5)

Unnamed: 0,T1,F1,F2,F3,F4,F5,F6,T2,T3,F7,F9,F10
959.0,2.263186,1927.067581,1191.498362,82.452023,24.391331,256.273023,294.814619,77.995289,2758.414767,241.620617,5.24397,59.011514
838.0,3.990794,2018.231213,710.831426,101.903836,21.195527,227.564104,700.659837,73.550433,2647.588058,239.096087,10.387381,52.761679
859.0,1.637759,1994.089395,1501.915969,106.703068,22.08694,277.271915,390.592063,74.255046,2275.243194,159.76464,8.613776,49.636952
1013.0,2.310394,1931.146887,554.010341,96.749782,22.146487,214.827727,56.242761,78.143609,1939.30755,87.270139,7.683346,62.785021
285.0,3.864766,1964.924431,369.448793,138.699165,21.909672,276.393919,176.460142,74.017914,1630.31755,307.352077,7.016182,67.448985


1. Упражнения на всей выборке

In [33]:
X, y = df.drop(['T2'], axis = 1).values, df['T2'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,random_state = 42)

#res= clf._predict_proba_lr(X_test,Y_test)

X_train.shape

(655, 11)

In [34]:
scaler = StandardScaler()

In [35]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [36]:
X_train_scaled.shape

(655, 11)

In [37]:
X_test_scaled.shape

(281, 11)

In [38]:
def train_validate_report(model, X_train_scaled, y_train, X_test_scaled, y_test, feature_names):
    
    model.fit(X_train_scaled, y_train)
    
    print('MAE:', mean_absolute_error(y_test, model.predict(X_test_scaled)))
    print('MSE:', mean_squared_error(y_test, model.predict(X_test_scaled)))
    print('RMSE:', np.sqrt(mean_squared_error(y_test, model.predict(X_test_scaled))))
    
    print(pd.DataFrame(model.coef_, feature_names, columns = ['coef']).sort_values(
        by = 'coef', ascending = False))

In [39]:
lasso = Lasso(random_state = 17)
ridge = Ridge(random_state = 17)
lasso_cv = LassoCV(random_state = 17)
ridge_cv = RidgeCV()

In [40]:
train_validate_report(lasso, X_train_scaled, y_train, X_test_scaled, y_test, 
                      feature_names = df.columns[:-1])

MAE: 2.3986344930417918
MSE: 8.765901841522709
RMSE: 2.960726573245613
    coef
T1  -0.0
F1  -0.0
F2   0.0
F3  -0.0
F4   0.0
F5   0.0
F6   0.0
T2  -0.0
T3   0.0
F7  -0.0
F9   0.0


In [12]:
train_validate_report(ridge, X_train_scaled, y_train, X_test_scaled, y_test, feature_names = df.columns[:-1])

MAE: 2.4182609859719615
MSE: 8.776976606130868
RMSE: 2.9625962610742063
        coef
F2  0.174437
F4  0.169266
F6  0.135203
T3  0.127073
F9  0.096131
F5  0.014689
F1 -0.061166
T2 -0.064422
F7 -0.072487
T1 -0.111564
F3 -0.168502


In [13]:
train_validate_report(lasso_cv, X_train_scaled, y_train, X_test_scaled, y_test, feature_names = df.columns[:-1])

MAE: 2.3986344930417918
MSE: 8.765901841522709
RMSE: 2.960726573245613
            coef
F2  6.508788e-17
T1 -0.000000e+00
F1 -0.000000e+00
F3 -0.000000e+00
F4  0.000000e+00
F5  0.000000e+00
F6  0.000000e+00
T2 -0.000000e+00
T3  0.000000e+00
F7 -0.000000e+00
F9  0.000000e+00


In [14]:
train_validate_report(ridge_cv, X_train_scaled, y_train, X_test_scaled, y_test, feature_names = df.columns[:-1])

MAE: 2.417859597558593
MSE: 8.774790315131943
RMSE: 2.9622272558215283
        coef
F2  0.171922
F4  0.166719
F6  0.133023
T3  0.125163
F9  0.094671
F5  0.014611
F1 -0.059813
T2 -0.063016
F7 -0.071267
T1 -0.109882
F3 -0.165828


2. На важных признаках

In [15]:
df_1 = df.drop(['T2', 'F1', 'F2', 'T1', 'F5', 'T3', 'F10'], axis = 1)
X, y = df_1.values, df['T2'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,random_state = 42)

#res= clf._predict_proba_lr(X_test,Y_test)

X_train.shape

(655, 5)

In [16]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
def train_validate_report(model, X_train_scaled, y_train, X_test_scaled, y_test, feature_names):
    
    model.fit(X_train_scaled, y_train)
    
    print('MAE:', mean_absolute_error(y_test, model.predict(X_test_scaled)))
    print('MSE:', mean_squared_error(y_test, model.predict(X_test_scaled)))
    print('RMSE:', np.sqrt(mean_squared_error(y_test, model.predict(X_test_scaled))))
    
    print(pd.DataFrame(model.coef_, feature_names, columns = ['coef']).sort_values(by = 'coef', ascending = False))

In [18]:
train_validate_report(ridge_cv, X_train_scaled, y_train, X_test_scaled, y_test, feature_names = df_1.columns[:])

MAE: 2.3920933436025504
MSE: 8.666974694942374
RMSE: 2.9439726043124743
        coef
F4  0.153404
F6  0.127902
F7  0.120786
F9 -0.065374
F3 -0.153397


Второй вариант

In [19]:
df_2 = df.drop(['T2', 'F1', 'T1', 'F5', 'F10'], axis = 1)
X, y = df_2.values, df['T2'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,random_state = 42)

#res= clf._predict_proba_lr(X_test,Y_test)

X_train.shape

(655, 7)

In [20]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
def train_validate_report(model, X_train_scaled, y_train, X_test_scaled, y_test, feature_names):
    
    model.fit(X_train_scaled, y_train)
    
    print('MAE:', mean_absolute_error(y_test, model.predict(X_test_scaled)))
    print('MSE:', mean_squared_error(y_test, model.predict(X_test_scaled)))
    print('RMSE:', np.sqrt(mean_squared_error(y_test, model.predict(X_test_scaled))))
    
    print(pd.DataFrame(model.coef_, feature_names, columns = ['coef']).sort_values(by = 'coef', ascending = False))

In [22]:
train_validate_report(ridge_cv, X_train_scaled, y_train, X_test_scaled, y_test, feature_names = df_2.columns[:])

MAE: 2.4091779168014202
MSE: 8.765646670396103
RMSE: 2.9606834802788535
        coef
F2  0.173727
F4  0.159520
F6  0.125949
F7  0.118288
T3 -0.058801
F9 -0.075542
F3 -0.161315
