## Case 3: Процесс электронно-лучевой сварки
Выпускной проект по курсу: Аналитик данных МГТУ им. Н.Э. Баумана 

Группа: AD11902/1

Слушатель: Русаков Глеб Игоревич

Задача: определить значение глубины и ширины сварочного шва электронно-лучевой сварки.\
Регрессия.

Целевые переменные: 

* Depth - глубина шва
* Width - ширина шва

Используемые признаки: 

* IW - величина сварочного тока
* IF - ток фокусировки электронного пучка
* VW - скорость сварки
* FP - расстояние от поверхности образцов до электронно-оптической системы 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl

%matplotlib inline

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from scipy import stats

Загрузка датасета

In [None]:
# Load the raw dataset; the relative path is resolved against the current working directory.
data_orig = pd.read_csv('ebw_data.csv')

Анализ данных

In [None]:
# Line plot of the four input parameters against the row index.
raw_feature_axes = data_orig[['IW', 'IF', 'VW', 'FP']].plot()
raw_feature_axes.legend(loc='upper right', framealpha=0.3)

In [None]:
# Preview the first rows of the raw frame.
data_orig.head()

Проверка пропусков

In [None]:
# Missing values per column (isna is the canonical spelling of isnull).
data_orig.isna().sum()

In [None]:
# Column dtypes and non-null counts of the raw frame.
data_orig.info()

Описательная статистика

In [None]:
# Summary statistics, transposed so that every column becomes one row.
data_orig.describe().transpose()

Поиск дубликатов

In [None]:
# Number of distinct values in each column.
data_orig.nunique()

In [None]:
# Count of rows that exactly repeat an earlier row.
data_orig.duplicated().sum()

In [None]:
# Show the repeated rows themselves (first occurrences are not flagged).
duplicate_mask = data_orig.duplicated()
duplicate_data = data_orig.loc[duplicate_mask]
duplicate_data

Проверка пропусков

In [None]:
# Re-check missing values per column before the duplicates are dropped.
data_orig.isna().sum()

Удаление дубликатов

In [None]:
# New frame without exact duplicate rows; the first occurrence of each row is kept
# and the raw frame stays untouched.
data_orig_deduplicated = data_orig.drop_duplicates(keep='first')

Анализ после удаления дубликатов

In [None]:
# Row count, dtypes and non-null counts after de-duplication.
data_orig_deduplicated.info()

In [None]:
# Same feature plot as for the raw data, now on the de-duplicated frame.
dedup_feature_axes = data_orig_deduplicated[['IW', 'IF', 'VW', 'FP']].plot()
dedup_feature_axes.legend(loc='upper right', framealpha=0.3)

In [None]:
# Summary statistics of the de-duplicated frame, one row per column.
data_orig_deduplicated.describe().transpose()

Убедимся, что все дубликаты удалены

In [None]:
# Count of remaining duplicate rows after drop_duplicates().
data_orig_deduplicated.duplicated().sum()

Попарная зависимость переменных и гистограммы

In [None]:
# Scatter matrix of every column pair with per-column histograms on the diagonal.
sns.pairplot(data_orig_deduplicated)

Корреляция данных

In [None]:
# Pairwise correlation matrix (pandas default method), rounded to two decimals.
data_orig_deduplicated.corr().round(2)

In [None]:
# The figure size must be configured BEFORE drawing: rcParams only applies to figures
# created afterwards, so assigning it after sns.heatmap() left this heatmap at the
# previous default size.
plt.rcParams['figure.figsize'] = (15, 15)
dataplot = sns.heatmap(data_orig_deduplicated.corr(), cmap='YlGnBu', annot=True)
plt.show()

In [None]:
# One box per column of the de-duplicated frame, drawn on an explicit 10x10 axes.
_fig_all, _ax_all = plt.subplots(figsize=(10, 10))
sns.boxplot(data=data_orig_deduplicated, ax=_ax_all)
plt.show()

In [None]:
# Box plot of VW alone; the returned Axes is kept in `test` as in the original cell.
plt.figure(figsize=(10, 5))
vw_values = data_orig_deduplicated['VW']
test = sns.boxplot(data=vw_values)
plt.show()

In [None]:
# Box plot of FP alone.
plt.figure(figsize=(10, 5))
fp_values = data_orig_deduplicated['FP']
sns.boxplot(data=fp_values)
plt.show()

In [None]:
# Tukey fences (quartile -/+ 1.5 * IQR) for VW and FP on the de-duplicated data.
Q1VW, Q3VW = np.quantile(data_orig_deduplicated['VW'], [0.25, 0.75])
IQRVW = Q3VW - Q1VW
lowerVW, upperVW = Q1VW - 1.5 * IQRVW, Q3VW + 1.5 * IQRVW

Q1FP, Q3FP = np.quantile(data_orig_deduplicated['FP'], [0.25, 0.75])
IQRFP = Q3FP - Q1FP
lowerFP, upperFP = Q1FP - 1.5 * IQRFP, Q3FP + 1.5 * IQRFP

In [None]:
# Lower and upper outlier fence for VW.
print(lowerVW, upperVW)

In [None]:
# Lower and upper outlier fence for FP.
print(lowerFP, upperFP)

In [None]:
# Boolean masks of rows that lie inside the Tukey fences, bounds INCLUDED.
# A point is an outlier only when it is strictly beyond a fence (this is also what
# the box plots show); the previous strict comparison discarded points sitting
# exactly on a fence and would discard every row when IQR == 0.
QVW = data_orig_deduplicated['VW'].between(lowerVW, upperVW)
QFP = data_orig_deduplicated['FP'].between(lowerFP, upperFP)

In [None]:
# Keep only rows that pass both the VW and the FP mask.
inlier_mask = QVW & QFP
data_orig_drop = data_orig_deduplicated.loc[inlier_mask]

In [None]:
# Missing values per column after the outlier filter.
data_orig_drop.isnull().sum()

In [None]:
# Row count and dtypes after the first outlier filter.
data_orig_drop.info()

In [None]:
# How many rows remain for each distinct VW value.
data_orig_drop['VW'].value_counts()

In [None]:
# Renumber rows 0..n-1; the previous index labels are preserved as a new column.
data_orig_drop = data_orig_drop.reset_index(drop=False)

In [None]:
# Frame layout after the index reset.
data_orig_drop.info()

In [None]:
# Box plots of the features and targets after the first outlier pass.
plt.figure(figsize=(10, 10))
boxplot_columns = data_orig_drop[['IW', 'IF', 'VW', 'FP', 'Depth', 'Width']]
sns.boxplot(data=boxplot_columns)
plt.show()

In [None]:
# VW box plot after the first outlier pass; the Axes is bound to `test` as before.
plt.figure(figsize=(10, 5))
test = sns.boxplot(data=data_orig_drop['VW'])
plt.show()

In [None]:
# Recompute the VW fences on the already filtered data (second outlier pass).
Q1VW, Q3VW = np.quantile(data_orig_drop['VW'], [0.25, 0.75])
IQRVW = Q3VW - Q1VW
lowerVW, upperVW = Q1VW - 1.5 * IQRVW, Q3VW + 1.5 * IQRVW

In [None]:
# VW fences of the second pass.
print(lowerVW, upperVW)

In [None]:
# Rows whose VW lies inside the recomputed fences, bounds INCLUDED: values exactly on
# a fence are not outliers, and strict comparison would drop every row when IQR == 0.
QVW = data_orig_drop['VW'].between(lowerVW, upperVW)

In [None]:
# Apply the second-pass VW mask.
data_orig_drop = data_orig_drop.loc[QVW]

In [None]:
# Renumber rows again; the old index labels are once more kept as an extra column.
data_orig_drop = data_orig_drop.reset_index(drop=False)

In [None]:
# Frame layout after the second outlier pass and index reset.
data_orig_drop.info()

In [None]:
# Box plots of features and targets once both outlier passes are done.
_fig_clean, _ax_clean = plt.subplots(figsize=(10, 10))
sns.boxplot(data=data_orig_drop[['IW', 'IF', 'VW', 'FP', 'Depth', 'Width']], ax=_ax_clean)
plt.show()

Анализ данных без выбросов

In [None]:
# Remove the helper columns left behind by the two reset_index() calls.
# errors='ignore' keeps the cell re-runnable (the in-place version raised KeyError on
# a second run), and rebinding instead of inplace=True avoids mutating a frame that
# earlier cells already displayed. `axis=1` was redundant next to `columns=`.
data_orig_drop = data_orig_drop.drop(columns=['index', 'level_0'], errors='ignore')

In [None]:
# Feature plot for the outlier-filtered frame.
clean_feature_axes = data_orig_drop[['IW', 'IF', 'VW', 'FP']].plot()
clean_feature_axes.legend(loc='upper right', framealpha=0.3)

In [None]:
# Summary statistics of the outlier-filtered frame, one row per column.
data_orig_drop.describe().transpose()

In [None]:
# Scatter matrix of the outlier-filtered frame.
sns.pairplot(data_orig_drop)

Ядерная оценка плотности (KDE)

In [None]:
# Kernel density estimate of each input feature, all drawn on one shared axes.
_, ax = plt.subplots(figsize=(15, 12))
data_orig_drop[['IW', 'IF', 'VW', 'FP']].plot.kde(ax=ax)

Построение и обучение моделей. Нормализация

In [None]:
# Input feature names and regression target names used to slice the frame.
x_columns, y_columns = ['IW', 'IF', 'VW', 'FP'], ['Depth', 'Width']

In [None]:
# Feature matrix and target matrix taken from the outlier-filtered frame.
X = data_orig_drop.reindex(columns=x_columns)
y = data_orig_drop.reindex(columns=y_columns)

In [None]:
# First rows of the feature matrix.
X.head()

In [None]:
# First rows of the target matrix.
y.head()

Нормализация

In [None]:
# Scaler that maps every feature to the [0, 1] range (the MinMaxScaler default).
minmax_scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
# Fit the scaler on the feature columns and scale them in one step.
# NOTE(review): the scaler is fit on every row of X, i.e. before the train/test split
# further down, so the held-out rows influence the scaling.
feature_values = X[['IW', 'IF', 'VW', 'FP']].to_numpy()
data_x_norm = minmax_scaler.fit_transform(feature_values)

In [None]:
# Wrap the scaled array back into a labelled frame (fresh 0..n-1 index).
data_norm_x_df = pd.DataFrame(data_x_norm, columns=['IW', 'IF', 'VW', 'FP'])

In [None]:
# First rows of the scaled features.
data_norm_x_df.head()

In [None]:
# Dtypes and non-null counts of the scaled features.
data_norm_x_df.info()

In [None]:
# Summary statistics of the scaled features, one row per column.
data_norm_x_df.describe().transpose()

In [None]:
# Box plots of the scaled features.
_fig_norm, _ax_norm = plt.subplots(figsize=(10, 10))
sns.boxplot(data=data_norm_x_df, ax=_ax_norm)
plt.show()

In [None]:
# Distinct scaled values of every feature, printed one array per line.
for _feature in ('IW', 'IF', 'VW', 'FP'):
    print(data_norm_x_df[_feature].unique())

In [None]:
# Kernel density estimate of each scaled feature on one shared axes.
_, ax = plt.subplots(figsize=(15, 12))
data_norm_x_df[['IW', 'IF', 'VW', 'FP']].plot.kde(ax=ax)

In [None]:
# x_columns = ['IW', 'IF', 'VW', 'FP']
# y_columns = ['Depth', 'Width']

In [None]:
# X = pd.DataFrame(data_norm_df, columns = x_columns)
# y = pd.DataFrame(data_norm_df, columns= y_columns)

In [None]:
# X.head()

In [None]:
# y.head()

Разделение датасета на обучающую и тестовую выборки

In [None]:
# 80 % of the rows go to training; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data_norm_x_df,
    y,
    train_size=0.8,
    random_state=42,
)

Проверка размерностей

In [None]:
# Feature and target shapes of the training split; row counts must match.
print(X_train.shape, y_train.shape)

In [None]:
# Feature and target shapes of the test split; row counts must match.
print(X_test.shape, y_test.shape)

Построение нейросети

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

Входной размер

In [None]:
# Width of the network input layer = number of scaled feature columns.
n_features = len(data_norm_x_df.columns)

Построение модели и обучение

In [91]:


# Fully-connected regression network: n_features inputs -> 2 outputs (Depth, Width).
model = Sequential()

model.add(Dense(64, activation='relu', input_shape=(n_features,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
# Linear output for regression: a ReLU output unit can get stuck at 0 and then
# receives no gradient, so the corresponding target is never learned.
model.add(Dense(2, activation='linear'))

# 'accuracy' is a classification metric and carries no meaning for continuous
# targets; track mean absolute error instead. `metrics` is passed as a list, which
# is the documented form.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

history = model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=7,
    verbose=1,
)
# Per-epoch training MAE.
print(history.history['mae'])

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Оценка потерь и метрики на тестовой выборке

In [None]:
# Loss and compiled metric(s) of `model` on its held-out split.
model.evaluate(X_test, y_test)

Без удаления выбросов

In [None]:
# Same feature/target selection, this time from the frame that still has outliers.
x_columns, y_columns = ['IW', 'IF', 'VW', 'FP'], ['Depth', 'Width']
X2 = data_orig_deduplicated.reindex(columns=x_columns)
y2 = data_orig_deduplicated.reindex(columns=y_columns)

In [None]:
# Fresh [0, 1] scaler for the unfiltered data.
# NOTE(review): this rebinds `minmax_scaler`, so the scaler fitted earlier for the
# outlier-filtered features is no longer reachable under that name.
minmax_scaler = MinMaxScaler(feature_range=(0, 1))
features2 = X2[['IW', 'IF', 'VW', 'FP']].to_numpy()
data_x_norm2 = minmax_scaler.fit_transform(features2)
data_norm_x_df2 = pd.DataFrame(data_x_norm2, columns=['IW', 'IF', 'VW', 'FP'])

In [None]:
# 70 % of the unfiltered rows go to training; fixed seed for a reproducible split.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    data_norm_x_df2,
    y2,
    train_size=0.7,
    random_state=42,
)


In [None]:
# Input width of the second network = number of feature columns.
n_features2 = len(X_train2.columns)

In [None]:


# Deeper regression network trained on the data that still contains outliers.
modelnn = Sequential()

modelnn.add(Dense(128, activation='relu', input_shape=(n_features2,)))
modelnn.add(Dense(128, activation='relu'))
modelnn.add(Dense(128, activation='relu'))
modelnn.add(Dense(64, activation='relu'))
modelnn.add(Dense(32, activation='relu'))
modelnn.add(Dense(16, activation='relu'))
# Linear output for regression: a ReLU output unit can get stuck at 0 and then
# receives no gradient, so the corresponding target is never learned.
modelnn.add(Dense(2, activation='linear'))

# 'accuracy' is a classification metric and carries no meaning for continuous
# targets; track mean absolute error instead. `metrics` is passed as a list, which
# is the documented form.
modelnn.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

historynn = modelnn.fit(
    X_train2,
    y_train2,
    epochs=25,
    batch_size=3,
    verbose=1,
    validation_split=0.2,  # last 20 % of the training rows are used for validation
)
# Per-epoch training MAE.
print(historynn.history['mae'])

In [None]:
# Predictions of `modelnn` on its own training features (in-sample fit check).
prednn = modelnn.predict(X_train2)

In [None]:
# Training targets, for visual comparison with `prednn`.
y_train2

In [None]:
# In-sample predictions of `modelnn`.
prednn

In [None]:
# Evaluate on the held-out split. The targets must be passed as well: without
# y_test2 there is nothing to compute the loss and metric against.
modelnn.evaluate(X_test2, y_test2)

In [None]:
# Training targets of the outlier-filtered split.
y_train

In [None]:
# Predictions of `model` on its own training features (in-sample fit check).
pred = model.predict(X_train)

In [None]:
# In-sample predictions of `model`.
pred

In [None]:
# Training targets again, for side-by-side comparison with `pred`.
y_train

In [None]:
# `model` was trained on the outlier-filtered split, so it must be scored on the
# matching hold-out set (X_test / y_test). X_test2 / y_test2 come from a different
# frame that was scaled by a different MinMaxScaler fit, which made the previous
# score meaningless.
test = model.evaluate(X_test, y_test, batch_size=3, verbose=1)
test

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares baseline on the unfiltered training split.
model2 = LinearRegression().fit(X_train2, y_train2)
model2

In [None]:
# Score of the linear model on the held-out split.
model2.score(X_test2, y_test2)

In [None]:
# Held-out predictions of the linear model.
y_pred = model2.predict(X_test2)

# Show the first three predicted rows.
print(y_pred[:3])

In [None]:
from sklearn import metrics

# Root of the mean squared error between held-out targets and linear-model predictions.
mse_linear = metrics.mean_squared_error(y_test2, y_pred)
print('Среднеквадратическая ошибка (RMSE):', np.sqrt(mse_linear).round(2))

In [None]:
# R2 of the linear model on the held-out split, rounded to two decimals.
r2_linear = metrics.r2_score(y_test2, y_pred)
print('R2:', np.round(r2_linear, 2))

In [None]:
from sklearn.tree import DecisionTreeRegressor
# Decision-tree baseline. A fixed random_state makes the fitted tree, and therefore
# every reported score, reproducible across runs; 42 matches the seed used for the
# train/test splits.
model3 = DecisionTreeRegressor(random_state=42)
model3.fit(X_train2, y_train2)

In [None]:
# Score of the decision tree on the held-out split.
model3.score(X_test2, y_test2)

In [None]:
# Held-out predictions of the decision tree.
y_pred2 = model3.predict(X_test2)
# Print the tree's own predictions; the cell previously printed `y_pred`, which holds
# the linear-regression output.
print(y_pred2[:3])

In [None]:
# Root of the mean squared error between held-out targets and decision-tree predictions.
mse_tree = metrics.mean_squared_error(y_test2, y_pred2)
print('Среднеквадратическая ошибка (RMSE):', np.sqrt(mse_tree).round(2))

In [None]:
# R2 of the decision tree on the held-out split, rounded to two decimals.
r2_tree = metrics.r2_score(y_test2, y_pred2)
print('R2:', np.round(r2_tree, 2))

In [None]:
from sklearn.linear_model import MultiTaskLassoCV

In [None]:

# Multi-output Lasso with default settings, fitted on the unfiltered training split.
model4 = MultiTaskLassoCV()
model4.fit(X_train2, y_train2)

In [None]:
# Score of the Lasso model on the held-out split.
model4.score(X_test2, y_test2)

In [None]:
# Held-out predictions of the Lasso model.
y_pred3 = model4.predict(X_test2)

In [None]:
# y_pred3 was produced from X_test2, so it must be compared with y_test2. The cell
# previously used y_test, which belongs to the other (outlier-filtered) split and has
# different rows.
print('Среднеквадратическая ошибка (RMSE):', np.sqrt(metrics.mean_squared_error(y_test2, y_pred3)).round(2))

In [None]:
# R2 of the Lasso model on the held-out split, rounded to two decimals.
r2_lasso = metrics.r2_score(y_test2, y_pred3)
print('R2:', np.round(r2_lasso, 2))