In [2]:
"""Import the required modules"""
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from scipy.linalg import pinv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import timeit

Analyzing welding data by welding types

In [10]:
# Load the SAW welding data and discard the two columns that are not used
# in the modelling below.
df2 = pd.read_csv('saw.csv').drop(columns=['welding_type', 'heat_input'])

df2.head()

Unnamed: 0,current,voltage,welding_speed,plate_thickness,tensile_strength,flexural_strength,hardness_value,residual_stress
0,100,28,120,8,385.5,276.5,128.4,186.6
1,160,32,120,6,354.6,270.0,108.4,312.8
2,160,28,90,10,343.7,268.2,95.6,325.2
3,160,24,150,8,410.5,302.4,136.6,260.8
4,100,32,150,10,370.2,272.8,116.2,142.0


Exploratory data analysis

Generate the necessary variables for modeling

In [11]:
# The last four columns of df2 are the responses; everything before them is
# treated as the predictors.
X1, y1 = df2.iloc[:, :-4], df2.iloc[:, -4:]
# NumPy views of the same data, used by the model code.
X, y = X1.to_numpy(), y1.to_numpy()

In [14]:
def relu(x):
    '''Rectified linear unit: element-wise max(x, 0).

    Input: x(array-like or scalar)
    Output: array (or NumPy scalar) with negative entries replaced by 0

    The result is returned as a new object. The previous implementation
    passed ``x`` as the ``out`` argument of ``np.maximum``, which silently
    overwrote the caller's array and raised for non-ndarray inputs.
    '''
    return np.maximum(x, 0)

In [15]:
def hidden_nodes(X):
    '''Computes the hidden-layer activations for the inputs X.

    Reads the module-level ``input_weights`` and ``biases``, so both must
    be assigned before this function is called.

    Input: X(array)
    Output: array, relu(X . input_weights + biases)
    '''
    pre_activation = np.dot(X, input_weights) + biases
    return relu(pre_activation)

In [16]:
def predict(X):
    '''Returns the model output for the inputs X.

    Projects the hidden-layer activations of X through the module-level
    ``output_weights``, which must be assigned before this is called.

    Input: X(array)
    Output: array
    '''
    return np.dot(hidden_nodes(X), output_weights)

In [17]:
def MAD(y, y_pred):
    '''Mean Absolute Deviation between two arrays of numbers.

    Input: y(array), y_pred(array)
    Output: float, the average of |y - y_pred| over all elements
    '''
    abs_dev = np.abs(y - y_pred)
    return np.average(abs_dev)

In [18]:
def MAPE(y, y_pred):
    '''
    Calculates the Mean Absolute Percentage Error: the average of
    |y - y_pred| / |y|, expressed as a percentage.

    The absolute value is taken of the whole ratio, so negative targets
    contribute a positive percentage instead of cancelling out the
    positive ones. Results for strictly positive targets are unchanged.

    Input: y(array), y_pred(array)
    Output: float

    Note: a target equal to 0 leads to a division by zero, so the result
    is inf or nan in that case.
    '''
    # Calculate prediction error
    error = y - y_pred

    return np.average(np.abs(error / y) * 100)

In [19]:
def TS(y, y_pred):
    '''
    Index used to track the accuracy of the model: the sum of the
    prediction errors divided by their mean absolute deviation.

    Input: y(array), y_pred(array)
    Output: float
    '''
    residuals = y - y_pred
    # Mean absolute deviation of the residuals (same formula as MAD).
    mean_abs_dev = np.average(np.abs(residuals))

    return np.sum(residuals) / mean_abs_dev

In [20]:
def split_name(name, char='_'):
    '''Turns a column name such as 'tensile_strength' into 'Tensile Strength'.

    The name is split at the first occurrence of ``char`` only; both parts
    are capitalized and joined with a single space.

    Input: name(str), char(str) separator, '_' by default
    Output: str

    If ``char`` is empty or does not occur in ``name`` there is nothing to
    split, so the capitalized name is returned. The previous version sliced
    with the -1 returned by ``str.find`` and produced e.g. 'Hardnes Hardness'.
    '''
    if not char:
        return name.capitalize()

    # partition() consumes the whole separator, so multi-character
    # separators do not leak into the second part.
    head, sep, tail = name.partition(char)
    if not sep:
        return name.capitalize()
    return head.capitalize() + ' ' + tail.capitalize()


In [21]:
# Split data in readiness for training (20 % of the rows are held out).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.2)

# Convert the test targets to a DataFrame.
# The labels come from y1 so they always match the column order of y; the
# previous hard-coded list was in a different order and mislabelled the columns.
y_test_df = pd.DataFrame(y_test, columns=y1.columns)

Predicting each response variable separately to optimize prediction

In [28]:
# Fit one single-output ELM per response variable for several hidden-layer
# sizes and collect the evaluation metrics of every fit.

# Seeded generator so the random hidden-layer weights, and therefore every
# reported metric, are reproducible after Restart & Run All.
rng = np.random.default_rng(1)

metrics = []
for hidden_size in range(10, 35, 10):  # 10, 20 and 30 hidden nodes
    print(f'\nELM Analysis for {hidden_size} Node(s)')
    for i in range(y.shape[1]):
        target_name = y1.columns[i]
        cols = y[:, i]
        # NOTE: this rebinds X_train/X_test/y_train/y_test from the earlier
        # split cell, now with a 50/50 split on a single target column.
        X_train, X_test, y_train, y_test = train_test_split(X, cols, random_state=1, test_size=0.5)

        # timeit.timeit() with no arguments benchmarks `pass` instead of
        # reading a clock, so default_timer() is used for wall-clock timing.
        tic = timeit.default_timer()  # Start timer
        input_size = X_train.shape[1]
        # hidden_nodes() and predict() read input_weights, biases and
        # output_weights as module-level globals, so they are assigned here.
        input_weights = rng.normal(size=(input_size, hidden_size))
        biases = rng.normal(size=hidden_size)
        # Output weights are the least-squares solution via the pseudo-inverse.
        # NOTE(review): scipy.linalg.pinv2 was deprecated in SciPy 1.7 and
        # removed in 1.9; scipy.linalg.pinv is its documented replacement.
        output_weights = np.dot(pinv2(hidden_nodes(X_train)), y_train)
        prediction = predict(X_test)
        toc = timeit.default_timer()  # Stop timer

        # Display expected vs. predicted values side by side
        print(f'\n{target_name}: {hidden_size} Nodes')
        a_df = pd.DataFrame({'Expected': y_test, 'Predicted': prediction})
        print(a_df)

        # Optional parity plot (left disabled):
        # plt.plot(y_test, prediction, '*', label='Predicted')
        # plt.plot(y_test, y_test, label='Best fit line')
        # plt.title(f'{split_name(target_name)} Prediction with {hidden_size} Nodes')
        # plt.legend()
        # plt.show()

        # Calculate the necessary metrics.
        # r2_score expects (y_true, y_pred); it is not symmetric, so the
        # order of the arguments matters.
        r2 = r2_score(y_test, prediction)
        mse = mean_squared_error(y_test, prediction)
        rmse = np.sqrt(mse)
        mad = MAD(y_test, prediction)
        mape = MAPE(y_test, prediction)
        ts = TS(y_test, prediction)
        elapsed = toc - tic  # seconds spent fitting and predicting

        # Update metrics with one row per (variable, hidden size) pair
        metrics.append([target_name, hidden_size, r2, mse, rmse, mad, mape, ts, elapsed])


metrics_df = pd.DataFrame(metrics, columns=['Variable', 'Nodes', 'R2', 'MSE', 'RMSE', 'MAD', 'MAPE', 'TS', 'Time'])

print(metrics_df)


tensile_strength: 10 Nodes
   Expected   Predicted
0     430.5  416.526510
1     343.7  684.802797
2     350.8  662.363409
3     390.6  390.873490
4     354.6  659.149778

flexural_strength: 10 Nodes
   Expected   Predicted
0     312.6  292.507089
1     268.2  640.589771
2     264.8  570.079082
3     293.2  288.028264
4     270.0  548.155571

hardness_value: 10 Nodes
   Expected   Predicted
0     152.4  136.282400
1      95.6  363.446300
2      98.5  320.020154
3     130.2  135.295847
4     108.4  296.340778

residual_stress: 10 Nodes
   Expected   Predicted
0     265.4  247.988050
1     325.2  659.929134
2     318.5  526.496351
3     220.6  174.769451
4     312.8  539.308301

tensile_strength: 20 Nodes
   Expected   Predicted
0     430.5  361.159915
1     343.7  706.266773
2     350.8  719.810847
3     390.6  434.840085
4     354.6  737.605463

flexural_strength: 20 Nodes
   Expected   Predicted
0     312.6  282.690514
1     268.2  655.699313
2     264.8  594.467457
3     293.2  295.