In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
real_data=pd.read_csv('Mechanical_properties_of_low_alloy_steels.csv')
data=real_data.copy()

In [3]:
data.columns

Index(['Alloy code', ' C', ' Si', ' Mn', ' P', ' S', ' Ni', ' Cr', ' Mo',
       ' Cu', 'V', ' Al', ' N', 'Ceq', 'Nb + Ta', ' Temperature (°C)',
       ' 0.2% Proof Stress (MPa)', ' Tensile Strength (MPa)',
       ' Elongation (%)', ' Reduction in Area (%)'],
      dtype='object')

In [4]:
data.drop(['Alloy code', ' Tensile Strength (MPa)',' Elongation (%)', ' Reduction in Area (%)','Ceq'],axis=1,inplace=True)

In [5]:
data.head()

Unnamed: 0,C,Si,Mn,P,S,Ni,Cr,Mo,Cu,V,Al,N,Nb + Ta,Temperature (°C),0.2% Proof Stress (MPa)
0,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,27,342
1,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,100,338
2,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,200,337
3,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,300,346
4,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,400,316


In [6]:
yield_=data[' 0.2% Proof Stress (MPa)']
temp=data[' Temperature (°C)']
data.drop([' 0.2% Proof Stress (MPa)',' Temperature (°C)'],axis=1,inplace=True)

In [7]:
data['Yield_Strength']=yield_
data['Temperature']=temp
data.head()

Unnamed: 0,C,Si,Mn,P,S,Ni,Cr,Mo,Cu,V,Al,N,Nb + Ta,Yield_Strength,Temperature
0,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,342,27
1,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,338,100
2,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,337,200
3,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,346,300
4,0.12,0.36,0.52,0.009,0.003,0.089,0.97,0.61,0.04,0.0,0.003,0.0066,0.0,316,400


In [8]:
x=data.drop('Yield_Strength',axis=1)
y=data.Yield_Strength

In [9]:
print(x.shape, y.shape)

(915, 14) (915,)


In [10]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.15,random_state=1)
print(f'x_train:{x_train.shape}\ty_train:{y_train.shape}\nx_test:{x_test.shape}\ty_test:{y_test.shape}')

x_train:(777, 14)	y_train:(777,)
x_test:(138, 14)	y_test:(138,)


In [11]:
x_train=np.array(x_train)
x_test=np.array(x_test)

In [12]:
from sklearn.preprocessing import StandardScaler
sc_x=StandardScaler()
sc_y=StandardScaler()

x_train_scaled=sc_x.fit_transform(x_train)
y_train=np.array(y_train).reshape(len(y_train),1)
y_train_scaled=sc_y.fit_transform(y_train)

x_test_scaled=sc_x.fit_transform(x_test)

In [13]:
print(f'x_train_scaled:{x_train_scaled.shape}\ty_train_scaled:{y_train_scaled.shape}\nx_test_scaled:{x_test_scaled.shape}')

x_train_scaled:(777, 14)	y_train_scaled:(777, 1)
x_test_scaled:(138, 14)


In [14]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

lr=LinearRegression()
svr=SVR()
dtr=DecisionTreeRegressor()
rfr=RandomForestRegressor()
abr=AdaBoostRegressor()
gbr=GradientBoostingRegressor()
knr=KNeighborsRegressor()

In [15]:
scaled=[lr,svr,knr]
    
def predict(models):
    global best_model
    global max_r2
    best_model=None
    max_r2=0
    for reg in models:
        if reg in scaled:
            model=reg.fit(x_train_scaled,y_train_scaled)
            print(f'Training Score of model {model} is {model.score(x_train_scaled,y_train_scaled)}')
            y_pred=sc_y.inverse_transform(model.predict(x_test_scaled))
        else:
            model=reg.fit(x_train,y_train)
            print(f'Training Score of model {model} is {model.score(x_train,y_train)}')
            y_pred=model.predict(x_test)
        
        current_r2_score=r2_score(y_test,y_pred)
        print(f'R2-Score:{current_r2_score}')
        print(f'Mean Squared Error: {mean_squared_error(y_test,y_pred)}')
        print(f'Mean Absolute Error: {mean_absolute_error(y_test,y_pred)}')
        
        if current_r2_score>max_r2:
            max_r2=current_r2_score
            best_model=model
        
        print('------------------------------------------------------------------------------------------------')

In [16]:
predict([lr,svr,dtr,rfr,abr,gbr,knr])

Training Score of model LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False) is 0.8157133499214047
R2-Score:0.8544586871328652
Mean Squared Error: 2776.552588759605
Mean Absolute Error: 40.28832174430058
------------------------------------------------------------------------------------------------
Training Score of model SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False) is 0.938853137018491
R2-Score:0.954888187733888
Mean Squared Error: 860.6169386795139
Mean Absolute Error: 21.83090198614225
------------------------------------------------------------------------------------------------
Training Score of model DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_sa

In [17]:
pred=np.array([[0.22,0.22,1.24,0.021,0.008,0.03,0.05,0.017,0.01,0.005,0.005,0.0116,0.0017,150]])

In [18]:
best_model.predict(pred)

array([378.])

In [19]:
pred=np.array([[0.15,0.28,1.32,0.019,0.005,0.077,0.053,0.008,0.08,0.07,0.017,0.0066,0,100]])

In [20]:
best_model.predict(pred)

array([482.3])

In [21]:
import joblib
joblib.dump(best_model,'yield_model_v21.obj')

['yield_model_v21.obj']

# TODO:

### Try gridsearch

In [22]:
import sklearn

In [23]:
sklearn.__version__

'0.21.3'