In [None]:
# importing the Libraies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
dataset=pd.read_csv('PP_Mtrls_and_their_Mchncl_Prprts.csv')
dataset.head()

Unnamed: 0,Material,Su,Sy,E,G,mu,Ro,Use
0,ANSI Steel SAE 1015 asrolled,421,314,207000,79000,0.3,7860,True
1,ANSI Steel SAE 1015 normalized,424,324,207000,79000,0.3,7860,True
2,ANSI Steel SAE 1015 annealed,386,284,207000,79000,0.3,7860,True
3,ANSI Steel SAE 1020 asrolled,448,331,207000,79000,0.3,7860,True
4,ANSI Steel SAE 1020 normalized,441,346,207000,79000,0.3,7860,True


In [3]:
dataset.drop('Material', axis=1, inplace=True)

In [4]:
columns_to_drop = ['Sy','mu']
dataset = dataset.drop(columns=columns_to_drop)
dataset.head()

Unnamed: 0,Su,E,G,Ro,Use
0,421,207000,79000,7860,True
1,424,207000,79000,7860,True
2,386,207000,79000,7860,True
3,448,207000,79000,7860,True
4,441,207000,79000,7860,True


In [5]:
dataset['Use'] = dataset['Use'].astype('object')
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 802 entries, 0 to 801
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Su      802 non-null    int64 
 1   E       802 non-null    int64 
 2   G       802 non-null    int64 
 3   Ro      802 non-null    int64 
 4   Use     802 non-null    object
dtypes: int64(4), object(1)
memory usage: 31.5+ KB


In [6]:
dataset=pd.get_dummies(dataset,drop_first=True)

dataset.head()

Unnamed: 0,Su,E,G,Ro,Use_True
0,421,207000,79000,7860,True
1,424,207000,79000,7860,True
2,386,207000,79000,7860,True
3,448,207000,79000,7860,True
4,441,207000,79000,7860,True


In [7]:
label_encoder = LabelEncoder()

# Fit and transform the target column
dataset['Use_True'] = label_encoder.fit_transform(dataset['Use_True'])
dataset

Unnamed: 0,Su,E,G,Ro,Use_True
0,421,207000,79000,7860,1
1,424,207000,79000,7860,1
2,386,207000,79000,7860,1
3,448,207000,79000,7860,1
4,441,207000,79000,7860,1
...,...,...,...,...,...
797,980,206000,80000,7860,0
798,1100,206000,80000,7860,0
799,1200,206000,80000,7860,0
800,1030,206000,80000,7860,0


In [8]:
indep=dataset[['Su','E','G','Ro']]
dep=dataset["Use_True"]

In [9]:
#split into training set and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size = 1/3, random_state = 0)

In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [11]:
from sklearn.tree import DecisionTreeClassifier

In [12]:
from sklearn.model_selection import GridSearchCV
param_grid = {'criterion':['gini','entropy'],
              'max_features': ['auto','sqrt','log2'],
              'splitter':['best','random']} 
grid = GridSearchCV(DecisionTreeClassifier(), param_grid, refit = True, verbose = 3,n_jobs=-1,scoring='f1_weighted') 
grid.fit(X_train, y_train) 

Fitting 5 folds for each of 12 candidates, totalling 60 fits




In [13]:
re=grid.cv_results_
grid_predictions = grid.predict(X_test) 
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, grid_predictions)
print("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[253   2]
 [  5   8]]


In [14]:
from sklearn.metrics import classification_report
clf_report = classification_report(y_test, grid_predictions)
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       255
           1       0.80      0.62      0.70        13

    accuracy                           0.97       268
   macro avg       0.89      0.80      0.84       268
weighted avg       0.97      0.97      0.97       268



In [15]:
from sklearn.metrics import f1_score
f1_macro=f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'criterion': 'gini', 'max_features': 'sqrt', 'splitter': 'best'}: 0.9722535302131602


In [16]:
from sklearn.metrics import roc_auc_score
roc_auc_score=roc_auc_score(y_test,grid.predict_proba(X_test)[:,1])
print("The roc_auc_score is:",roc_auc_score)

The roc_auc_score is: 0.9556561085972851


In [17]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002074,0.001694,0.001449,0.000731,gini,auto,best,"{'criterion': 'gini', 'max_features': 'auto', ...",0.950789,0.973168,0.979531,0.957592,0.978866,0.967989,0.011682,2
1,0.000819,0.000229,0.00102,0.000362,gini,auto,random,"{'criterion': 'gini', 'max_features': 'auto', ...",0.973168,0.965547,0.951262,0.950368,0.978866,0.963842,0.011449,8
2,0.000406,8.6e-05,0.000902,0.00051,gini,sqrt,best,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.962617,0.965547,0.979531,0.957592,0.978866,0.96883,0.008842,1
3,0.000664,0.00029,0.001789,0.001168,gini,sqrt,random,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.943925,0.948321,0.959062,0.95955,0.965841,0.95534,0.008018,12
4,0.0015,0.001395,0.001749,0.001771,gini,log2,best,"{'criterion': 'gini', 'max_features': 'log2', ...",0.950789,0.965547,0.967495,0.957592,0.978866,0.964058,0.009499,7
5,0.000711,0.000272,0.000817,0.000506,gini,log2,random,"{'criterion': 'gini', 'max_features': 'log2', ...",0.950789,0.965547,0.979531,0.957592,0.957732,0.962238,0.009828,9
6,0.002174,0.001224,0.001355,0.000493,entropy,auto,best,"{'criterion': 'entropy', 'max_features': 'auto...",0.950789,0.965547,0.979531,0.957592,0.970195,0.964731,0.009949,5
7,0.000855,0.000405,0.001273,0.000872,entropy,auto,random,"{'criterion': 'entropy', 'max_features': 'auto...",0.962617,0.962617,0.981308,0.933824,0.965841,0.961241,0.015358,10
8,0.001056,0.000481,0.001394,0.000926,entropy,sqrt,best,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.962617,0.951375,0.979531,0.957592,0.978866,0.965996,0.011355,4
9,0.000667,0.000467,0.000751,0.000565,entropy,sqrt,random,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.962617,0.973168,0.979531,0.957592,0.950326,0.964647,0.010518,6


In [18]:
su_input=float(input("Ultimate Tensile Strength (Su) in MPa:"))
E_input=float(input("Elastic Modulus (E) in MPa:"))
g_input=float(input("Shear Modulus (G) in MPa:"))
ro_input=float(input("Density (Ro) in Kg/m3:"))

Ultimate Tensile Strength (Su) in MPa:1000
Elastic Modulus (E) in MPa:119876
Shear Modulus (G) in MPa:67543
Density (Ro) in Kg/m3:8912


In [20]:
Future_Prediction=grid.predict([[su_input,E_input,g_input,ro_input]])
y=("Future_Prediction={}".format(Future_Prediction))
if (y==0):
    print("Future_Prediction=[False]")
else:
    print('Future_Prediction=[True]')

Future_Prediction=[True]
[CV 2/5] END criterion=gini, max_features=auto, splitter=best;, score=0.973 total time=   0.0s
[CV 1/5] END criterion=entropy, max_features=auto, splitter=best;, score=0.951 total time=   0.0s
[CV 4/5] END criterion=entropy, max_features=auto, splitter=best;, score=0.958 total time=   0.0s
[CV 2/5] END criterion=entropy, max_features=auto, splitter=random;, score=0.963 total time=   0.0s
[CV 1/5] END criterion=entropy, max_features=sqrt, splitter=best;, score=0.963 total time=   0.0s
[CV 2/5] END criterion=entropy, max_features=sqrt, splitter=best;, score=0.951 total time=   0.0s
[CV 2/5] END criterion=entropy, max_features=sqrt, splitter=random;, score=0.973 total time=   0.0s
[CV 3/5] END criterion=entropy, max_features=sqrt, splitter=random;, score=0.980 total time=   0.0s
[CV 3/5] END criterion=entropy, max_features=log2, splitter=random;, score=0.971 total time=   0.0s
[CV 2/5] END criterion=gini, max_features=auto, splitter=random;, score=0.966 total time

Ultimate Tensile Strength (Su) in MPa:1000
Elastic Modulus (E) in MPa:119876
Shear Modulus (G) in MPa:67543
Density (Ro) in Kg/m3:8912