In [297]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder,OrdinalEncoder,LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.impute import SimpleImputer
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, confusion_matrix, accuracy_score, recall_score, f1_score, precision_score,ConfusionMatrixDisplay
from xgboost import XGBRegressor, 
from ydata_profiling import ProfileReport
import joblib as jb


In [298]:
data_path = 'kaggle_data.csv'
imp_data = pd.read_csv(data_path)


In [299]:
#Data Analysis

imp_data.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [300]:
from sklearn.base import TransformerMixin 
class MyLabelEncoder(TransformerMixin):
    def __init__(self, *args, **kwargs):
        self.encoder = LabelEncoder(*args, **kwargs)
    def fit(self, x, y=0):
        self.encoder.fit(x)
        return self
    def transform(self, x, y=0):
        return self.encoder.transform(x)

In [301]:
#Preprocess
drop_cols = ['UDI',	'Product ID',	'Type',	'Air temperature [K]', 'Torque [Nm]',	'Tool wear [min]','Failure Type']
data = imp_data.drop(columns=drop_cols)


In [302]:
#profile = ProfileReport(data,explorative=True)
#profile

In [303]:
for i in range(0,len(data.index)):
    data.loc[i,'Process temperature [K]'] = data.loc[i,'Process temperature [K]'] - 273.15

In [304]:
data

Unnamed: 0,Process temperature [K],Rotational speed [rpm],Target
0,35.45,1551,0
1,35.55,1408,0
2,35.35,1498,0
3,35.45,1433,0
4,35.55,1408,0
...,...,...,...
9995,35.25,1604,0
9996,35.25,1632,0
9997,35.45,1645,0
9998,35.55,1408,0


In [305]:
model = ()

In [306]:

feature_map = {'Process temperature [K]':'Process temperature','Rotational speed [rpm]':'Rotational speed'}

targets = ['Target']
features = ['Process temperature','Rotational speed']
data = data.rename(columns=feature_map)

X = data[features]
Y =data[targets]



In [307]:
num_feat = data.select_dtypes(include=['number']).columns.tolist()
cat_feat = data.select_dtypes(include=['object', 'category']).columns.tolist()
num_feat.remove('Target')

In [308]:
num_transformer = Pipeline(steps = [('imputer1',SimpleImputer()),('scaler',StandardScaler())])
#cat_transformer = Pipeline(steps = [('imputer2',SimpleImputer(strategy='most_frequent')),('encode',MyLabelEncoder())])

In [309]:
preprocess = ColumnTransformer(transformers=[('num',num_transformer,num_feat)])
main = Pipeline(steps=[('preprocess',preprocess)])

In [310]:
def evaluate(model,X_test,Y_test):
    print(model,':')
    pred = model.predict(X_test)
    print(mean_absolute_error(pred,Y_test))
    print(np.sqrt(mean_squared_error(pred,Y_test)))
  

In [311]:
def evaluate2(model,X_test,Y_test):
    print(model,':')
    pred = model.predict(X_test)
    print(accuracy_score(pred,Y_test))
    print(confusion_matrix(pred,Y_test))

In [312]:
X = preprocess.fit_transform(X)

In [313]:
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [314]:
from sklearn.linear_model import XGBClassifier
start = time.time()
model = XGBClassifier().fit(X_train,y_train)
end_train = time.time()
y_predictions = model.predict(X_test)
end_predict = time.time()

accuracy = accuracy_score(y_test, y_predictions)
recall = recall_score(y_test, y_predictions, average='weighted')
precision = precision_score(y_test, y_predictions, average='weighted')
f1s = f1_score(y_test, y_predictions, average='weighted')

print("Accuracy: "+ "{:.2%}".format(accuracy))
print("Recall: "+ "{:.2%}".format(recall))
print("Precision: "+ "{:.2%}".format(precision))
print("F1-Score: "+ "{:.2%}".format(f1s))
print("time to train: "+ "{:.2f}".format(end_train-start)+" s")
print("time to predict: "+"{:.2f}".format(end_predict-end_train)+" s")
print("total: "+"{:.2f}".format(end_predict-start)+" s")
    
    

ImportError: cannot import name 'XGBClassifier' from 'sklearn.linear_model' (c:\Users\moham\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\linear_model\__init__.py)

In [None]:
jb.dump(filename='predict.pkl',value=model)

['predict.pkl']

In [None]:
jb.dump(filename='prep.pkl',value=preprocess)

['prep.pkl']