In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.calibration import CalibratedClassifierCV


In [2]:
# Read the training set and discard the 'id' and 'Product ID' columns in one chained step.
ds1 = pd.read_csv('train.csv').drop(columns=['id', 'Product ID'])


In [3]:
def get_categorical_and_numeric_columns(df, target='Machine failure'):
    """Split the feature columns of ``df`` into categorical and numeric names.

    A column counts as categorical when its dtype is ``object``, a pandas
    string dtype, or a pandas ``CategoricalDtype``; every other column is
    reported as numeric. The ``target`` column is never reported in either
    list, whatever its dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be classified.
    target : str, default 'Machine failure'
        Name of the label column to leave out of both lists.

    Returns
    -------
    tuple[list, list]
        ``(categorical_columns, numeric_columns)``, each in ``df`` column order.
    """
    categorical_columns = []
    numeric_columns = []

    # Iterating over ``df.dtypes`` yields one (name, dtype) pair per column,
    # which also works when column labels are duplicated.
    for column, dtype in df.dtypes.items():
        if column == target:
            # The label is not a feature, regardless of how it is stored.
            continue

        # isinstance checks replace the deprecated
        # ``pd.api.types.is_categorical_dtype`` and also catch the dedicated
        # pandas string dtype, which does not compare equal to 'object'.
        is_categorical = (
            pd.api.types.is_object_dtype(dtype)
            or isinstance(dtype, (pd.CategoricalDtype, pd.StringDtype))
        )
        if is_categorical:
            categorical_columns.append(column)
        else:
            numeric_columns.append(column)

    return categorical_columns, numeric_columns


# Classify the training columns once; both lists are reused when the preprocessor is built.
categorical_columns, numeric_columns = get_categorical_and_numeric_columns(df=ds1)
(categorical_columns, numeric_columns)

(['Type'],
 ['Air temperature [K]',
  'Process temperature [K]',
  'Rotational speed [rpm]',
  'Torque [Nm]',
  'Tool wear [min]',
  'TWF',
  'HDF',
  'PWF',
  'OSF',
  'RNF'])

In [4]:
# Separate the label from the features.
# The target is selected by name (not by column position) and ds1 itself is
# left untouched, so this cell gives the same result every time it is run.
ytrain = ds1['Machine failure']
xtrain = ds1.drop(columns=['Machine failure'])


In [5]:
# Show the training features followed by the training labels.
for frame in (xtrain, ytrain):
    print(frame)

       Type  Air temperature [K]  Process temperature [K]  \
0         L                300.6                    309.6   
1         M                302.6                    312.1   
2         L                299.3                    308.5   
3         L                301.0                    310.9   
4         M                298.0                    309.0   
...     ...                  ...                      ...   
136424    M                300.1                    311.4   
136425    H                297.5                    308.5   
136426    L                300.5                    311.8   
136427    L                301.7                    310.9   
136428    L                296.9                    308.1   

        Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  TWF  HDF  PWF  \
0                         1596         36.1              140    0    0    0   
1                         1759         29.1              200    0    0    0   
2                         1805

In [6]:
# Load the test set.
ds2 = pd.read_csv('test.csv')
# NOTE: despite its name, `ytest` holds the test-set 'id' column (there are no
# labels here); it is selected by name so column order does not matter.
ytest = ds2['id']
# Remove the same two columns that were dropped from the training data.
ds2 = ds2.drop(columns=['id', 'Product ID'])
xtest = ds2




In [7]:
# Show the test features followed by the test ids.
for frame in (xtest, ytest):
    print(frame)

      Type  Air temperature [K]  Process temperature [K]  \
0        L                302.3                    311.5   
1        L                301.7                    311.0   
2        L                301.3                    310.4   
3        M                300.1                    309.6   
4        M                303.4                    312.3   
...    ...                  ...                      ...   
90949    L                302.3                    311.4   
90950    L                297.9                    309.8   
90951    L                295.6                    306.2   
90952    L                298.1                    307.8   
90953    L                303.5                    312.8   

       Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  TWF  HDF  PWF  \
0                        1499         38.0               60    0    0    0   
1                        1713         28.8               17    0    0    0   
2                        1525         37.7   

In [8]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import catboost as cb

In [9]:
# Hold out 20% of the labelled rows for validation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(xtrain, ytrain, test_size=0.2, random_state=42)

# RBF-kernel SVM. probability=True is required for predict_proba; scikit-learn
# fits those probabilities with an internal, shuffled cross-validation, so
# random_state is fixed to make them reproducible between runs.
classifier1 = svm.SVC(kernel='rbf', probability=True, random_state=42)

# Alternative model, not used by the pipeline below; seeded for reproducibility.
classifier2 = RandomForestClassifier(max_depth=10, random_state=42)

# Define the transformations for each column type.
# handle_unknown='ignore' encodes a category that was absent from the fitted
# split as all zeros instead of raising at predict time.
preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
        ('numeric', StandardScaler(), numeric_columns)
    ])

# Define the pipeline: preprocessing followed by the SVM classifier.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier1)
])
pipeline.fit(X_train, y_train)

In [11]:
import tensorflow as tf
from keras import layers, models

In [None]:
# X_train = X_train.reshape((-1, X_train.shape[1], 1))
# X_test = X_test.reshape((-1, X_test.shape[1], 1))

In [None]:
# model = models.Sequential()
# model.add(layers.Conv1D(32, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
# model.add(layers.MaxPooling1D(2))
# model.add(layers.Flatten())
# model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dense(1, activation='sigmoid'))

In [None]:
# model.compile(optimizer='adam',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
# model.fit(X_train, y_train, epochs=10, batch_size=128)

# # Make predictions on the test data
# y_pred = model.predict(X_test)
# y_pred_classes = (y_pred > 0.5).astype(int)

# # Calculate the accuracy of the model
# accuracy = accuracy_score(y_test, y_pred_classes)
# print("Accuracy:", accuracy)


In [12]:
# Predict on the held-out split (X_test/y_test from train_test_split).
y_pred = pipeline.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)
y_pred

Accuracy: 0.9961518727552591


array([1, 0, 0, ..., 0, 0, 0])

In [None]:
# xtest_s = xtest_s.reshape((-1, xtest_s.shape[1], 1))

In [None]:
# ypred=model.predict(xtest_s)
# ypred_classes = (ypred > 0.5).astype(int)
# len(ypred_classes)
# ypred_classes = pd.Series(ypred_classes.flatten())

# data = {'id': ytest,'Machine Failure': ypred_classes}
# info = pd.DataFrame(data)
# info.to_csv('sub4.csv',encoding='utf-8',index=False)

In [13]:
# Class probabilities for the test set; keep only the last column.
ypred = pipeline.predict_proba(xtest)[:, -1]
ypred

array([0.00390072, 0.00380718, 0.00390988, ..., 0.00352268, 0.00381422,
       0.00359692])

In [15]:
# Build the submission table: one row per test id with its predicted probability.
# `ytest` holds the test-set 'id' column. The prediction column is named
# 'Machine failure' so it matches the target column name used in train.csv.
data = {'id': ytest, 'Machine failure': ypred}
info = pd.DataFrame(data)
info.to_csv('sub7.csv', encoding='utf-8', index=False)