In [1]:
import pandas as pd

# Load the dataset
file_path = "machine failure.csv"
df = pd.read_csv(file_path)

In [3]:
df.head(10)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0
5,6,M14865,M,298.1,308.6,1425,41.9,11,0,0,0,0,0,0
6,7,L47186,L,298.1,308.6,1558,42.4,14,0,0,0,0,0,0
7,8,L47187,L,298.1,308.6,1527,40.2,16,0,0,0,0,0,0
8,9,M14868,M,298.3,308.7,1667,28.6,18,0,0,0,0,0,0
9,10,M14869,M,298.5,309.0,1741,28.0,21,0,0,0,0,0,0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      10000 non-null  int64  
 1   Product ID               10000 non-null  object 
 2   Type                     10000 non-null  object 
 3   Air temperature [K]      10000 non-null  float64
 4   Process temperature [K]  10000 non-null  float64
 5   Rotational speed [rpm]   10000 non-null  int64  
 6   Torque [Nm]              10000 non-null  float64
 7   Tool wear [min]          10000 non-null  int64  
 8   Machine failure          10000 non-null  int64  
 9   TWF                      10000 non-null  int64  
 10  HDF                      10000 non-null  int64  
 11  PWF                      10000 non-null  int64  
 12  OSF                      10000 non-null  int64  
 13  RNF                      10000 non-null  int64  
dtypes: float64(3), int64(9)

In [6]:
df.isnull().sum()

UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Machine failure            0
TWF                        0
HDF                        0
PWF                        0
OSF                        0
RNF                        0
dtype: int64

In [7]:
from sklearn.preprocessing import LabelEncoder

# Copy the dataframe to preserve original data
df_fe = df.copy()

In [8]:
# Encoding categorical feature 'Type'
label_encoder = LabelEncoder()
df_fe['Type_Encoded'] = label_encoder.fit_transform(df_fe['Type'])

In [9]:
#  Creating interaction features
df_fe['Torque_per_RPM'] = df_fe['Torque [Nm]'] / (df_fe['Rotational speed [rpm]'] + 1)  # Avoid division by zero
df_fe['Temperature_Diff'] = df_fe['Process temperature [K]'] - df_fe['Air temperature [K]']

In [10]:
# Rolling statistics (Window = 5 for demonstration)
df_fe['Rolling_Mean_Torque'] = df_fe['Torque [Nm]'].rolling(window=5, min_periods=1).mean()
df_fe['Rolling_Std_Torque'] = df_fe['Torque [Nm]'].rolling(window=5, min_periods=1).std()

In [11]:
#  Aggregated statistics by 'Type'
agg_features = df_fe.groupby('Type').agg({
    'Air temperature [K]': ['mean', 'std'],
    'Process temperature [K]': ['mean', 'std'],
    'Rotational speed [rpm]': ['mean', 'std'],
    'Torque [Nm]': ['mean', 'std']
})
agg_features.columns = ['_'.join(col) for col in agg_features.columns]  # Flatten column names

In [12]:
# Merge back aggregated features
df_fe = df_fe.merge(agg_features, on='Type', how='left')

In [14]:
# 5. Log Transformation for Torque (to normalize distribution)
## Need to import Numpy library
import numpy as np
df_fe['Log_Torque'] = df_fe['Torque [Nm]'].apply(lambda x: np.log1p(x))

In [15]:
# Drop original categorical columns that were encoded
df_fe.drop(columns=['Type', 'Product ID'], inplace=True)

In [17]:
# Display the updated dataset structure
df_fe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 26 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   UDI                           10000 non-null  int64  
 1   Air temperature [K]           10000 non-null  float64
 2   Process temperature [K]       10000 non-null  float64
 3   Rotational speed [rpm]        10000 non-null  int64  
 4   Torque [Nm]                   10000 non-null  float64
 5   Tool wear [min]               10000 non-null  int64  
 6   Machine failure               10000 non-null  int64  
 7   TWF                           10000 non-null  int64  
 8   HDF                           10000 non-null  int64  
 9   PWF                           10000 non-null  int64  
 10  OSF                           10000 non-null  int64  
 11  RNF                           10000 non-null  int64  
 12  Type_Encoded                  10000 non-null  int32  
 13  To

In [18]:
df_fe.head(10)

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,...,Rolling_Std_Torque,Air temperature [K]_mean,Air temperature [K]_std,Process temperature [K]_mean,Process temperature [K]_std,Rotational speed [rpm]_mean,Rotational speed [rpm]_std,Torque [Nm]_mean,Torque [Nm]_std,Log_Torque
0,1,298.1,308.6,1551,42.8,0,0,0,0,0,...,,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.779634
1,2,298.2,308.7,1408,46.3,3,0,0,0,0,...,2.474874,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.85651
2,3,298.1,308.5,1498,49.4,5,0,0,0,0,...,3.30202,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.919991
3,4,298.2,308.6,1433,39.5,7,0,0,0,0,...,4.28719,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.701302
4,5,298.2,308.7,1408,40.0,9,0,0,0,0,...,4.22315,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.713572
5,6,298.1,308.6,1425,41.9,11,0,0,0,0,...,4.284507,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.758872
6,7,298.1,308.6,1558,42.4,14,0,0,0,0,...,3.972782,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.770459
7,8,298.1,308.6,1527,40.2,16,0,0,0,0,...,1.270827,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.718438
8,9,298.3,308.7,1667,28.6,18,0,0,0,0,...,5.697543,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.387774
9,10,298.5,309.0,1741,28.0,21,0,0,0,0,...,7.278874,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.367296


In [19]:
df_fe.tail(10)

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,...,Rolling_Std_Torque,Air temperature [K]_mean,Air temperature [K]_std,Process temperature [K]_mean,Process temperature [K]_std,Rotational speed [rpm]_mean,Rotational speed [rpm]_std,Torque [Nm]_mean,Torque [Nm]_std,Log_Torque
9990,9991,298.8,308.5,1527,36.2,3,0,0,0,0,...,6.713941,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.616309
9991,9992,298.9,308.4,1827,26.1,5,0,0,0,0,...,5.918868,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.299534
9992,9993,298.8,308.4,1484,39.2,8,0,0,0,0,...,6.477885,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.693867
9993,9994,298.8,308.4,1401,47.3,10,0,0,0,0,...,7.794549,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.877432
9994,9995,298.8,308.3,1634,27.9,12,0,0,0,0,...,8.651763,300.015833,1.987453,310.0123,1.475247,1539.469167,180.428518,39.9966,10.012335,3.363842
9995,9996,298.8,308.4,1604,29.5,14,0,0,0,0,...,8.997222,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.417727
9996,9997,298.9,308.4,1632,31.8,17,0,0,0,0,...,8.058102,299.866999,2.021831,309.925723,1.489362,1538.147557,173.133428,39.838285,9.642339,3.490429
9997,9998,299.0,308.6,1645,33.4,22,0,0,0,0,...,7.738669,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.538057
9998,9999,299.0,308.7,1408,48.5,25,0,0,0,0,...,8.256331,299.866999,2.021831,309.925723,1.489362,1538.147557,173.133428,39.838285,9.642339,3.901973
9999,10000,299.0,308.7,1500,40.2,30,0,0,0,0,...,7.717318,300.029263,2.017358,310.018785,1.498407,1537.598932,179.059828,40.017251,9.992153,3.718438
