In [1]:
# Import Required Libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Load the Dataset
data = pd.read_csv("/content/Dataset.csv")

print("Dataset Shape:", data.shape)
print("\nFirst 5 rows:\n", data.head())
print("\nDataset Info:")
print(data.info())


Dataset Shape: (10000, 10)

First 5 rows:
    UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  
0                    1551         42.8                0       0   No Failure  
1                    1408         46.3                3       0   No Failure  
2                    1498         49.4                5       0   No Failure  
3                    1433         39.5                7       0   No Failure  
4                    1408         40.0                9       0   No Failure  

Dataset Info:
<class 'pandas.core.frame.DataFrame

In [3]:
# Separate Features & Target

X = data.iloc[:, :-1]
y = data.iloc[:, -1]


In [4]:
# Handle Missing Values
for col in X.select_dtypes(include=np.number).columns:
    X[col].fillna(X[col].mean(), inplace=True)

for col in X.select_dtypes(include="object").columns:
    X[col].fillna(X[col].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(X[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(X[col].mode()[0], inplace=True)


In [5]:
# Encode Categorical Features
encoder = LabelEncoder()

for col in X.select_dtypes(include="object").columns:
    X[col] = encoder.fit_transform(X[col])


is_classification = False
if y.dtype == "object":
    y = encoder.fit_transform(y)
    is_classification = True


In [6]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
# Feature Scaling

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Model Selection & Training
if is_classification:
    print("\nProblem Type: Classification")
    model = LogisticRegression(max_iter=1000)
else:
    print("\nProblem Type: Regression")
    model = LinearRegression()

model.fit(X_train, y_train)


Problem Type: Classification


In [9]:
# Model Evaluation

y_pred = model.predict(X_test)

if is_classification:
    y_pred = np.round(y_pred)

    print("\nAccuracy:", accuracy_score(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

else:
    print("\nMean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
    print("Mean Squared Error (MSE):", mean_squared_error(y_test, y_pred))
    print("R2 Score:", r2_score(y_test, y_pred))



Accuracy: 0.993

Confusion Matrix:
 [[  15    0    0    0    0    0]
 [   1 1933    0    0    0    1]
 [   0    0   13    0    0    0]
 [   2    0    1   17    0    0]
 [   0    6    0    0    0    0]
 [   0    0    3    0    0    8]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      1.00      0.91        15
           1       1.00      1.00      1.00      1935
           2       0.76      1.00      0.87        13
           3       1.00      0.85      0.92        20
           4       0.00      0.00      0.00         6
           5       0.89      0.73      0.80        11

    accuracy                           0.99      2000
   macro avg       0.75      0.76      0.75      2000
weighted avg       0.99      0.99      0.99      2000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
# Model Interpretation
if not is_classification:
    print("\nModel Coefficients:")
    for feature, coef in zip(data.columns[:-1], model.coef_):
        print(f"{feature}: {coef}")