In [15]:
import pandas as pd
pd.options.display.max_columns = None
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
import umap
import warnings
warnings.filterwarnings('ignore')

In [11]:
df = pd.read_csv('faults.csv')
df.columns = map(str.lower, df.columns)
target_cols = ['pastry', 'z_scratch', 'k_scatch', 'stains', 'dirtiness', 'bumps', 'other_faults'] 
feat_col = ['x_minimum', 'x_maximum', 'y_minimum', 'y_maximum', 'pixels_areas', 'x_perimeter', 'y_perimeter', 'sum_of_luminosity', 'minimum_of_luminosity', 'maximum_of_luminosity', 'length_of_conveyer', 'typeofsteel_a300', 'typeofsteel_a400', 'steel_plate_thickness', 'edges_index', 'empty_index', 'square_index', 'outside_x_index', 'edges_x_index', 'edges_y_index', 'outside_global_index', 'logofareas', 'log_x_index', 'log_y_index', 'orientation_index', 'luminosity_index', 'sigmoidofareas'] 
enc_dict = {'pastry' : 0, 
            'z_scratch' : 1,
            'k_scatch' : 2, 
            'stains' : 3,
            'dirtiness' : 4, 
            'bumps' : 5, 
            'other_faults' : 6}

df['anomaly'] = pd.from_dummies(df[target_cols]).replace(enc_dict)

Without preprocessing

In [2]:
gnb = GaussianNB()

In [6]:
X_full = df[feat_col]
y_full = df['anomaly']
X_full_train, X_full_test, y_full_train, y_full_test = train_test_split(X_full, y_full, test_size = 0.4, random_state = 2)

In [7]:
gnb.fit(X_full_train, y_full_train)
y_pred = gnb.predict(X_full_test)

In [10]:
gnb_accuracy = round(accuracy_score(y_full_test, y_pred), 2)
print(f'Accuracy score for naive bayes classifier: {gnb_accuracy}%')

Accuracy score for naive bayes classifier: 0.44%


Dimensionality reduction

In [17]:
sc = StandardScaler()
ump = umap.UMAP(n_neighbors = 5, n_components = 10, random_state = 2)

#df_nonbin = df.drop(['typeofsteel_a300', 'typeofsteel_a400', 'outside_global_index', 'anomaly'], axis = 1)


In [16]:
df.columns

Index(['x_minimum', 'x_maximum', 'y_minimum', 'y_maximum', 'pixels_areas',
       'x_perimeter', 'y_perimeter', 'sum_of_luminosity',
       'minimum_of_luminosity', 'maximum_of_luminosity', 'length_of_conveyer',
       'typeofsteel_a300', 'typeofsteel_a400', 'steel_plate_thickness',
       'edges_index', 'empty_index', 'square_index', 'outside_x_index',
       'edges_x_index', 'edges_y_index', 'outside_global_index', 'logofareas',
       'log_x_index', 'log_y_index', 'orientation_index', 'luminosity_index',
       'sigmoidofareas', 'pastry', 'z_scratch', 'k_scatch', 'stains',
       'dirtiness', 'bumps', 'other_faults', 'anomaly'],
      dtype='object')

In [18]:


# Example feature indices or names
continuous_features = ['x_minimum', 'x_maximum', 'y_minimum', 'y_maximum', 'pixels_areas',
       'x_perimeter', 'y_perimeter', 'sum_of_luminosity',
       'minimum_of_luminosity', 'maximum_of_luminosity', 'length_of_conveyer',
       'steel_plate_thickness',
       'edges_index', 'empty_index', 'square_index', 'outside_x_index',
       'edges_x_index', 'edges_y_index', 'logofareas',
       'log_x_index', 'log_y_index', 'orientation_index', 'luminosity_index',
       'sigmoidofareas', 'pastry', 'z_scratch', 'k_scatch', 'stains',
       'dirtiness', 'bumps', 'other_faults', 'anomaly'] # Replace with your continuous feature names or indices
binary_features = ['typeofsteel_a300', 'typeofsteel_a400', 'outside_global_index']      # Replace with your binary feature names or indices

# Preprocessing pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', sc, continuous_features),
        ('bin', 'passthrough', binary_features)
    ]
)

# Full pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('umap', ump)  # Replace with UMAP parameters as needed
])

# Fit and transform the data
X_transformed = pipeline.fit_transform(X)


NameError: name 'X' is not defined