### Fault detection in wafers based on sensor data

In [1]:
import pandas as pd
import numpy as np

# Set the number of samples and features in the dataset
n_samples = 1000
n_features = 10

# Create random data for the features
X = np.random.rand(n_samples, n_features)

# Add noise to the data to simulate sensor readings
X = X + np.random.normal(loc=0, scale=0.1, size=X.shape)

# Create a target variable with randomly generated faults
y = np.random.choice(['Yes', 'No'], size=n_samples)

# Combine the features and target variable into a dataframe
df = pd.DataFrame(X, columns=[f'Feature {i+1}' for i in range(n_features)])
df['fault'] = y

# Save the dataframe to a CSV file
df.to_csv('wafer_faults.csv', index=False)

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Read in the dataset
df = pd.read_csv('wafer_faults.csv')

# Drop any missing values
df = df.dropna()

# Convert the target variable to binary values
df['fault'] = np.where(df['fault'] == 'Yes', 1, 0)

# Split the dataset into features and target variable
X = df.drop('fault', axis=1)
y = df['fault']

# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a logistic regression model on the training data
model = LogisticRegression()
model.fit(X_train, y_train)

# Generate predictions on the testing data
y_pred = model.predict(X_test)

# Output the confusion matrix and classification report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Output the coefficients for the logistic regression model
coefficients = pd.DataFrame({'Feature': df.columns[:-1], 'Coefficients': model.coef_[0]})
coefficients = coefficients.sort_values(by='Coefficients', ascending=False)
print(coefficients)

# Save the results to a CSV file
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
results.to_csv('result_6may.csv', index=False)

[[68 44]
 [50 38]]
              precision    recall  f1-score   support

           0       0.58      0.61      0.59       112
           1       0.46      0.43      0.45        88

    accuracy                           0.53       200
   macro avg       0.52      0.52      0.52       200
weighted avg       0.53      0.53      0.53       200

      Feature  Coefficients
4   Feature 5      0.093043
7   Feature 8      0.038150
9  Feature 10      0.016867
0   Feature 1     -0.018882
1   Feature 2     -0.019477
3   Feature 4     -0.072333
8   Feature 9     -0.077264
2   Feature 3     -0.090872
6   Feature 7     -0.099076
5   Feature 6     -0.149594
