In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import precision_score, recall_score, f1_score



In [2]:
df = pd.read_excel('Data.xlsx')

df['Date'] = pd.to_datetime(df['Date'])

df = df.sort_values(['Part ID', 'Date'])

### Remove Part IDs with less than 50 days of data

In [3]:
part_counts = df['Part ID'].value_counts()
df_filtered = df[df['Part ID'].isin(part_counts[part_counts >= 50].index)]

### Grouping data week by week

In [4]:
def resample_7d(group):
    cycle_col = group['Cycle'].max()
    mean_cols = group.drop(columns=['Cycle']).set_index('Date').resample('7D').mean().reset_index()
    mean_cols['Cycle'] = cycle_col
    return mean_cols
df_resampled_filtered = df_filtered.groupby('Part ID').apply(resample_7d).reset_index(drop=True)

### Making binary Values and creating a window

In [5]:
df_resampled_filtered['response'] = np.where(df_resampled_filtered['Total Score'] == 0, 0, 1)

feature_columns = ['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4', 'Feature_5', 
                   'Feature_6', 'Feature_7', 'Feature_8', 'Feature_9', 'Feature_10', 
                   'Feature_11', 'Feature_12']

df_resampled_filtered[feature_columns] = df_resampled_filtered[feature_columns].shift(1)
df_resampled_filtered = df_resampled_filtered.dropna()

In [6]:
df_resampled_filtered.head()

Unnamed: 0,Date,Part ID,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Feature_12,Total Score,Cycle,response
5,2010-02-02,1.0,15.5,4.5,0.0,0.0,0.0,0.0,4.5,0.0,3.5,0.0,0.0,1.5,31.5,49,1
19,2010-05-11,1.0,0.0,0.0,0.0,3.5,1.0,1.5,4.5,1.5,7.0,2.0,4.5,0.0,12.0,49,1
22,2010-06-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,6.0,49,1
43,2010-10-26,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,49,1
44,2010-11-02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,10.0,49,1


In [7]:
# df_resampled_filtered.to_excel("data new.xlsx")

### Dataset Preparation

In [8]:
X = df_resampled_filtered[feature_columns]
y = df_resampled_filtered['response']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

### Model

In [10]:
model = Sequential()
model.add(LSTM(units=128, return_sequences=False, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [16]:
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.7671


### Prediction

In [13]:
X_scaled = scaler.transform(X)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

y_pred_all = model.predict(X_scaled)
y_pred_all_classes = (y_pred_all > 0.5).astype("int32")

df_resampled_filtered['predict'] = y_pred_all_classes.flatten()

# df_resampled_filtered.to_excel('predictions_future_observation.xlsx', index=False)

# print("Predictions for all data have been added and the file has been saved as 'predictions_future_observations.xlsx'.")




In [14]:
df_resampled_filtered.head()

Unnamed: 0,Date,Part ID,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Feature_12,Total Score,Cycle,response,predict
5,2010-02-02,1.0,15.5,4.5,0.0,0.0,0.0,0.0,4.5,0.0,3.5,0.0,0.0,1.5,31.5,49,1,1
19,2010-05-11,1.0,0.0,0.0,0.0,3.5,1.0,1.5,4.5,1.5,7.0,2.0,4.5,0.0,12.0,49,1,1
22,2010-06-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,6.0,49,1,1
43,2010-10-26,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,49,1,0
44,2010-11-02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,10.0,49,1,1


### Precision, Recall and F1 score

In [15]:
y_test_true = y_test

y_pred_test_classes = (model.predict(X_test_scaled) > 0.5).astype("int32")

precision = precision_score(y_test_true, y_pred_test_classes)
recall = recall_score(y_test_true, y_pred_test_classes)
f1 = f1_score(y_test_true, y_pred_test_classes)

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')


Precision: 0.8450
Recall: 0.8180
F1 Score: 0.8313
