# Fault Detection – Shared LSTM (Option B)
This notebook reproduces the full pipeline required in **description.pdf**: EDA, window generation, model training, evaluation (confusion matrix, ROC and precision–recall curves), and conclusions.

In [None]:
# Optional: install the project's dependencies from inside the notebook
# (e.g. when running on Colab). Uncomment the line below to use it; `%pip`
# installs into the environment of the running kernel.
# %pip install -r ../requirements.txt

## 1. Imports & Configuration

In [None]:
import os, sys, json, itertools, pathlib, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Put the project root at the front of the import path so that `import codes.*`
# works when the notebook is run directly. '..' is resolved against the current
# working directory, so this assumes the kernel starts in the notebook's folder.
PROJ_ROOT = pathlib.Path('..').resolve()
if sys.path.count(str(PROJ_ROOT)) == 0:
    sys.path.insert(0, str(PROJ_ROOT))

from codes.data_utils import load_csv_parts, window_generator, train_val_test_split
from codes.TensorflowDataPreprocessor import TensorflowDataPreprocessor
from codes.LSTM import LSTMModel, F1Score
from codes.ModelEvaluator import ModelEvaluator
import tensorflow as tf

# Global plotting style for every figure in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')

# Fix the global NumPy and TensorFlow seeds so that the stochastic steps further
# down (e.g. weight initialisation) are as repeatable as the backend allows when
# the notebook is re-run from a fresh kernel.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

## 2. Load raw dataset

In [None]:
# The raw data lives in the `dataset` folder directly under the project root.
DATA_DIR = PROJ_ROOT.joinpath('dataset')

# NOTE(review): load_csv_parts presumably reads and concatenates the CSV parts
# found in DATA_DIR into a single DataFrame — confirm in codes/data_utils.
df_raw = load_csv_parts(DATA_DIR)

# Preview the first rows as the cell output.
df_raw.head()

### Quick EDA: label distribution

In [None]:
# Count how many rows carry each label and show the class balance as a bar chart.
label_counts = df_raw['label'].value_counts()

# Explicit figure/axes instead of the implicit pyplot state, with labelled axes
# so the figure can be read on its own.
fig, ax = plt.subplots()
sns.barplot(x=label_counts.index, y=label_counts.values, ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.set(title='Label distribution (sensor failures)', xlabel='Label', ylabel='Count')
plt.show()

## 3. Window generation & split

In [None]:
# Sliding-window hyperparameters passed to window_generator below.
WINDOW_SIZE = 240
STEP_SIZE = 1

# Turn the raw frame into windowed inputs X and per-window targets y.
X, y = window_generator(
    df_raw,
    window_size=WINDOW_SIZE,
    step_size=STEP_SIZE,
    use_diff_mean=True,
)
print('Total windows:', len(X), 'Positive:', y.sum())

# NOTE(review): if step_size is the stride between window starts, neighbouring
# windows share WINDOW_SIZE - 1 samples, so a shuffled split would leak nearly
# identical windows across train/val/test. Confirm that train_val_test_split
# partitions chronologically (or by recording) rather than at random.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = train_val_test_split(X, y)

## 4. Normalization & tf.data

In [None]:
# Normalise the three splits with the project's preprocessor; the `_e` arrays
# are what the model is trained and evaluated on below.
# NOTE(review): presumably the normalisation statistics are fitted on X_train
# only and then applied to X_val / X_test — confirm in normalize_3way.
prep = TensorflowDataPreprocessor()
X_train_e, X_val_e, X_test_e = prep.normalize_3way(X_train, X_val, X_test)

## 5. Model training

In [None]:
# Training hyperparameters, named so they can be tuned in one place.
EPOCHS = 100
BATCH_SIZE = 128

# Metrics reported by Keras during training and validation.
metrics = [
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    F1Score(),
]

# Build the project's LSTM wrapper for the chosen window length and obtain the
# callbacks it configures for this run.
model_wrap = LSTMModel(window_size=WINDOW_SIZE, metrics=metrics)
callbacks = model_wrap.setup_callbacks(model_name='notebook')

# Computed up front (from the training labels only) so the weights can be
# inspected before they are handed to fit().
class_weights = prep.compute_class_weights(y_train)

history = model_wrap.model.fit(
    X_train_e, y_train,
    validation_data=(X_val_e, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_weight=class_weights,
    callbacks=callbacks,
)

## 6. Evaluation

In [None]:
# Predicted scores for the test windows, flattened to a 1-D array.
probs_test = model_wrap.model.predict(X_test_e).ravel()

# NOTE(review): threshold=-1 presumably tells ModelEvaluator to estimate the
# decision threshold itself subject to minPrecision — confirm. If so, the
# threshold is chosen on the test set, which makes the reported test metrics
# optimistic; consider estimating it on the validation split instead.
evaluator = ModelEvaluator(probs_test, y_test, threshold=-1, minPrecision=0.7)
evaluator.execute()
evaluator.printMetrics(evaluator.metrics)

# Confusion matrix at the evaluator's estimated threshold.
preds = (probs_test >= evaluator.estimatedThreshold).astype(int)
cm = confusion_matrix(y_test, preds)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()
plt.show()

# ROC curve.
roc_curve_data = evaluator.ROCCurve
evaluator.plotCurve(
    roc_curve_data['falsePositiveRates'],
    roc_curve_data['truePositiveRates'],
    roc_curve_data['AUROC'],
    'FPR', 'TPR', 'ROC Curve',
)

# Precision-recall curve.
pr_curve_data = evaluator.PRCurve
evaluator.plotCurve(
    pr_curve_data['recalls'],
    pr_curve_data['precisions'],
    pr_curve_data['AUPRC'],
    'Recall', 'Precision', 'PR Curve',
)

## 7. Discussion
**TODO:** Summarize the results, limitations, and potential improvements (e.g., an attention-based model or a spectrogram CNN).