In [81]:

import glob
import os

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [82]:
# Create the logistic regression classifier.
# class_weight makes errors on class 1 count 22x as much as errors on class 0.
# NOTE(review): presumably this offsets the class imbalance in the labels; confirm how 22 was chosen.
class_weight = {0: 1.0, 1: 22}
model = LogisticRegression(max_iter=165000, class_weight=class_weight)
# Cut-off, in cycles, used when building the boolean labels: a row is labelled
# True when its remaining cycles are below this value.
need_maintenance = 30

In [83]:
# Build the training set from every train_FD00x file and its "_agg" companion.
x_train, y_train = [], []
# os.path.join keeps the pattern portable: the previous hand-written literal
# depended on an invalid escape sequence and Windows-style separators.
# sorted() makes the file order deterministic across platforms.
for train_path in sorted(glob.glob(os.path.join('..', 'data', 'train_FD00[0-9].txt'))):
    # Companion file holding the aggregate features for the same rows
    agg_path = train_path.replace('.txt', '_agg.txt')

    # Load raw and aggregate features and place them side by side
    train_data = np.hstack((
        np.genfromtxt(train_path, delimiter=' '),
        np.genfromtxt(agg_path, delimiter=' '),
    ))

    # Column 0 is the unit id and column 1 the cycle. zip() visits rows in file
    # order, so each id ends up mapped to the cycle of its last row.
    unit_ids, cycles = train_data[:, 0], train_data[:, 1]
    last_cycle = dict(zip(unit_ids, cycles))

    # Label is True when fewer than `need_maintenance` cycles remain before the
    # unit's last recorded cycle. Looking the last cycle up by id (instead of
    # indexing a list with `id - 1`) stays correct when ids are not exactly 1..N.
    remaining = np.array([last_cycle[unit_id] for unit_id in unit_ids]) - cycles
    labels = remaining < need_maintenance

    y_train.extend(labels)
    # Drop the id column; every other column is used as a feature
    x_train.extend(train_data[:, 1:])
    

In [84]:
# Train the classifier and score the same training rows in one step
# (fit() returns the fitted estimator, so the two calls can be chained).
# NOTE(review): the recorded output of this cell shows the solver stopping on
# its evaluation limit and recommending feature scaling.
y_pred = model.fit(x_train, y_train).predict(x_train)

STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [85]:
# Score the training-set predictions against the training labels
accuracy = accuracy_score(y_train, y_pred)
report = classification_report(y_train, y_pred)

# One print call, one item per line
print(f"Accuracy: {accuracy:.2f}", "Classification Report:", report, sep="\n")

Accuracy: 0.87
Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.85      0.92    139089
        True       0.50      0.98      0.66     21270

    accuracy                           0.87    160359
   macro avg       0.75      0.91      0.79    160359
weighted avg       0.93      0.87      0.88    160359



In [86]:
# Build the test set: one row per unit (its last recorded cycle) plus a label
# derived from the matching RUL file.
x_test, y_test = [], []
# os.path.join keeps the pattern portable; sorted() makes the order deterministic.
for test_path in sorted(glob.glob(os.path.join('..', 'data', 'test_FD00[0-9].txt'))):
    agg_path = test_path.replace('.txt', '_agg.txt')
    # Swap 'test' for 'RUL' in the file name only; replacing on the whole path
    # would also rewrite any directory whose name happens to contain 'test'.
    folder, file_name = os.path.split(test_path)
    rul_path = os.path.join(folder, file_name.replace('test', 'RUL'))

    # Load raw and aggregate features and place them side by side
    test_data = np.hstack((
        np.genfromtxt(test_path, delimiter=' '),
        np.genfromtxt(agg_path, delimiter=' '),
    ))

    # A row is the last one for its unit when the next row carries a different
    # id (column 0); the final row of the file always qualifies.
    unit_ids = test_data[:, 0]
    is_last = np.append(unit_ids[1:] != unit_ids[:-1], True)
    last_rows = test_data[is_last]

    # atleast_1d keeps a single-value RUL file usable instead of a 0-d array
    labels = np.atleast_1d(np.genfromtxt(rul_path)) < need_maintenance
    if len(labels) != len(last_rows):
        # Labels and feature rows are paired by position, so a count mismatch
        # would silently misalign every following sample.
        raise ValueError(
            f"{rul_path} has {len(labels)} RUL values but {test_path} "
            f"contains {len(last_rows)} units"
        )

    y_test.extend(labels)
    # Drop the id column so the features match the training columns
    x_test.extend(last_rows[:, 1:])

    

In [87]:
# Predict on the test rows using the model's default decision rule.
# NOTE: this reassigns `y_pred`, which until now held the training-set predictions.
y_pred = model.predict(x_test)

In [90]:
# Custom decision cut-off applied to the positive-class probability
new_threshold = 0.3

# Column 1 of predict_proba holds the probability of the positive class
probabilities = model.predict_proba(x_test)[:, 1]
# 1 where the probability reaches the cut-off, 0 otherwise
predictions = np.greater_equal(probabilities, new_threshold).astype(int)

In [91]:
# Evaluate the thresholded predictions on the test data.
# Accuracy is computed from `predictions` (not the default-threshold `y_pred`)
# so that the accuracy, the report and the confusion matrix all describe the
# same set of predictions.
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)

# Confusion matrix: rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_test, predictions)

print("Confusion Matrix:")
print(cm)

Accuracy: 0.86
Classification Report:
              precision    recall  f1-score   support

       False       0.91      0.91      0.91       550
        True       0.68      0.68      0.68       157

    accuracy                           0.86       707
   macro avg       0.80      0.80      0.80       707
weighted avg       0.86      0.86      0.86       707

Confusion Matrix:
[[500  50]
 [ 50 107]]
