In [1]:

import glob

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [2]:
# Create the Random Forest Regressor model.
# `random_state` pins the bootstrap samples and feature sub-sampling so that a
# Restart & Run All reproduces the same forest (and therefore the same metrics).
# `n_jobs=-1` builds the 1000 trees on all available cores; it does not change
# the fitted model when `random_state` is fixed.
model = RandomForestRegressor(
    n_estimators=1000,
    max_features=2,
    max_depth=7,
    min_samples_split=40,
    min_samples_leaf=10,
    random_state=42,
    n_jobs=-1,
)

In [3]:
def rul_labels(data):
    """Return the remaining useful life (RUL) label for every row of ``data``.

    Column 0 of ``data`` is the unit id and column 1 is the cycle number.
    Rows that share an id must be contiguous; the last row of each contiguous
    run is taken as that unit's final cycle.

    Parameters
    ----------
    data : numpy.ndarray, 2-D
        Rows of ``[unit_id, cycle, ...]``.

    Returns
    -------
    numpy.ndarray, 1-D
        ``final_cycle_of_the_row's_unit - cycle_of_the_row`` for each row.
    """
    ids, cycles = data[:, 0], data[:, 1]

    # A row is the last of its unit when the next row carries a different id;
    # the final row of the file is always a last row.
    is_last = np.ones(len(ids), dtype=bool)
    is_last[:-1] = ids[1:] != ids[:-1]

    # Number of completed units strictly before each row == index of the row's
    # own unit. This avoids looking units up by their id value, so ids do not
    # have to be exactly 1..N.
    unit_index = np.cumsum(is_last) - is_last

    return cycles[is_last][unit_index] - cycles


# Aggregate all my data
x_train, y_train = [], []

# Forward slashes work on every platform and avoid the invalid "\d" escape;
# sorting makes the row order independent of the file system's listing order.
for train_path in sorted(glob.glob('../data/train_FD00[0-9].txt')):
    # Get aggregate path for the same train file
    agg_path = train_path.replace('.txt', '_agg.txt')

    # Get both training and aggregate data
    train_data = np.genfromtxt(train_path, delimiter=' ')
    agg_data = np.genfromtxt(agg_path, delimiter=' ')

    # Merge the 2 column-wise
    train_data = np.hstack((train_data, agg_data))

    # Labels are (last cycle of the unit) - (current cycle)
    y_train.extend(rul_labels(train_data))
    # Features are every column except the unit id
    x_train.extend(train_data[:, 1:])
    

KeyboardInterrupt: 

In [None]:
# Train the forest on the aggregated training set and, in the same step,
# predict on that same data (scikit-learn's `fit` returns the fitted estimator).
y_pred = model.fit(x_train, y_train).predict(x_train)

In [None]:
# Score the model on the training data.
# `y_pred` must still hold the predictions made on `x_train` when this runs.

# Calculate R-squared for training data
r2 = r2_score(y_train, y_pred)

# Calculate Mean Absolute Error
mae = mean_absolute_error(y_train, y_pred)

# Report both metrics once each (MAE was previously computed but never shown,
# while R-squared was printed twice).
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

In [None]:
def last_rows_per_unit(data):
    """Return the final row of every unit in ``data``.

    Column 0 of ``data`` is the unit id. Rows that share an id must be
    contiguous; the last row of each contiguous run is kept, in file order.

    Parameters
    ----------
    data : numpy.ndarray, 2-D
        Rows of ``[unit_id, cycle, ...]``.

    Returns
    -------
    numpy.ndarray, 2-D
        One row per unit, with the same columns as ``data``.
    """
    ids = data[:, 0]

    # A row is the last of its unit when the next row carries a different id;
    # the final row of the file is always a last row.
    is_last = np.ones(len(ids), dtype=bool)
    is_last[:-1] = ids[1:] != ids[:-1]

    return data[is_last]


# Aggregate all my data
x_test, y_test = [], []

# Forward slashes work on every platform and avoid the invalid "\d" escape;
# sorting makes the row order independent of the file system's listing order.
for test_path in sorted(glob.glob('../data/test_FD00[0-9].txt')):
    agg_path = test_path.replace('.txt', '_agg.txt')
    # Only rewrite the file-name prefix, so a directory that happens to
    # contain "test" is left alone.
    rul_path = test_path.replace('test_FD', 'RUL_FD')

    test_data = np.genfromtxt(test_path, delimiter=' ')
    agg_data = np.genfromtxt(agg_path, delimiter=' ')
    test_data = np.hstack((test_data, agg_data))

    # Get item with last cycle for each id
    final_rows = last_rows_per_unit(test_data)

    # `atleast_1d` keeps a one-entry RUL file iterable for `extend`.
    rul = np.atleast_1d(np.genfromtxt(rul_path))
    if len(rul) != len(final_rows):
        raise ValueError(
            f"{rul_path} has {len(rul)} labels but {test_path} has "
            f"{len(final_rows)} units"
        )

    # Features are every column except the unit id
    x_test.extend(final_rows[:, 1:])
    y_test.extend(rul)
    

Mean Squared Error: 1927.755921216407
Mean Absolute Error: 31.15899575671853
R-squared: 0.26093958997874556


In [None]:
# Predict on my data
# NOTE: this overwrites the `y_pred` assigned by the training-set prediction
# cell; after this line `y_pred` holds predictions for `x_test`, so the
# training-metrics cell must not be re-run without re-predicting on `x_train`.
y_pred = model.predict(x_test)

In [None]:
# Score the test-set predictions against the true RUL values.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Report each metric on its own line, MAE first.
for label, value in (("Mean Absolute Error:", mae), ("R-squared:", r2)):
    print(label, value)