In [1]:
from model import *
from utils import *
from training import *
from data_processing import *

import importlib
import imports
importlib.reload(imports)

device = 'mps'
model_name = 'optimized'

# Enable autoreload
%load_ext autoreload
%autoreload 2

In [2]:
folder = os.getcwd()
filename = f'{folder}/ncmapps_ds02.csv'
print(filename)

/Users/alexei.ermochkine/Desktop/ma5/ML4PM/assignment_3_graded/ML4PM_assignment3/ncmapps_ds02.csv


In [3]:
df = pd.read_csv(filename)
df.head()

Unnamed: 0,T24,T30,T48,T50,P15,P2,P21,P24,Ps30,P40,...,Wf,alt,Mach,TRA,T2,RUL,Fc,unit,hs,cycle
0,593.28656,1422.046,1797.2323,1214.0819,15.626362,11.445379,15.864327,19.897537,327.51962,332.797,...,3.811431,9889.289,0.443401,76.022545,496.67758,74,3,2,1,1
1,593.3022,1422.372,1797.9927,1214.1847,15.60764,11.424822,15.845321,19.881628,327.4833,332.75565,...,3.812693,9951.729,0.444472,76.222015,496.5389,74,3,2,1,1
2,593.14215,1421.873,1797.1808,1213.543,15.581609,11.407368,15.818893,19.848164,326.81784,332.08163,...,3.803472,10011.879,0.44583,76.19121,496.43396,74,3,2,1,1
3,592.98883,1421.4746,1796.5925,1213.012,15.554759,11.387945,15.791634,19.8148,326.22076,331.47592,...,3.795554,10073.271,0.446986,76.196846,496.30667,74,3,2,1,1
4,592.66296,1420.8185,1795.7623,1212.4274,15.512026,11.356153,15.748249,19.760134,325.3666,330.6076,...,3.784542,10136.359,0.446471,76.20448,496.02774,74,3,2,1,1


In [4]:
LABELS = ['RUL']

Operative Conditions ($w$)

DASHlink- Flight Data For Tail 687.(2012). Retrieved on 2019-01-29 from https://c3.nasa.gov/dashlink/

In [5]:
W_VAR = ['alt', 'Mach', 'TRA', 'T2']

Sensor readings ($X_s$)

In [6]:
XS_VAR = ['T24', 'T30', 'T48', 'T50', 'P15', 'P2', 'P21', 'P24', 'Ps30', 'P40', 'P50', 'Nf', 'Nc', 'Wf']


In [7]:
# dataset parameters
TRAIN_UNITS = [2, 5, 10, 16, 18, 20]
TEST_UNITS = [11, 14, 15]

DEFAULT_PARAMS = {
    # CNN model parameters
    'in_channels': 18, 
    'out_channels': 1,
    'window': 50, 
    'n_ch': 10, 
    'n_k': 10, 
    'n_hidden': 50, 
    'n_layers': 3,
    'dropout': 0.1,
    'padding': 'same',
    'use_batchnorm': True,
    # training parameters
    'batch_size': 256,  
    'base_lr': 1e-3,
    'weight_decay': 1e-5,
    'max_epochs': 50
}


DATASETS = create_datasets(df, window_size=DEFAULT_PARAMS['window'], train_units=TRAIN_UNITS, test_units=TEST_UNITS)
LOADERS = create_data_loaders(DATASETS, batch_size=DEFAULT_PARAMS['batch_size'], val_split=0.2)

train_size: 403236	validation_size: 100809	test_size: 115274


In [8]:
def run_single(seed, params=DEFAULT_PARAMS):
    seed_everything(seed)

    model = CNN(
        in_channels=params['in_channels'],
        out_channels=params['out_channels'], 
        n_ch=params['n_ch'],
        n_k=params['n_k'],
        n_hidden=params['n_hidden'],
        n_layers=params['n_layers'],
        dropout=params['dropout'],
        padding=params['padding'],
        use_batchnorm=params['use_batchnorm']
    ).to(device)  # Move model to device immediately after creation

    print(model)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=params['base_lr'],
        weight_decay=params['weight_decay'],
    )

    criterion = nn.MSELoss()
    trainer = Trainer(
        model,
        optimizer,
        criterion=criterion,
        n_epochs=params['max_epochs'],
        seed=seed,
        device=device,  # Pass device to trainer
        model_name=model_name
    )

    trainer.fit(LOADERS)
    df_eval, df_eval_out = trainer.eval_rul_prediction(LOADERS[1])
    df_test, df_test_out = trainer.eval_rul_prediction(LOADERS[2])
    return df_eval, df_eval_out, df_test, df_test_out, trainer.losses4aggregation

## Hyperparameter tuning


In [9]:
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
def objective(trial):
    params = DEFAULT_PARAMS.copy()
    
    # Architecture hyperparameters
    params['in_channels'] = 18  # Fixed based on input features
    params['out_channels'] = 1  # Fixed based on output
    params['window'] = 50  # Fixed based on sequence length
    
    # Tune network structure
    params['n_ch'] = trial.suggest_int('n_ch', 8, 32)  # Number of channels in conv layers
    params['n_k'] = trial.suggest_int('n_k', 5, 15)    # Kernel size
    params['n_hidden'] = trial.suggest_int('n_hidden', 32, 128)  # Hidden layer size
    params['n_layers'] = trial.suggest_int('n_layers', 2, 4)     # Number of conv layers
    
    # Tune training parameters
    params['base_lr'] = trial.suggest_float('base_lr', 1e-4, 1e-2, log=True)
    
    # Fixed parameters
    params['padding'] = 'same'
    params['max_epochs'] = 50  # Could also be tuned if needed
    
    return evaluate_model(params)

def evaluate_model(params):
    """Evaluate a model configuration"""
    n_runs = 3  # Number of runs to average over
    rmse_scores = []
    
    for run in range(n_runs):
        seed = 42 + run  # Different seed for each run
        df_eval, df_eval_out, _, _ = run_single(seed, params)
        rmse = df_eval_out['rmse'].values[0]
        rmse_scores.append(rmse)
        
        # Early stopping if performance is very poor
        if rmse > 15:  # You can adjust this threshold
            return float('inf')
    
    return np.mean(rmse_scores)

# Create and run the study
study_name = 'batchnorm_dropout_optimization'
study = optuna.create_study(
    study_name=study_name,
    direction="minimize",
    storage=f'sqlite:///{folder}/batchnorm_dropout_study.db',
    load_if_exists=True
)

# Add callback for monitoring
def print_callback(study, trial):
    if study.best_trial.number == trial.number:
        print(f"\nTrial {trial.number} finished with best value: {trial.value:.4f}")
        print("Best parameters:")
        for key, value in study.best_trial.params.items():
            print(f"    {key}: {value}")


[I 2024-11-06 16:28:38,156] Using an existing study with name 'batchnorm_dropout_optimization' instead of creating a new one.


In [11]:
# Run optimization
n_trials = 2  # Adjust based on your computational budget
study.optimize(objective, n_trials=n_trials, callbacks=[print_callback])

# Print final results
print("\nStudy statistics: ")
print(f"  Number of finished trials: {len(study.trials)}")
print(f"  Best trial:")
print(f"    Value: {study.best_trial.value:.4f}")
print("\n  Best parameters:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

# Visualize results
try:
    # Parameter importance plot
    fig = optuna.visualization.plot_param_importances(study)
    fig.show()
    
    # Optimization history
    fig = optuna.visualization.plot_optimization_history(study)
    fig.show()
    
    # Parameter relationships
    fig = optuna.visualization.plot_parallel_coordinate(study)
    fig.show()
except Exception as e:
    print(f"Visualization error: {e}")

# Save results to DataFrame
results_df = study.trials_dataframe()
results_df.to_csv(f'{folder}/batchnorm_dropout_optimization_results.csv')

CNN(
  (conv_layers): Sequential(
    (0): Conv1d(18, 26, kernel_size=(13,), stride=(1,), padding=same)
    (1): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv1d(26, 26, kernel_size=(13,), stride=(1,), padding=same)
    (4): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv1d(26, 26, kernel_size=(13,), stride=(1,), padding=same)
    (7): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=1300, out_features=34, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (bn_fc): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=34, out_features=1, bias=True)
)
Training model for 50 epochs...
[Epoch 1] train_loss = 117.05357, eval_loss = 2458.68237, test_loss = 3820.11230 [27.7s]
[Epoch 2] tra

  model.load_state_dict(torch.load(model_path, map_location=self.device))


[Epoch 50] train_loss = 32.39606, eval_loss = 100.89209, test_loss = 125.77521 [24.8s]
Task done in 1228s
Evaluating test RUL...
Model CNN saved in models_optimized/optimized_1106162902.pt loaded to mps


100%|██████████| 394/394 [00:02<00:00, 152.85it/s]


Evaluating test RUL...
Model CNN saved in models_optimized/optimized_1106162902.pt loaded to mps


100%|██████████| 451/451 [00:02<00:00, 185.84it/s]
[W 2024-11-06 16:49:36,321] Trial 1 failed with parameters: {'n_ch': 26, 'n_k': 13, 'n_hidden': 34, 'n_layers': 3, 'base_lr': 0.0037501183745495843} because of the following error: ValueError('too many values to unpack (expected 4)').
Traceback (most recent call last):
  File "/Users/alexei.ermochkine/Desktop/ma5/ML4PM/venv4ml4pm/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/94/hphmcz155530jnw34qpssk100000gn/T/ipykernel_22957/2908481708.py", line 22, in objective
    return evaluate_model(params)
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/94/hphmcz155530jnw34qpssk100000gn/T/ipykernel_22957/2908481708.py", line 31, in evaluate_model
    df_eval, df_eval_out, _, _ = run_single(seed, params)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: too many values to unpack (expected 4)
[W 2024-11-06 16:49:36,322] Trial

ValueError: too many values to unpack (expected 4)

In [None]:
# Load the study for resuming, comment out when reloading
# study = optuna.load_study(study_name=study_name, storage=f'sqlite:///{folder}/study.db')

### Get the best model parameter

In [None]:
# Get the est model parameter
best_trial = study.best_trial
for key, value in best_trial.params.items():
    print(f"{key}: {value:.5f}")

## TRAINING ON 5 SEEDS
## reminder: CHANGE FOLDER AND MODEL_NAME IN TRAINING.PY BEFORE RUNNING !!!!! 

In [None]:
SEED = 42
seed_everything(SEED)
device = 'mps'
N_RUNS = 5

df_list = []  
all_df_test = []
all_train_losses = []
all_eval_losses = []
all_test_losses = []

for seed in range(SEED, SEED+N_RUNS):
    print("--------------------- BEGGINING NEW SEED:", seed, "----------------")
    df_eval, df_eval_out, df_test, df_test_out, losses4aggregation = run_single(seed)
    all_train_losses.append(losses4aggregation['train'])
    all_eval_losses.append(losses4aggregation['eval'])
    all_test_losses.append(losses4aggregation['test'])
    all_df_test.append(df_test)
    df_list.append(df_test_out)  

df_all = pd.concat(df_list, ignore_index=True)  
all_train_losses = np.array(all_train_losses)
all_eval_losses = np.array(all_eval_losses)
all_test_losses = np.array(all_test_losses)

# PLOTTING RESULTS

In [None]:
plot_test_rul_predictions(all_df_test, df_all, model_name, save=True, show=True)


In [None]:

# Plot the aggregated losses
plot_aggregated_losses(all_train_losses, all_eval_losses, all_test_losses, model_name, save=True, show=True)


In [None]:
df_all.to_csv(f'test_scores/{model_name}_df_all.csv', index=False)
df_all.head()

In [None]:
results_df, best_idx = evaluate_per_unit_stored(df_all, all_df_test)