In [1]:
from utils import *
import numpy as np
import torch.optim as optim
import ray
from ray import tune

In [2]:
X_train,X_val, X_test, y_train, y_val, y_test = load_data('FEV1')
# print(X.shape, Y.shape)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

torch.Size([38, 166]) torch.Size([38, 1])
torch.Size([5, 166]) torch.Size([5, 1])
torch.Size([5, 166]) torch.Size([5, 1])


In [36]:
torch.manual_seed(42)
# Define model, loss function, and optimizer
model = MLP(input_size=X_train.shape[1], hidden_sizes=[75,35,15,5], output_size=1)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

# Train and validate the model
num_epochs = 500
# train_losses = []
# val_losses = []
best_val_loss = float('inf')
ep =0
for epoch in range(num_epochs):
    
    # Training
    model.train()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # train_losses.append(loss.item())
    
    # Validation
    model.eval()
    with torch.no_grad():
        outputs = model(X_val)
        val_loss = criterion(outputs, y_val)
        # val_losses.append(loss.item())

        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            ep = epoch
            torch.save(model.state_dict(), 'best_model.pt') 

    if (epoch+1 ==1) or (epoch+1) % 100 == 0:
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item(), val_loss.item()))

# Evaluate 
model.load_state_dict(torch.load('best_model.pt'))
model.eval()
with torch.no_grad():
    print('val_loss', criterion(model(X_val), y_val).item())
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)
    print('MSE Test Loss: {:.4f}'.format(test_loss.item()))
    print('MAPE Test Loss: {:.4f}'.format(mape_loss(y_pred, y_test).item())) 

Epoch [1/500], Train Loss: 6.7897, Val Loss: 7.0684
Epoch [100/500], Train Loss: 0.0878, Val Loss: 0.0684
Epoch [200/500], Train Loss: 0.0588, Val Loss: 0.0704
Epoch [300/500], Train Loss: 0.0350, Val Loss: 0.1180
Epoch [400/500], Train Loss: 0.0141, Val Loss: 0.2402
Epoch [500/500], Train Loss: 0.0052, Val Loss: 0.3306
val_loss 0.06285937875509262
MSE Test Loss: 0.1120
MAPE Test Loss: 9.1002


In [19]:
ep

27

In [6]:
best_val_loss

0.01642843708395958

In [42]:
import ray
from ray import tune

def train_mlp(config, checkpoint_dir=None):
    X_train,X_val, X_test, y_train, y_val, y_test = load_data('FVC')
    best_val_loss = float('inf')

    model = MLP(input_size=X_train.shape[1], hidden_sizes=config["hidden_size"], output_size=1)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])
    num_epochs = 50
    for epoch in range(num_epochs):
      # Training
      model.train()
      outputs = model(X_train)
      loss = criterion(outputs, y_train)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      tune.report(loss=loss.item())
      # Validation
      model.eval()
      with torch.no_grad():
            outputs = model(X_val)
            val_loss = criterion(outputs, y_val)
            if val_loss.item() < best_val_loss:
                best_val_loss = val_loss

      # tune.report(val_loss = best_val_loss)
      # tune.report(val_loss = val_loss.item())
      tune.report(best_val_loss = best_val_loss.item())


In [43]:
config = {
    "hidden_size": tune.choice([[50],[50,10], [75,35,15,5],[60,30,5] ]),
    # "lr": tune.loguniform(1e-5, 1e-1),
    'lr':tune.choice([0.01, 0.005, 0.001]),
    # "num_epochs": tune.choice([2000])
}


ray.shutdown()
# Initialize Ray
ray.init()


analysis = tune.run(
    train_mlp,
    config=config,
    num_samples=10,
    progress_reporter=tune.CLIReporter()
)

2023-04-26 15:09:45,120	INFO worker.py:1625 -- Started a local Ray instance.


== Status ==
Current time: 2023-04-26 15:09:48 (running for 00:00:02.97)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/16 CPUs, 0/2 GPUs
Result logdir: /home/akbar/ray_results/train_mlp_2023-04-26_15-09-46
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+-----------------------+----------+--------------------+-----------------+-------+
| Trial name            | status   | loc                | hidden_size     |    lr |
|-----------------------+----------+--------------------+-----------------+-------|
| train_mlp_479bb_00000 | RUNNING  | 10.7.44.21:1041780 | [75, 35, 15, 5] | 0.01  |
| train_mlp_479bb_00001 | PENDING  |                    | [50]            | 0.001 |
| train_mlp_479bb_00002 | PENDING  |                    | [60, 30, 5]     | 0.01  |
| train_mlp_479bb_00003 | PENDING  |                    | [50, 10]        | 0.01  |
| train_mlp_479bb_00004 | PENDING  |                    | [75, 35, 15, 5] | 0.001 |
| train_mlp_479bb_00005 | PENDING  |                    | [7

Trial name,date,done,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_mlp_479bb_00000,2023-04-26_15-09-49,True,cvig,100,,10.7.44.21,1041780,0.412217,0.00250745,0.412217,1682501989,100,479bb_00000
train_mlp_479bb_00008,2023-04-26_15-09-52,False,cvig,1,9.7188138961792,10.7.44.21,1041853,0.0530739,0.0530739,0.0530739,1682501992,1,479bb_00008


2023-04-26 15:09:53,606	INFO tune.py:945 -- Total run time: 7.62 seconds (7.56 seconds for the tuning loop).


== Status ==
Current time: 2023-04-26 15:09:53 (running for 00:00:07.58)
Using FIFO scheduling algorithm.
Logical resource usage: 0/16 CPUs, 0/2 GPUs
Result logdir: /home/akbar/ray_results/train_mlp_2023-04-26_15-09-46
Number of trials: 10/10 (10 TERMINATED)
+-----------------------+------------+--------------------+-----------------+-------+--------+------------------+-----------------+
| Trial name            | status     | loc                | hidden_size     |    lr |   iter |   total time (s) |   best_val_loss |
|-----------------------+------------+--------------------+-----------------+-------+--------+------------------+-----------------|
| train_mlp_479bb_00000 | TERMINATED | 10.7.44.21:1041780 | [75, 35, 15, 5] | 0.01  |    100 |         0.412217 |       0.0263909 |
| train_mlp_479bb_00001 | TERMINATED | 10.7.44.21:1041846 | [50]            | 0.001 |    100 |         0.787838 |       8.40317   |
| train_mlp_479bb_00002 | TERMINATED | 10.7.44.21:1041847 | [60, 30, 5]     | 0.0

In [44]:
# Print the best hyperparameters found by Ray Tune
best_config = analysis.get_best_config(metric="best_val_loss", mode="min")
print("Best config:", best_config)


Best config: {'hidden_size': [75, 35, 15, 5], 'lr': 0.01}


In [3]:
best_config = {'hidden_size': [75, 35, 15, 5], 'lr': 0.01}

In [4]:
import tempfile
import torch.utils.tensorboard as tb

In [5]:
log_dir  = 'D:\iitgn\Thesis\Spiro_Mask2'
LOGGER = tb.SummaryWriter(log_dir + '/train_fev1', flush_secs = 1)

In [6]:
from torch.utils.tensorboard import SummaryWriter
torch.manual_seed(42)
# Define the model
model = MLP(input_size=X_train.shape[1], hidden_sizes=best_config["hidden_size"], output_size=1)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=best_config["lr"])

# Train and validate the model
# num_epochs = 5000
# train_losses = []
# val_losses = []
best_val_loss = float('inf')

num_epochs = 200
for epoch in range(num_epochs):
    
    # Training
    model.train()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    LOGGER.add_scalar('Train Loss', loss, epoch)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # train_losses.append(loss.item())
    
    # Validation
    model.eval()
    with torch.no_grad():
        outputs = model(X_val)
        val_loss = criterion(outputs, y_val)
        # val_losses.append(loss.item())

        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            torch.save(model.state_dict(), 'best_model_hyper.pt') 
    
    # Print progress
    if (epoch+1 ==1) or (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item(), val_loss.item()))

# Evaluate the best model on the test set
model.load_state_dict(torch.load('best_model_hyper.pt'))
model.eval()
with torch.no_grad():
    print('val_loss', criterion(model(X_val), y_val).item())
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)
    print('MSE Test Loss: {:.4f}'.format(test_loss.item()))
    print('MAPE Test Loss: {:.4f}'.format(mape_loss(y_pred, y_test).item())) 

# Close the SummaryWriter instance
LOGGER.close() 

Epoch [1/200], Train Loss: 6.7897, Val Loss: 6.9958
Epoch [10/200], Train Loss: 4.1150, Val Loss: 4.1562
Epoch [20/200], Train Loss: 1.2169, Val Loss: 1.3173
Epoch [30/200], Train Loss: 0.3808, Val Loss: 0.2103
Epoch [40/200], Train Loss: 0.2164, Val Loss: 0.3280
Epoch [50/200], Train Loss: 0.1328, Val Loss: 0.0829
Epoch [60/200], Train Loss: 0.1044, Val Loss: 0.1358
Epoch [70/200], Train Loss: 0.0932, Val Loss: 0.0902
Epoch [80/200], Train Loss: 0.0843, Val Loss: 0.0728
Epoch [90/200], Train Loss: 0.0782, Val Loss: 0.0678
Epoch [100/200], Train Loss: 0.0722, Val Loss: 0.0684
Epoch [110/200], Train Loss: 0.0676, Val Loss: 0.0666
Epoch [120/200], Train Loss: 0.0632, Val Loss: 0.0673
Epoch [130/200], Train Loss: 0.0591, Val Loss: 0.0683
Epoch [140/200], Train Loss: 0.0551, Val Loss: 0.0690
Epoch [150/200], Train Loss: 0.0514, Val Loss: 0.0677
Epoch [160/200], Train Loss: 0.0478, Val Loss: 0.0667
Epoch [170/200], Train Loss: 0.0442, Val Loss: 0.0654
Epoch [180/200], Train Loss: 0.0408, Va

In [8]:
%load_ext tensorboard
%tensorboard --logdir=D:\iitgn\Thesis\Spiro_Mask2\train

Reusing TensorBoard on port 6006 (pid 2132), started 1:00:16 ago. (Use '!kill 2132' to kill it.)