### Optuna Tutorial 1 : 
https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/001_first.html#sphx-glr-tutorial-10-key-features-001-first-py


In [54]:
import optuna
def objective(trial):
    """Toy objective for the first Optuna tutorial: the parabola (x - 2)^2.

    Args:
        trial: object exposing ``suggest_float(name, low, high)``; it chooses
            the value of ``x`` for this evaluation.

    Returns:
        ``(x - 2) ** 2`` for the suggested ``x``.
    """
    # Register 'x' as the parameter to optimize, with bounds -3 and 3.
    suggested_x = trial.suggest_float('x', -3, 3)
    offset = suggested_x - 2
    return offset ** 2

In [55]:
# Build a study and run the optimization.
# A "trial" is a single execution of the objective function.
study = optuna.create_study()
study.optimize(objective, n_trials=20)


[32m[I 2022-12-15 07:00:05,858][0m A new study created in memory with name: no-name-f73a5d9c-0dbe-4a65-9ea4-a7d24e1e3f36[0m
[32m[I 2022-12-15 07:00:05,864][0m Trial 0 finished with value: 5.5384787646308 and parameters: {'x': -0.35339728151257965}. Best is trial 0 with value: 5.5384787646308.[0m
[32m[I 2022-12-15 07:00:05,866][0m Trial 1 finished with value: 4.444712417093395 and parameters: {'x': -0.1082486611150486}. Best is trial 1 with value: 4.444712417093395.[0m
[32m[I 2022-12-15 07:00:05,869][0m Trial 2 finished with value: 9.677057981205461 and parameters: {'x': -1.1107970009638142}. Best is trial 1 with value: 4.444712417093395.[0m
[32m[I 2022-12-15 07:00:05,870][0m Trial 3 finished with value: 1.9823178178378598 and parameters: {'x': 0.5920519122361578}. Best is trial 3 with value: 1.9823178178378598.[0m
[32m[I 2022-12-15 07:00:05,872][0m Trial 4 finished with value: 0.01086616640462463 and parameters: {'x': 2.1042409056207045}. Best is trial 4 with value: 0.

In [56]:
# Inspect the finished study; each item is printed on its own line(s).
print(
    study.best_params,         # parameter set of the best trial
    study.best_value,          # objective value reached by the best trial
    study.best_trial,          # full record of the best trial
    study.trials_dataframe(),  # every trial, collected into a DataFrame
    sep="\n",
)


{'x': 1.9385552073452568}
0.0037754625443843877
FrozenTrial(number=17, values=[0.0037754625443843877], datetime_start=datetime.datetime(2022, 12, 15, 7, 0, 5, 906325), datetime_complete=datetime.datetime(2022, 12, 15, 7, 0, 5, 907975), params={'x': 1.9385552073452568}, distributions={'x': UniformDistribution(high=3.0, low=-3.0)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=17, state=TrialState.COMPLETE, value=None)
    number      value             datetime_start          datetime_complete  \
0        0   5.538479 2022-12-15 07:00:05.862921 2022-12-15 07:00:05.864158   
1        1   4.444712 2022-12-15 07:00:05.866012 2022-12-15 07:00:05.866575   
2        2   9.677058 2022-12-15 07:00:05.868254 2022-12-15 07:00:05.868696   
3        3   1.982318 2022-12-15 07:00:05.870024 2022-12-15 07:00:05.870450   
4        4   0.010866 2022-12-15 07:00:05.871569 2022-12-15 07:00:05.872018   
5        5  15.519945 2022-12-15 07:00:05.873141 2022-12-15 07:00:05.873544   
6      

## Optuna Tutorial 2 : 
https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html
https://github.com/optuna/optuna-examples/blob/main/pytorch/pytorch_simple.py
https://www.youtube.com/watch?v=P6NwZVl8ttc



### 2-0. 공통


In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [2]:
#getting the dataloader dataset 
def get_mnist(batch_size):
    """Build the FashionMNIST training and validation data loaders.

    The dataset is stored in (and, for the training split, downloaded to) the
    current working directory. Images are converted with ``ToTensor`` and
    both loaders shuffle their data.

    Args:
        batch_size: number of samples per batch for both loaders.

    Returns:
        A ``(train_loader, valid_loader)`` tuple of ``DataLoader`` objects.
    """
    data_root = os.getcwd()
    to_tensor = transforms.ToTensor()

    # Training split; this is the call that triggers the download if needed.
    train_set = datasets.FashionMNIST(data_root, train=True, download=True, transform=to_tensor)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

    # Held-out split, used as the validation set.
    valid_set = datasets.FashionMNIST(data_root, train=False, transform=to_tensor)
    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=True)

    return train_loader, valid_loader

### 2-1. Without Optuna

In [8]:
# Hyperparameters are hard-coded in this section.
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs on machines without a GPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class my_model(nn.Module):
    """Fully connected classifier for flattened 28x28 images (10 classes).

    The network is ``n_layers`` blocks of Linear -> ReLU -> Dropout followed
    by a 10-way Linear layer and a LogSoftmax, so its output is meant to be
    fed to ``F.nll_loss``.

    The constructor arguments are the values one would tune with Optuna:
      * ``n_layers``
      * ``out_features`` (shared by every hidden layer here)
      * ``dropout_rate``

    Args:
        n_layers: number of hidden Linear/ReLU/Dropout blocks.
        out_features: width of every hidden layer.
        dropout_rate: dropout probability used after every hidden layer.
    """

    def __init__(self, n_layers=3, out_features=64, dropout_rate=0.5):
        super().__init__()
        self.n_layers = n_layers

        layers = []
        in_features = 28 * 28  # the first layer sees the flattened image
        for _ in range(self.n_layers):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            # The next block consumes what this block produced.
            in_features = out_features

        layers.append(nn.Linear(in_features, 10))  # classification layer
        layers.append(nn.LogSoftmax(dim=1))  # log-probabilities over the classes
        self.total = nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of flattened images."""
        return self.total(x)

    
# Build the baseline model and move it to the training device.
model = my_model().to(DEVICE)

# Hyperparameters fixed by hand here; these are the values one would tune.
lr = 1e-2
BATCHSIZE = 64
optimizer = optim.Adam(model.parameters(), lr=lr)

train_loader, valid_loader = get_mnist(batch_size=BATCHSIZE)

# Only a slice of each split is used per epoch so that epochs stay fast.
N_TRAIN_BATCHES = 30
N_VALID_BATCHES = 10

for epoch in range(10):
    # ---- training ----
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # `>=` so that exactly N_TRAIN_BATCHES batches are used (a `>`
        # comparison would let one extra batch through).
        if batch_idx >= N_TRAIN_BATCHES:
            break

        # Flatten each image to a vector and move the batch to the device.
        data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
    # Note: this is the loss of the last training batch only.
    print(f"EPOCH : {epoch} training loss, : {loss.item()}")

    # ---- validation ----
    model.eval()
    correct = 0          # correctly classified samples
    n_seen = 0           # samples actually evaluated
    valid_loss_sum = 0.0  # summed per-sample loss
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            if batch_idx >= N_VALID_BATCHES:
                break

            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            output = model(data)
            # Index of the max log-probability is the predicted class.
            pred = output.argmax(dim=1, keepdim=True)

            valid_loss_sum += F.nll_loss(output, target, reduction="sum").item()
            correct += pred.eq(target.view_as(pred)).sum().item()
            n_seen += target.size(0)

    # Divide by the number of samples really evaluated so the accuracy can
    # never exceed 1 and the loss is an average over the whole subset.
    accuracy = correct / n_seen
    valid_loss = valid_loss_sum / n_seen

    print(f"EPOCH : {epoch} validation loss, accuracy : {valid_loss} / {accuracy}")

EPOCH : 0 training loss, : 1.44558846950531
EPOCH : 0 validation loss, accuracy : 1.2670201063156128 / 0.5609375
EPOCH : 1 training loss, : 1.2591526508331299
EPOCH : 1 validation loss, accuracy : 1.0117079019546509 / 0.6578125
EPOCH : 2 training loss, : 1.1057322025299072
EPOCH : 2 validation loss, accuracy : 0.986359715461731 / 0.6828125
EPOCH : 3 training loss, : 1.3560160398483276
EPOCH : 3 validation loss, accuracy : 0.9358760118484497 / 0.703125
EPOCH : 4 training loss, : 1.24252450466156
EPOCH : 4 validation loss, accuracy : 0.7710863947868347 / 0.7625
EPOCH : 5 training loss, : 1.2464555501937866
EPOCH : 5 validation loss, accuracy : 0.7817651033401489 / 0.8078125
EPOCH : 6 training loss, : 1.2980538606643677
EPOCH : 6 validation loss, accuracy : 0.8685086965560913 / 0.7015625
EPOCH : 7 training loss, : 1.2039804458618164
EPOCH : 7 validation loss, accuracy : 1.006385087966919 / 0.715625
EPOCH : 8 training loss, : 1.152542233467102
EPOCH : 8 validation loss, accuracy : 0.810612

### 2-2. With Optuna

In [9]:
import optuna
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs on machines without a GPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

##model takes in trial as input 
class my_model(nn.Module):
    """MLP classifier whose architecture is sampled from an Optuna trial.

    Values requested from ``trial``, in this order:
      * ``n_layers``: number of hidden blocks (int, bounds 1 and 3).
      * ``n_units_l{i}``: width of hidden layer ``i`` (int, bounds 4 and 64).
      * ``dropout_l{i}``: dropout probability after layer ``i`` (float,
        bounds 0.00001 and 0.5).

    Each hidden block is Linear -> ReLU -> Dropout; the head is a 10-way
    Linear layer followed by LogSoftmax.
    """

    def __init__(self, trial):
        super().__init__()
        self.n_layers = trial.suggest_int(name="n_layers", low=1, high=3)

        blocks = []
        width_in = 28 * 28  # the first layer sees the flattened image
        for layer_idx in range(self.n_layers):
            width_out = trial.suggest_int(f"n_units_l{layer_idx}", 4, 64)
            drop_p = trial.suggest_float(f"dropout_l{layer_idx}", 0.00001, 0.5)

            blocks.extend([nn.Linear(width_in, width_out), nn.ReLU(), nn.Dropout(drop_p)])

            # The next block consumes what this block produced.
            width_in = width_out

        # Classification head: 10 classes, normalized to log-probabilities.
        blocks.extend([nn.Linear(width_in, 10), nn.LogSoftmax(dim=1)])
        self.total = nn.Sequential(*blocks)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of flattened images."""
        return self.total(x)
    

def objective(trial):
    """Train an MLP with trial-suggested hyperparameters and return its accuracy.

    The trial supplies the architecture (inside ``my_model``), the learning
    rate, the batch size and the optimizer class. The model is trained for a
    few epochs on a small slice of the training data and scored on a small
    slice of the validation data; each epoch's accuracy is reported to the
    trial as an intermediate value.

    Args:
        trial: Optuna trial used to suggest hyperparameters and to record the
            per-epoch accuracy.

    Returns:
        The validation accuracy of the final epoch (the value to maximize).
    """
    n_epochs = 10
    # Only a slice of each split is used per epoch so that trials stay fast.
    n_train_batches = 30
    n_valid_batches = 10

    # The model draws its own architecture parameters from the trial.
    model = my_model(trial).to(DEVICE)

    # Remaining hyperparameters to optimize.
    lr = trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    batch_size = trial.suggest_int("batch_size", 16, 128, log=True)
    optimizer_choice = trial.suggest_categorical('optimizer', ["Adam", "RMSprop", "SGD"])

    # Turn the suggested values into the actual training objects.
    optimizer = getattr(optim, optimizer_choice)(model.parameters(), lr=lr)
    train_loader, valid_loader = get_mnist(batch_size=batch_size)

    for epoch in range(n_epochs):
        # ---- training ----
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # `>=` so that exactly n_train_batches batches are used (a `>`
            # comparison would let one extra batch through).
            if batch_idx >= n_train_batches:
                break

            # Flatten each image to a vector and move the batch to the device.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
        # Note: this is the loss of the last training batch only.
        print(f"EPOCH : {epoch} training loss, : {loss.item()}")

        # ---- validation ----
        model.eval()
        correct = 0          # correctly classified samples
        n_seen = 0           # samples actually evaluated
        valid_loss_sum = 0.0  # summed per-sample loss
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                if batch_idx >= n_valid_batches:
                    break

                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

                output = model(data)
                # Index of the max log-probability is the predicted class.
                pred = output.argmax(dim=1, keepdim=True)

                valid_loss_sum += F.nll_loss(output, target, reduction="sum").item()
                correct += pred.eq(target.view_as(pred)).sum().item()
                n_seen += target.size(0)

        # Divide by the number of samples really evaluated: dividing by a
        # nominal subset size while counting one batch more would inflate the
        # objective and could push it above 1.
        accuracy = correct / n_seen
        valid_loss = valid_loss_sum / n_seen

        # Record this epoch's accuracy as an intermediate value of the trial.
        trial.report(accuracy, epoch)

        print(f"EPOCH : {epoch} validation loss, accuracy : {valid_loss} / {accuracy}")

    return accuracy  # the quantity the study maximizes
        

In [3]:
# Run the search: accuracy is maximized, with the run capped by
# n_trials=10 and timeout=600.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10, timeout=600)

# Summary of the whole study.
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))

# Details of the best trial found.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():  # one line per tuned hyperparameter
    print(f"    {key}: {value}")



    



NameError: name 'optuna' is not defined

# TODO
1. 준범쌤 주신 것으로 pruning 을 parallel 하게 돌리는 법을 보기 (근데 굳이 parallel하게 돌려야할지 싶다)
    * 아그리고 don't use DDP with optuna (don't mix well)
2. study the pruner and implement it as an example (2-2)
3. study and implement CV version 
4. should BN statistics be frozen during fine-tuning?
    * optuna로 하기!



![image.png](attachment:8f0a2057-d987-442c-849c-85960553995c.png)


![image.png](attachment:0545eee0-3986-41b8-b227-55658d389c76.png)


In [2]:
# Calling these unbound methods with no arguments can only raise a TypeError
# (``self``, ``name``, ``low`` and ``high`` are missing). Presumably the goal
# was to look up their signatures, so show the documentation instead.
help(optuna.trial.Trial.suggest_float)
help(optuna.trial.Trial.suggest_int)

NameError: name 'optuna' is not defined

### Optuna CV
https://stackoverflow.com/questions/63224426/how-can-i-cross-validate-by-pytorch-and-optuna
