In [1]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from preprocessing import MSC, MeanCenter, Autoscale, trans2absr

from SPanalysis import SpectralAnalysis
# Image display
import matplotlib.pyplot as plt
import numpy as np

# PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter

device = torch.device('cpu')
run_on_gpu = False
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('yes')
    run_on_gpu = True

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd





In [2]:
import torch

False

In [2]:
#import data
dataset='dataset\hyperspectral\combind.xlsx'

In [3]:
analysis = SpectralAnalysis(dataset,'PLSR')
analysis.preprocess_data()

In [4]:
def rwctocat(clss):
    if clss== 'Well watered':
        return 0
    elif clss== 'Mild stress':
        return 1
    else:
        return 2

X_train, y_train_reg, y_train_class, X_test, y_test_reg, y_test_class = analysis.X_train, analysis.y_train_reg.iloc[:,4],analysis.y_train_class,analysis.X_test,analysis.y_test_reg.iloc[:,4],analysis.y_test_class

y_train_class = y_train_class.astype('category').cat.codes.to_numpy()
y_test_class =  y_test_class.astype('category').cat.codes.to_numpy()

In [5]:
y_train_class

array([0, 0, 1, 2, 1, 0, 2, 2, 0, 1, 1, 1, 0, 2, 0, 2, 1, 0, 2, 1, 2, 0,
       0, 2, 0, 0, 2, 1, 1, 2, 2, 0, 1, 1, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1,
       1, 0, 2, 0, 2, 0, 2, 1, 1, 2, 1, 2, 1, 0, 2, 0, 2, 1, 2, 1, 2, 0,
       1, 1, 1, 0, 1, 2, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 1, 2, 1, 1, 0, 1,
       1, 2, 2, 1, 1, 0, 0, 0, 1, 0, 2, 1, 1, 1, 1, 1, 0, 1, 2, 0, 2, 1,
       2, 0, 0, 2, 0, 0, 0, 2, 1, 2, 0, 1, 2, 2, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 2, 2, 0, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 2, 2, 2, 0, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0], dtype=int8)

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

class SpectralPatchEmbedding(nn.Module):
    """
    Embeds spectral data into patches with a specified embedding dimension.
    
    Parameters:
        patch_size (int): The size of each patch.
        embedding_dim (int): The dimensionality of the embedding output.
    """
    def __init__(self, patch_size: int, embedding_dim :int)-> None:
        super().__init__()
        self.patch_size = patch_size
        self.embedding = nn.Linear(patch_size, embedding_dim)

    def forward(self, x: torch.Tensor)-> torch.Tensor:
        x = x.unfold(1, self.patch_size, self.patch_size).contiguous()
        x = x.view(x.size(0), -1, self.patch_size)
        x = self.embedding(x)
        return x

class PositionalEncoding(nn.Module):
    """
    Adds a positional encoding to embedded spectral data.
    
    Parameters:
        d_model (int): The dimensionality of the model's input.
        max_len (int, optional): The maximum length of the input sequences. Defaults to 5000.
        base (float, optional): The base of the logarithm used in calculating the div_term. Defaults to 10000.0.
    """
    def __init__(self, d_model: int, max_len: int = 5000, base: float = 10000.0) -> None:
        super(PositionalEncoding, self).__init__()
        self.max_len = max_len
        self.base = base
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(base) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.pe[:x.size(1)]
        return x

class FeatureExtractor(nn.Module):
    '''
    FeatureExtractor: Extracts features from spectral data using a Transformer encoder.
    '''
    def __init__(self, num_features : int , patch_size : int , embedding_dim : int, num_heads : int, num_encoder_layers : int, 
                 dim_feedforward : int,activation_fn=F.gelu)-> None:
        super().__init__()
        self.cls_token = nn.Parameter(torch.randn(1, 1, embedding_dim))
        self.patch_embedding = SpectralPatchEmbedding(patch_size, embedding_dim)
        self.pos_encoder = PositionalEncoding(embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads, dim_feedforward=dim_feedforward, activation=activation_fn)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

    def forward(self, inputs : torch.Tensor) -> torch.Tensor:
        batch_size = inputs.size(0)
        #print(f'inputs= {inputs.shape}')
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        inputs = self.patch_embedding(inputs) #[batch_size,patches,embedding]
        #print(f'after patch_embedding= {inputs.shape}')
        inputs = torch.cat((cls_tokens, inputs), dim=1) #[batch_size,patches+ cls token,embedding]
        #print(f'after class token= {inputs.shape}')
        inputs = self.pos_encoder(inputs)
        output = self.transformer_encoder(inputs) #[batch_size,patches+ cls token,embedding]
        #print(f'after transformer= {output.shape}')
        return output

class ClassificationHead(nn.Module):
    '''
    ClassificationHead: Performs classification on features extracted from spectral data.
    Incorporates AdaptiveAvgPool1d for pooling over sequence dimension.
    '''
    def __init__(self, input_features : int, hidden_features: int,embedding_dim: int, num_classes: int
                 ,activation_fn=F.gelu)-> None:
        super(ClassificationHead, self).__init__()
        
        self.fc1 = nn.Linear(input_features, hidden_features)
        self.bn1 = nn.BatchNorm1d(hidden_features)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, num_classes)
        # Initialize AdaptiveAvgPool1d
        # Here, output size is set to 1 to pool across all sequence elements
        self.adaptive_pool = nn.AdaptiveAvgPool1d(int(embedding_dim//2))
        self.activation=activation_fn

    def forward(self, inputs : torch.Tensor) -> torch.Tensor:
        

        #print(f'in classification {inputs.shape}')
        
        # Apply adaptive pooling to reduce seq_len to 1
        x = self.adaptive_pool(inputs) 
        #print(f'in classification after pool {x.shape}')
        # Flatten the output for the linear layer
        x = torch.flatten(x, 1)  # Now x should be [batch_size, features= embedding//2 * patches * cls token]
        # Continue through the classification head
        #print(f'in classification after flat {x.shape}')
        x = self.activation(self.bn1(self.fc1(x)))
       
        x = self.dropout(x)
        x=self.activation(self.bn1(self.fc2(x)))
        x = self.fc3(x)
        #print(f'in classification after after flat {x.shape}')
        # Apply softmax to the output layer for classification probabilities
        return F.softmax(x, dim=1) 


class RegressionHead(nn.Module):
    '''
    RegressionHead: Performs regression on features extracted from spectral data.
    '''
    def __init__(self, input_features : int, hidden_features: int,embedding_dim: int, output_features: int,
                 activation_fn=F.gelu)-> None:
        super(RegressionHead, self).__init__()
        
        self.fc1 = nn.Linear(input_features, hidden_features)
        self.bn1 = nn.BatchNorm1d(hidden_features)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, output_features)
        self.dropout2 = nn.Dropout(0.2)
        self.activation=activation_fn
        self.adaptive_pool = nn.AdaptiveAvgPool1d(int(embedding_dim//2))

    def forward(self, inputs : torch.Tensor) -> torch.Tensor:
        #print(f'inputs 1 shape in regression {inputs.shape}')
        x = self.adaptive_pool(inputs) # Assuming global average pooling over sequence dimension
        x = torch.flatten(x, 1)
        #print(f'x shape in regression {x.shape}')
        
        x = self.activation(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x= self.activation(self.bn1(self.fc2(x)))
        x = self.dropout2(x)
        #print(f'after regression= {x.shape}')
        return self.fc3(x)

class CombinedModel(nn.Module):
    '''
    CombinedModel: Integrates FeatureExtractor, ClassificationHead, and RegressionHead.
    Allows optional bypass of ClassificationHead based on a flag.
    '''
    def __init__(self, num_features : int, patch_size : int, embedding_dim : int,
                 num_heads : int, num_encoder_layers : int, dim_feedforward : int,
                 hidden_features_class: int, num_classes: int, hidden_features_reg: int, output_features_reg: int,
                 use_classification : bool=True,activation_fn=F.gelu)-> None:
        
        super().__init__()
        self.classification_input=((num_features//patch_size)+1)*(embedding_dim//2) #calculating classification inputs. +1 is the class token and //2 is for adaptive pooling factor
        if use_classification:
            regression_input= ((num_features//patch_size)+2)*(embedding_dim//2)
        else:
            regression_input= ((num_features//patch_size)+1)*(embedding_dim//2)
            
        self.feature_extractor = FeatureExtractor(num_features, patch_size, embedding_dim, num_heads, num_encoder_layers, dim_feedforward,activation_fn=F.gelu)
        self.use_classification = use_classification
        self.classification_head = ClassificationHead(self.classification_input, hidden_features_class,embedding_dim, num_classes,activation_fn=F.gelu)
        self.regression_head = RegressionHead(regression_input,hidden_features_reg,embedding_dim, 
                                              output_features_reg,activation_fn=F.gelu)
        self.adjust_class_score=nn.Linear(num_classes,embedding_dim)
        
    def forward(self, inputs : torch.Tensor) -> torch.Tensor:
        features = self.feature_extractor(inputs)
        
        if self.use_classification:
            class_scores = self.classification_head(features)
            #print(features.shape,class_scores.shape)
            class_scores_updated=self.adjust_class_score(class_scores)
            #print(features.shape,class_scores_updated.unsqueeze(1).shape)
            reg_input=torch.cat((features, class_scores_updated.unsqueeze(1)), dim=1) 
            #reg_input = features * class_scores_updated.unsqueeze(1)
            #print(f'regression features {reg_input.shape}')
            reg_output = self.regression_head(reg_input)
            return class_scores, reg_output
        else:
            reg_input=features
            reg_output = self.regression_head(reg_input)
            return None,reg_output


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

class SpectralPatchEmbedding(nn.Module):
    def __init__(self, patch_size: int, embedding_dim: int) -> None:
        super().__init__()
        self.patch_size = patch_size
        self.embedding = nn.Linear(patch_size, embedding_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.unfold(1, self.patch_size, self.patch_size).contiguous()
        x = x.view(x.size(0), -1, self.patch_size)
        x = self.embedding(x)
        return x

class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, max_len: int = 5000, base: float = 10000.0) -> None:
        super(PositionalEncoding, self).__init__()
        self.max_len = max_len
        self.base = base
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(base) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.pe[:x.size(1)]
        return x

class FeatureExtractor(nn.Module):
    def __init__(self, num_features: int, patch_size: int, embedding_dim: int, num_heads: int, num_encoder_layers: int, dim_feedforward: int, activation_fn=F.gelu) -> None:
        super().__init__()
        self.cls_token = nn.Parameter(torch.randn(1, 1, embedding_dim))
        self.patch_embedding = SpectralPatchEmbedding(patch_size, embedding_dim)
        self.pos_encoder = PositionalEncoding(embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads, dim_feedforward=dim_feedforward, activation=activation_fn)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        batch_size = inputs.size(0)
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        inputs = self.patch_embedding(inputs)
        inputs = torch.cat((cls_tokens, inputs), dim=1)
        inputs = self.pos_encoder(inputs)
        output = self.transformer_encoder(inputs)
        return output

class ClassificationHead(nn.Module):
    def __init__(self, input_features: int, hidden_features: int, embedding_dim: int, num_classes: int, activation_fn=F.gelu) -> None:
        super(ClassificationHead, self).__init__()
        self.fc1 = nn.Linear(input_features, hidden_features)
        self.bn1 = nn.BatchNorm1d(hidden_features)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, num_classes)
        self.adaptive_pool = nn.AdaptiveAvgPool1d(int(embedding_dim // 2))
        self.activation = activation_fn

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        x = self.adaptive_pool(inputs)
        x = torch.flatten(x, 1)
        x = self.activation(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.activation(self.bn1(self.fc2(x)))
        x = self.fc3(x)
        return F.softmax(x, dim=1)

class RegressionHead(nn.Module):
    def __init__(self, input_features: int, hidden_features: int, embedding_dim: int, output_features: int, activation_fn=F.gelu) -> None:
        super(RegressionHead, self).__init__()
        self.fc1 = nn.Linear(input_features, hidden_features)
        self.bn1 = nn.BatchNorm1d(hidden_features)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, output_features)
        self.dropout2 = nn.Dropout(0.2)
        self.activation = activation_fn
        self.adaptive_pool = nn.AdaptiveAvgPool1d(int(embedding_dim // 2))

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        x = self.adaptive_pool(inputs)
        x = torch.flatten(x, 1)
        x = self.activation(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.activation(self.bn1(self.fc2(x)))
        x = self.dropout2(x)
        return self.fc3(x)

class CombinedModel(nn.Module):
    def __init__(self, num_features: int, patch_size: int, embedding_dim: int, num_heads: int, num_encoder_layers: int, dim_feedforward: int, hidden_features_class: int, num_classes: int, hidden_features_reg: int, output_features_reg: int, use_classification: bool = True, activation_fn=F.gelu) -> None:
        super().__init__()
        self.classification_input = ((num_features // patch_size) + 1) * (embedding_dim // 2)
        regression_input = ((num_features // patch_size) + (2 if use_classification else 1)) * (embedding_dim // 2)

        self.feature_extractor = FeatureExtractor(num_features, patch_size, embedding_dim, num_heads, num_encoder_layers, dim_feedforward, activation_fn=F.gelu)
        self.use_classification = use_classification
        self.classification_head = ClassificationHead(self.classification_input, hidden_features_class, embedding_dim, num_classes, activation_fn=F.gelu)
        self.regression_head = RegressionHead(regression_input, hidden_features_reg, embedding_dim, output_features_reg, activation_fn=F.gelu)
        self.adjust_class_score = nn.Linear(num_classes, embedding_dim)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        features = self.feature_extractor(inputs)
        if self.use_classification:
            class_scores = self.classification_head(features)
            class_scores_updated = self.adjust_class_score(class_scores)
            reg_input = torch.cat((features, class_scores_updated.unsqueeze(1)), dim=1)
            reg_output = self.regression_head(reg_input)
            return class_scores, reg_output
        else:
            reg_output = self.regression_head(features)
            return None, reg_output


In [14]:
class MultiTaskLossWrapper(nn.Module):
    """
    MultiTaskLossWrapper calculate the weighted loss based on classification scores and regression scores. 
    Weights are learnable able parameters. 
    """
    def __init__(self, model,train_flag=True)-> None:
        super(MultiTaskLossWrapper, self).__init__()
        self.model = model
        self.classification_loss = nn.CrossEntropyLoss()
        self.regression_loss = nn.MSELoss()
        # Initialize learnable weights for each loss component
        self.task_weights = nn.Parameter(torch.ones(2, dtype=torch.float))
        self.flag=train_flag

    def forward(self, inputs, targets_classification, targets_regression):
        self.model.train(self.flag)
        class_scores, reg_output = self.model(inputs)
       
    
        loss_classification = self.classification_loss(class_scores, targets_classification)

        loss_regression = self.regression_loss(reg_output, targets_regression)
        # Combine losses with learnable weights
        #apply softmax to task_weights to ensure they are non-negative and sum to 1
#         weights = F.softmax(self.task_weights, dim=0)
        
#         combined_loss = torch.sum(weights[0] * loss_classification +
#                                   weights[1] * loss_regression)
        combined_loss=torch.sum(loss_classification + loss_regression)
#         print(f'first weight {weights[0]} \n 2nd weight {weights[1]}')
        return combined_loss, loss_classification, loss_regression
# class MultiTaskLossWrapper(nn.Module):
#     """
#     MultiTaskLossWrapper calculates the weighted loss based on classification scores and regression scores
#     using homoscedastic uncertainty as learnable parameters to weight the losses.
#     """
#     def __init__(self, model, train_flag=True):
#         super(MultiTaskLossWrapper, self).__init__()
#         self.model = model
#         self.classification_loss = nn.CrossEntropyLoss()
#         self.regression_loss = nn.MSELoss()
#         # Initialize log variance (uncertainty) parameters for each task with uniform distribution
#         #self.log_sigma1 = nn.Parameter(torch.rand(1) * (1 - 0.2) + 0.2) # For classification
#         #self.log_sigma2 = nn.Parameter(torch.rand(1) * (1 - 0.2) + 0.2) # For regression

#         self.log_sigma = nn.Parameter(torch.ones(2, dtype=torch.float))
        
#         self.train_flag = train_flag

#     def forward(self, inputs, targets_classification, targets_regression):
#         self.model.train(self.train_flag)
#         class_scores, reg_output = self.model(inputs)
        
#         # Compute classification and regression losses
#         loss = [self.classification_loss(class_scores, targets_classification),
#                 self.regression_loss(reg_output, targets_regression)]
#         loss_sum = 0
#         for i, l in enumerate(loss):
#             loss_sum += (0.5 / (self.log_sigma[i] ** 2)) * l + torch.log(1 + self.log_sigma[i] ** 2)
#         #print(f'Classification Loss Weight: {self.log_sigma[0]}, Regression Loss Weight: {self.log_sigma[1]}')
#         return loss_sum,(0.5 / (self.log_sigma** 2)),loss

In [15]:
#### 
num_features = X_train.shape[1]  # Number of features in each sample
patch_size = 8 # Arbitrary choice for the sake of example
embedding_dim = 64  # Embedding dimension after patch embedding
num_heads = 4  # Number of attention heads in Transformer encoder
num_encoder_layers = 8 # Number of layers in Transformer encoder
dim_feedforward = 128 # Dimension of feedforward network in Transformer encoder
hidden_features_class =64  # Hidden layer size for  (num_features//patch_size + cls token) * (embedding_dim got half due to adp )
num_classes = 3  # Number of classes for classification
hidden_features_reg = 64  # Hidden layer size for regression head
output_features_reg = 1  # Output size for regression (single value prediction)
use_classification = True # Flag to use classification head
batch_size=16
# Initialize the combined model
combined_model = CombinedModel(num_features, patch_size, embedding_dim, num_heads, num_encoder_layers,
                               dim_feedforward, hidden_features_class, num_classes, hidden_features_reg,
                               output_features_reg, use_classification)
loss_wrapper= MultiTaskLossWrapper(combined_model)

optimizer = optim.Adam(combined_model.parameters(), lr=0.001)  # Default weight decay for all parameters not explicitly set



# Create a TensorDataset
train_dataset = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train_class).long(), torch.from_numpy(y_train_reg.values).float())
#train_dataset = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train_reg.values).float())

val_dataset = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test_class).long(), torch.from_numpy(y_test_reg.values).float())

#val_dataset = TensorDataset(torch.from_numpy(X_test).float(),  torch.from_numpy(y_test_reg.values).float())

# Create a DataLoader
training_loader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=1)


validation_loader = torch.utils.data.DataLoader(val_dataset,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                num_workers=2)
writer = SummaryWriter('runs/actual3')

In [17]:
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0

    for i, data in enumerate(training_loader, 0):
        # basic training loop
        x_batch, y_class_batch, y_reg_batch = data
        
        optimizer.zero_grad()
#         combined_loss,weights,separate_loss = loss_wrapper(x_batch, y_class_batch, y_reg_batch)
    
        combined_loss,loss_class,loss_reg = loss_wrapper(x_batch, y_class_batch, y_reg_batch)
        loss_class.backward()
        loss_reg.backward()
        optimizer.step()


        running_loss += loss_reg.item()
        
        if i % 10 == 0:    # Every 10 mini-batches...
#             print('Batch {}'.format(i + 1))
            # Check against the validation set
            running_vloss = 0.0
            loss_wrapper.train_flag=False
             # Don't need to track gradents for validation
            for j, vdata in enumerate(validation_loader, 0):
                vx_batch, vy_class_batch, vy_reg_batch = vdata
                vcombined_loss,vloss_class,vloss_reg= loss_wrapper(vx_batch, vy_class_batch, vy_reg_batch)
                running_vloss += vloss_reg.item()
                
            loss_wrapper.train_flag=True
            
            avg_loss = running_loss / 1000
            avg_vloss = running_vloss / len(validation_loader)
            
            # Log the running loss averaged per batch
            writer.add_scalars('Training vs. Validation Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss },
                            epoch * len(training_loader) + i)

            running_loss = 0.0
#             if device.type == 'cuda':
#                 print(torch.cuda.get_device_name(0))
#                 print('Memory Usage:')
#                 print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**2,1), 'GB')
#                 print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**2,1), 'GB')
    #print(f'Epoch {epoch}----train loss {avg_loss}---val loss {avg_vloss}, \n weights_cls {weights[0]} weights_reg {weights[1]}')
    print(f'Epoch {epoch}----train loss {avg_loss}---val loss {avg_vloss}')
print('Finished Training')

writer.flush()

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [22]:
from Inception_model import Inception
from utils import init_weights,Make_one_one_plot,benchmark,train_One_epoch,evaluate

In [23]:
model=Inception()
model = model.apply(init_weights)
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(),lr=0.00316) 

scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0002633, max_lr=0.00316,step_size_up=3664, 
                                              step_size_down=None, mode='exp_range',gamma=0.99994,scale_fn=None,
                                              cycle_momentum=False)

In [24]:
training_losses=[]
training_lrs=[]
validation_losses=[]
best_valid=1000
for epoch in range(100):
    training_loss,lrs=train_One_epoch(model,training_loader,optimizer,loss_fn)  
    training_losses.append(training_loss)
    training_lrs.append(lrs)
    #Evaluate the model
    validation_loss=evaluate(model,val_loader,loss_fn)
    validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
    
    #Check the losses and save the model
    if validation_loss < best_valid:
        best_valid = validation_loss
        print("New Best model found")
#         torch.save({
#                 'epoch': epoch,
#                 'model_state_dict': model.state_dict(),
#                 'optimizer_state_dict': optimizer.state_dict(),
#                 'train_loss':training_loss,
#                 'val_loss':validation_loss
#                 },f"../Deep learning approach/Weights/test_wts.pt")
    


[1] Training loss: 1.937	 Validation loss: 1.479
New Best model found
[2] Training loss: 1.792	 Validation loss: 1.296
New Best model found
[3] Training loss: 1.570	 Validation loss: 1.444
[4] Training loss: 1.713	 Validation loss: 1.912
[5] Training loss: 1.344	 Validation loss: 1.426
[6] Training loss: 1.385	 Validation loss: 1.311
[7] Training loss: 1.443	 Validation loss: 1.703
[8] Training loss: 1.799	 Validation loss: 1.388
[9] Training loss: 1.969	 Validation loss: 1.815
[10] Training loss: 1.651	 Validation loss: 1.739
[11] Training loss: 1.407	 Validation loss: 1.677
[12] Training loss: 1.416	 Validation loss: 1.458
[13] Training loss: 1.518	 Validation loss: 1.469
[14] Training loss: 1.468	 Validation loss: 1.222
New Best model found
[15] Training loss: 1.653	 Validation loss: 1.978
[16] Training loss: 1.367	 Validation loss: 1.436
[17] Training loss: 1.337	 Validation loss: 1.163
New Best model found
[18] Training loss: 1.302	 Validation loss: 1.312
[19] Training loss: 1.245

In [1]:
from torchviz import make_dot

ModuleNotFoundError: No module named 'torchviz'

In [2]:
!pip install torchviz

