In [16]:
import torch
from torch import nn, optim
import datetime
import os
import numpy as np
import scipy.io as sio
import torch
from torch import nn, optim
from tqdm import tqdm
from tqdm import trange
import matplotlib.pyplot as plt
import torchvision.models
import hiddenlayer as hl

In [17]:
# Check GPU Availability

haveCuda = torch.cuda.is_available()
#print(haveCuda)

In [18]:
# NN definition

class CarBadnessGuesser(nn.Module):
    def __init__(self):
        super(CarBadnessGuesser, self).__init__()

        #self.dataset = read_data()
        #self.valid_freq = 10

        self.model = nn.Sequential(
            nn.Conv3d(in_channels=1, out_channels=3, kernel_size=(10, 5, 5), stride=(10, 5, 5)),
            nn.BatchNorm3d(3),
            nn.Conv3d(in_channels=3, out_channels=2, kernel_size=5),
            nn.BatchNorm3d(2),
            nn.Conv3d(in_channels=2, out_channels=1, kernel_size=3),
            nn.BatchNorm3d(1),
            nn.AdaptiveMaxPool3d((1, 1, 10)),
        )
        
        self.linear = nn.Sequential(
            nn.Linear(in_features=10, out_features=5),
            nn.ReLU(),
            nn.Linear(in_features=5, out_features=1),
            nn.Sigmoid()
        )
        if haveCuda:
            self.linear.cuda()
            self.model.cuda()  
    
    def forward(self, x):
        conv_out = self.model(x.unsqueeze(0).unsqueeze(0))
        return self.linear(conv_out.squeeze(-2).squeeze(-2))

In [19]:
# Read data (returns datadict)

def read_data(base_path='./Data',split_size=[0.4, 0.4, 0.2]):  
    #  the split_size gives how much 
    #  of the data goes to the train/test.
    """
    Reads all of the .mat files in the given base_path, and returns a dict with the data found there.
    :param split_size:
    :param base_path: The directory that should be read in.
    :return: a dict, containing the EES and difference tensors.
    """
    i = 0
    for file in os.listdir(base_path):
        i = i + 1
    pbar = tqdm(total=i)

    data_dict = {}
    for file in os.listdir(base_path):
        num, data_type = file.split('_')
        data_type = data_type.split('.')[0]
        num = int(num)
        if "EES" in data_type:
            tensor_in = sio.loadmat(os.path.join(base_path, file))['EES_value']
            tensor_in = torch.FloatTensor(tensor_in).squeeze(0)
        else:
            tensor_in = sio.loadmat(os.path.join(base_path, file))['Kulonbseg']
            tensor_in = torch.FloatTensor(tensor_in)
        try:
            data_dict[num][data_type] = tensor_in
        except KeyError:
            data_dict[num] = {data_type: tensor_in}
        pbar.update()
    pbar.close()

    new_data = []
    for key in data_dict.keys():
        new_data.append(data_dict[key])
    if isinstance(split_size, list):
        training_samples = int(split_size[0] * len(new_data))
        valid_samples = int(split_size[1] * len(new_data))
        test_samples = int(split_size[2] * len(new_data))
        while sum([training_samples, valid_samples, test_samples]) != len(new_data):
            training_samples += 1
        new_datadict = {'train': new_data[:training_samples],
                        'validation': new_data[training_samples:training_samples + valid_samples],
                        'test': new_data[-test_samples:]}
    else:
        new_datadict = {'train': new_data,
                        'validation': new_data,
                        'test': new_data}
    
    print("Data loaded")
    return new_datadict

In [20]:
# Dataset
data = read_data()

trainSet = data['train']
testSet = data['test']
# Dataloaders are responsible for giving random (if shuffle is true) minibatches
trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=5, shuffle=True)
testLoader = torch.utils.data.DataLoader(testSet, batch_size=5, shuffle=False)



100%|██████████| 100/100 [00:11<00:00,  8.92it/s]


Data loaded


In [21]:
# Create Loss

def createLoss():
    return nn.MSELoss()

In [22]:
# # We use cros entropy, since CIFAR10 is a classification set
# def createLoss():
#     return nn.CrossEntropyLoss()

In [23]:
# Create Learning Rate Scheduler

# Run for 50 epochs - 1 epoch means the networks sees every training image once
numEpoch = 50

# Cosine annealing learning rate scheduler - in 50 epochs the lr will become 0.01
def createScheduler():
    return optim.lr_scheduler.CosineAnnealingLR(optimizer,numEpoch,eta_min=1e-2)

In [24]:
 # create Optimizer
    
def createOptimizer(self):
    return optim.Adam(list(self.model.parameters()) + list(self.linear.parameters()), lr=0.0001)

In [25]:
# from torch import optim

# # Stochastic Gradient Descent (SGD) optimizer with Nesterov momentum and 0.1 learning rate
# # Weight decay is the relative weight of the L2 regularization term
# def createOptimizer():
#     return optim.SGD(net.parameters(), lr=1e-1, momentum=0.9, nesterov=True, weight_decay=1e-4)

In [26]:
# Create network

# Instantiate network and convert it to CUDA
def createNet():
    net = CarBadnessGuesser()
    if haveCuda:
        net = net.cuda()
    return net

In [27]:
from IPython.display import HTML, display

def progress(value, max=100):
    return HTML("""
        <progress
            value='{value}'
            max='{max}',
            style='width: 100%'
        >
            {value}
        </progress>
    """.format(value=value, max=max))

In [28]:
# Training

# Function for training a single epoch
def train(epoch):
    # variables for loss
    running_loss = 0.0
    correct = 0.0
    total = 0

    # set the network to train (for batchnorm and dropout)
    net.train()

    # Create progress bar
    bar = display(progress(0, len(trainLoader)), display_id=True)
    
    # data will contain one minibatch of images and correcponding labels
    # When the iteration is finished we have seen every training image once
    for i, data in enumerate(trainSet):
        #breakpoint()
        
        input_data = data['KUL'].cuda()
        prediction = net(input_data)
        loss = criterion(prediction, data['EES'].cuda())
        print('training- ', 'y^', prediction.item(), 'y', data['EES'].cuda())
        loss.backward()
        optimizer.step()
        
        
       

        # Progress bar
        bar.update(progress(i+1, len(trainLoader)))    
    
     # return loss and accuracy
    tr_loss = running_loss / i
    tr_corr = 0
    #tr_corr = correct / total * 100
    print("Train epoch %d loss: %.3f correct: %.2f" % (epoch + 1, running_loss / i, tr_corr))
    return tr_loss,tr_corr      

In [29]:
#Validation

# Function for validating a single epoch
def val(epoch):
    
    # variables for loss
    running_loss = 0.0
    correct = 0.0
    total = 0
    
    # set the network to eval  (for batchnorm and dropout)
    net.eval()
    
    # Create progress bar
    bar = display(progress(0, len(testLoader)), display_id=True)
    
    for i, data in enumerate(testSet):
        input_data = data['KUL'].cuda()
        prediction = net(input_data)
        loss = criterion(prediction, data['EES'].cuda())
        print('validation- ', 'y^', prediction.item(), 'y', data['EES'].cuda())
        loss.backward()
        optimizer.step()
        net.zero_grad()

        bar.update(progress(i+1, len(testLoader)))

    # return loss and accuracy
    val_loss = running_loss / i
    #val_corr = correct / total * 100
    val_corr = 0
    print("Test epoch %d loss: %.3f correct: %.2f" % (epoch + 1, running_loss / i, val_corr))
    return val_loss,val_corr

In [30]:
# Main

#  Containers for losses and accuracies for every epoch
train_accs = []
train_losses = []
val_accs = []
val_losses = []

# Best validation accuracy
best_acc = 0

# Set pseudo-random generator seeds to make multiple runs comparable
torch.manual_seed(1)
if haveCuda:
    torch.cuda.manual_seed(1)

# Create net, criterion, optimizer and scheduler
# This needs to be done after setting the random seed, 
# so that the random initialization would be the same
net = createNet()
criterion = createLoss()
optimizer = createOptimizer(net)
# scheduler = createScheduler()

# For numEpoch epochs
for epoch in range(numEpoch):
    
    # The with the LR scheduler
#     scheduler.step()
    
    # Train
    loss,acc = train(epoch)
    train_accs.append(acc)
    train_losses.append(loss)
    
    # Validate
    loss,acc = val(epoch)
    val_accs.append(acc)
    val_losses.append(loss)
    
    # If the current model is better, than the previous best, save it
    if acc > best_acc:
        print("Best Model, Saving")
        best_acc = acc
        torch.save(net,"./data/model.pth")
        

training-  y^ 0.33263492584228516 y tensor([11.4718], device='cuda:0')
training-  y^ 0.33516037464141846 y tensor([0.5626], device='cuda:0')
training-  y^ 0.3490506112575531 y tensor([0.0211], device='cuda:0')
training-  y^ 0.32391422986984253 y tensor([53.0057], device='cuda:0')
training-  y^ 0.3621421754360199 y tensor([16.1647], device='cuda:0')
training-  y^ 0.37643787264823914 y tensor([0.5208], device='cuda:0')
training-  y^ 0.4029316306114197 y tensor([3.6421], device='cuda:0')
training-  y^ 0.39151668548583984 y tensor([0.2980], device='cuda:0')
training-  y^ 0.4073716402053833 y tensor([40.6911], device='cuda:0')
training-  y^ 0.40479081869125366 y tensor([1.3184], device='cuda:0')
training-  y^ 0.4106837511062622 y tensor([11.8645], device='cuda:0')
training-  y^ 0.4151933491230011 y tensor([2.9321], device='cuda:0')
training-  y^ 0.35715633630752563 y tensor([54.6192], device='cuda:0')
training-  y^ 0.42532670497894287 y tensor([0.0554], device='cuda:0')
training-  y^ 0.4303

NameError: name 'tr_corr' is not defined

In [None]:
#  #Validation
    
# def validation(self):
#     """
#     Runs the validation phase of the training
#     :return: The validation loss average
#     """
#     self.eval()
#     average_loss = 0
#     step = 0
#     for step, data in enumerate(self.dataset['validation']):
#         with torch.no_grad():
#             input_data = data['KUL'].cuda()
#             prediction = self(input_data)
#             loss = self.loss_fn(prediction, data['EES'].cuda())
#             average_loss += loss.item()
#     print("Validation is complete")
#     return average_loss / (step + 1)

In [None]:
# # Training

# def train(self, epochs=100):
#     b_loss = []
#     v_loss = []
#     for epoch in trange(epochs):
#         for step, data in enumerate(self.dataset["train"]):
#             input_data = data['KUL'].cuda()
#             prediction = self(input_data)
#             loss = self.loss_fn(prediction, data['EES'].cuda())
#             loss.backward()
#             self.optimizer.step()
#             self.zero_grad()
#             b_loss.append(loss.item())

#         if epoch % self.valid_freq and epoch != 0:
#             print(f'Validation loss: {self.validation()}', flush=True)
#             v_loss.append(self.validation())

#     print("Train is complete")

#     #self.test()
#     #self.save_model()
#     plt.plot(v_loss)
#     plt.ylabel('Validation loss')
#     plt.show()
#     plt.plot(b_loss)
#     plt.ylabel('Batch loss')
#     plt.show()

   

In [None]:
def test(self):
        """
        Runs the evaluation of the network.
        :return: average loss for the test
        """
        average_loss = 0
        step = 0
        for step, data in enumerate(self.dataset['test']):
            with torch.no_grad():
                input_data = data['KUL'].cuda()
                prediction = self(input_data)
                loss = self.loss_fn(prediction, data['EES'].cuda())
                average_loss += loss.item()
        print("the test is complete")
        return average_loss / step

In [None]:
def save_checkpoint(self, save_dir="./training"):
        timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S.%d")
        save_path = os.path.join(save_dir, timestamp)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, os.path.join(save_path, 'model.pth'))

In [None]:
def save_model(model):
    """
    Saves weights to the given directory plus the timestamp
    :return: none
    """
    save_dir="./training"
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S.%d")
    save_path = os.path.join(save_dir, timestamp)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    torch.save(model.state_dict(), os.path.join(save_path, 'model.pth'))
    print("saving weights is complete")

In [None]:
# Main

# Containers for losses and accuracies for every epoch
train_accs = []
train_losses = []
val_accs = []
val_losses = []

# Best validation accuracy
best_acc = 0

# Set pseudo-random generator seeds to make multiple runs comparable
torch.manual_seed(1)
if haveCuda:
    torch.cuda.manual_seed(1)

# Create net, criterion, optimizer and scheduler
# This needs to be done after setting the random seed, 
# so that the random initialization would be the same
net = createNet()
criterion = createLoss()
# optimizer = createOptimizer()
# scheduler = createScheduler()

# # For numEpoch epochs
# for epoch in range(numEpoch):
    
#     # The with the LR scheduler
#     scheduler.step()
    
#     # Train
#     loss,acc = train(epoch)
#     train_accs.append(acc)
#     train_losses.append(loss)
    
#     # Validate
#     loss,acc = val(epoch)
#     val_accs.append(acc)
#     val_losses.append(loss)
    
#     # If the current model is better, than the previous best, save it
#     if acc > best_acc:
#         print("Best Model, Saving")
#         best_acc = acc
#         torch.save(net,"./data/model.pth")
        


    

In [None]:
if __name__ == "__main__":
    learner = CarBadnessGuesser()
    torch.backends.cudnn.enabled = False
    learner.train()
    save_model(learner)

In [None]:
learner.eval()

In [None]:
def predict_image(image, model):
    image_tensor = test_transforms(image).float()
    image_tensor = image_tensor.unsqueeze_(0)
    input = Variable(image_tensor)
    input = input.to(device)
    output = model(input)
    index = output.data.cpu()
    return index

In [None]:
base_path='./Data'
for file in os.listdir(base_path):
        data_dict = {}
        num, data_type = file.split('_')
        data_type = data_type.split('.')[0]
        num = int(num)
        if "EES" in data_type:
            tensor_in = sio.loadmat(os.path.join(base_path, file))['EES_value']
            tensor_in = torch.FloatTensor(tensor_in).squeeze(0)
        else:
            tensor_in = sio.loadmat(os.path.join(base_path, file))['Kulonbseg']
            tensor_in = torch.FloatTensor(tensor_in)
        try:
            data_dict[num][data_type] = tensor_in
        except KeyError:
            data_dict[num] = {data_type: tensor_in}


In [None]:
    new_data = []
    for key in data_dict.keys():
        new_data.append(data_dict[key])

In [None]:
input_data = new_data['KUL']
prediction = self(input_data)