In [1]:
import os

import torch 
from torch import nn 
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split 
from torchvision import transforms, models
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from utils import PyTorchSatellitePoseEstimationDataset
from submission import SubmissionWriter

2021-09-03 09:59:39.007685: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [1]:
# FIRST-TIME SETUP
# Download the SPEED dataset from
# https://kelvins.esa.int/satellite-pose-estimation-challenge/data/
# and point DATA_PATH at your local copy. Export the SPEED_DATA_PATH environment
# variable before starting the kernel to avoid editing the notebook; the
# fallback below is only valid on the original author's machine.
DATA_PATH = os.environ.get("SPEED_DATA_PATH", "/home/salem/Documents/DLR/Challenge/speed")

In [2]:
class SatellitePoseEstimationModel(pl.LightningModule):
    """ResNet-18 regressor that predicts a 7-value pose vector per image.

    The classification head of a pretrained torchvision ResNet-18 is replaced
    by a linear layer with 7 outputs. During testing the first four outputs
    are handed to the submission writer as ``q`` and the last three as ``r``.

    Args:
        submission: Object that collects test predictions through
            ``append_test(filename, q, r)``. Only required for
            ``trainer.test``; training and validation work without it.
    """

    def __init__(self, submission = None) :
        super().__init__()
        initialized_model = models.resnet18(pretrained=True)
        # Swap the original classification layer for a 7-output regression head.
        num_ftrs = initialized_model.fc.in_features
        initialized_model.fc = torch.nn.Linear(num_ftrs, 7)
        self.model = initialized_model
        self.submission = submission

    def forward(self, x):
        """Return the raw 7-value network output for the batch ``x``."""
        return self.model(x)

    def _shared_step(self, batch, stage):
        """Compute and log the MSE loss for one batch.

        Args:
            batch: ``(inputs, targets)`` pair produced by the dataloader.
            stage: Key under which the loss is grouped in the ``'losses'``
                log entry (``'train'`` or ``'valid'``).

        Returns:
            The mean-squared-error loss tensor.
        """
        x, y = batch
        y_hat = self.model(x)
        loss = F.mse_loss(y_hat.float(), y.float())
        # Logging 'step' as the 1-based epoch makes the logger index by epoch.
        self.log('step', self.trainer.current_epoch+1)
        self.log('losses', {stage: loss})
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for one batch."""
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for one batch."""
        return self._shared_step(batch, 'valid')

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr = 0.001)

    def test_step(self, batch, batch_idx, dataloader_idx=0):
        """Run inference on one test batch and record the predictions.

        Args:
            batch: ``(inputs, filenames)`` pair produced by a test dataloader.
            batch_idx: Index of the batch within its dataloader.
            dataloader_idx: Index of the test dataloader the batch came from.
                Lightning passes this argument whenever several test
                dataloaders are returned; it defaults to 0 so that a single
                dataloader keeps working.

        Raises:
            RuntimeError: If the model was built without a submission writer.
        """
        if self.submission is None:
            raise RuntimeError(
                "test_step needs a submission writer: construct the model with "
                "SatellitePoseEstimationModel(submission=...)"
            )

        inputs, filenames = batch
        # detach() keeps the numpy conversion valid even if gradients are enabled.
        outputs = self.model(inputs).detach().cpu().numpy()
        q_batch = outputs[:, :4]
        r_batch = outputs[:, -3:]

        if dataloader_idx == 0:
            append = self.submission.append_test
        else:
            # NOTE(review): assumes the writer exposes append_real_test(filename, q, r)
            # like the ESA speed-utils SubmissionWriter, and that the second test
            # dataloader serves the 'realtest' split -- confirm.
            append = self.submission.append_real_test

        for filename, q, r in zip(filenames, q_batch, r_batch):
            append(filename, q, r)

In [3]:
class DataModule(pl.LightningDataModule) :
    """Lightning data module for the SPEED dataset.

    Args:
        batch_size: Batch size used by every dataloader.
        num_workers: Number of worker processes per dataloader.
        speed_root: Root directory of the SPEED dataset.
        split_seed: Optional seed for the train/validation split. ``None``
            (the default) draws the split from the global torch RNG.
    """

    def __init__(self, batch_size = 32, num_workers = 8, speed_root='', split_seed=None):
        super().__init__()
        self.batch_size = batch_size
        #num_workers = 4*gpu_num
        self.num_workers = num_workers
        self.speed_root = speed_root
        self.split_seed = split_seed

    def setup(self, stage = None):
        """Create the datasets needed for ``stage``.

        For ``"fit"``/``"validate"`` (or ``None``) the 'train' split is divided
        80/20 into training and validation subsets; for ``"test"`` (or
        ``None``) the 'test' and 'realtest' splits are loaded.
        """
        #Transforms
        data_transforms = transforms.Compose([transforms.Resize((224, 224)),
                                              transforms.ToTensor(),
                                              transforms.Normalize([0.485, 0.456, 0.406],
                                                                   [0.229, 0.224, 0.225])])
        if stage in ("fit", "validate", None):
            # Only read the training split when it is actually needed.
            full_dataset = PyTorchSatellitePoseEstimationDataset('train', self.speed_root, data_transforms)
            n_total = len(full_dataset)
            n_train = int(n_total * .8)
            # Derive the validation size from the remainder so the two lengths
            # always sum to n_total; random_split raises otherwise.
            n_val = n_total - n_train
            split_kwargs = {}
            if self.split_seed is not None:
                split_kwargs['generator'] = torch.Generator().manual_seed(self.split_seed)
            self.train_dataset, self.val_dataset = random_split(full_dataset,
                                                                [n_train, n_val],
                                                                **split_kwargs)
        if stage == "test" or stage is None:
            self.test_dataset = PyTorchSatellitePoseEstimationDataset('test', self.speed_root, data_transforms)
            self.real_test_dataset = PyTorchSatellitePoseEstimationDataset('realtest', self.speed_root, data_transforms)

    def train_dataloader(self):
        """Shuffled loader over the training subset."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        """Loader over the validation subset (not shuffled, so batches are stable across epochs)."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers)

    def test_dataloader(self):
        """Loaders over the 'test' split (index 0) and the 'realtest' split (index 1)."""
        return [DataLoader(self.test_dataset, batch_size = self.batch_size, num_workers = self.num_workers),
                DataLoader(self.real_test_dataset, batch_size = self.batch_size, num_workers = self.num_workers)]
    

In [7]:
# TODO: Submission -- not wired in yet. The intended flow is:
#   1. MySubmission = SubmissionWriter() and pass it to the model,
#   2. wrap trainer.fit(model, dm) so the checkpoint is saved even when
#      training stops early,
#   3. trainer.test(model = model, datamodule = dm),
#   4. MySubmission.export(out_dir="./outputs", suffix= trial_name).

# Run configuration. Only 'gpus' and 'max_epochs' are forwarded to the Trainer
# below; 'accelerator' and 'num_nodes' are stored but currently unused.
args = {
    'data-path': DATA_PATH,
    'num_workers': 8,
    'batch_size': 32,
    'gpus': 0,
    'max_epochs': 2,
    'accelerator': 'ddp',
    'num_nodes': 1,
}

# Every artefact of this run (logs, checkpoint) lives under ./<trial_name>/.
trial_name = f"first_model_{args['max_epochs']}epochs"
args['logdir'] = f"./{trial_name}/logs"
os.makedirs(trial_name, exist_ok=True)

# Model, data and logger.
model = SatellitePoseEstimationModel()
dm = DataModule(
    batch_size=args['batch_size'],
    num_workers=args['num_workers'],
    speed_root=args['data-path'],
)
tb_logger = TensorBoardLogger(save_dir=args['logdir'])

# ------------
# training
# ------------
trainer_options = {
    'gpus': args['gpus'],
    'max_epochs': args['max_epochs'],
    'logger': tb_logger,
}
trainer = pl.Trainer(**trainer_options)
trainer.fit(model, datamodule=dm)

# Persist the trained weights next to the logs.
checkpoint_path = f"{trial_name}/{trial_name}.ckpt"
trainer.save_checkpoint(checkpoint_path)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.720    Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




In [11]:
!tensorboard --logdir ./first_model_2epochs/logs

2021-09-03 13:27:46.490844: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.4.1 at http://localhost:6007/ (Press CTRL+C to quit)
^C


In [None]:
# Loading an existing model:

#trial_name = ''
# folder_path = f"./{trial_name}"
# model_file_path = os.path.join(folder_path,"first_model_100epochs.ckpt")
# model = SatellitePoseEstimationModel()
# model = model.load_from_checkpoint(model_file_path)
