In [None]:
# Download the YOLOX source tree from its upstream GitHub repository.
!git clone https://github.com/Megvii-BaseDetection/YOLOX.git

In [None]:
# Enter the cloned checkout and install it in editable mode (-e) with verbose output (-v).
%cd YOLOX
!pip3 install -v -e .

In [None]:
# Install the COCO Python API directly from the PythonAPI subdirectory of the cocoapi repository.
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
# NOTE(review): an earlier cell already ran `%cd YOLOX`; when cells run in order this
# targets YOLOX/YOLOX — confirm whether this cell is only meant for re-started sessions.
%cd YOLOX

In [None]:
# Additional packages; -q silences pip's output for the first two installs.
!pip install -q geopandas
!pip install -q rasterio
!pip install pytorch-lightning

In [None]:
import os

import torch
import torchvision.transforms as T
import yolox
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from torch.utils.data import DataLoader
from yolox.data import COCODataset, TrainTransform, get_yolox_datadir
from yolox.models import YOLOX

In [None]:
import pytorch_lightning
pytorch_lightning.__version__

'2.1.2'

In [None]:
class YOLOXModel(LightningModule):
    """Lightning wrapper around a default-constructed YOLOX detector.

    Each batch is unpacked as ``(imgs, targets)``. Per the upstream YOLOX
    training pipeline, ``imgs`` is expected to be a float tensor shaped
    ``(B, 3, H, W)`` and ``targets`` a float tensor shaped
    ``(B, max_labels, 5)`` whose zero-padded rows are
    ``[class, cx, cy, w, h]`` -- TODO confirm against the dataset's
    preprocessing.

    Args:
        batch_size: Stored on the instance; not used by the model itself.
        num_workers: Stored on the instance; not used by the model itself.
    """

    def __init__(self, batch_size=1, num_workers=4):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers

        self.model = YOLOX()  # default backbone/head configuration

    def forward(self, x, targets=None):
        """Run the wrapped YOLOX network.

        Args:
            x: Batch of input images.
            targets: Optional ground-truth labels. Upstream YOLOX requires
                them while the network is in training mode and ignores them
                in eval mode.

        Returns:
            Whatever ``YOLOX.forward`` returns: a dict of losses in training
            mode, predictions in eval mode.
        """
        return self.model(x, targets)

    def training_step(self, batch, batch_idx):
        """Compute and log the total YOLOX loss for one training batch."""
        imgs, targets = batch
        # YOLOX has no ``loss`` method: in training mode its forward pass
        # consumes the targets and returns the losses in a dict.
        loss = self(imgs, targets)["total_loss"]
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the total YOLOX loss for one validation batch.

        Lightning runs validation with the module in eval mode, in which
        YOLOX returns predictions rather than losses. The training code path
        is therefore enabled for the duration of the forward pass and the
        previous mode is restored afterwards.
        """
        imgs, targets = batch
        was_training = self.model.training
        self.model.train()
        # Keep normalisation layers in eval mode so validation data does not
        # leak into their running statistics.
        for module in self.model.modules():
            if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
                module.eval()
        try:
            loss = self(imgs, targets)["total_loss"]
        finally:
            self.model.train(was_training)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        """Return plain SGD (lr=0.01) with a StepLR decay of 0.1 every 3 epochs."""
        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)
        scheduler = {
            'scheduler': torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1),
            'interval': 'epoch',
            'frequency': 1,
        }
        return [optimizer], [scheduler]

class DataModule_COCO(LightningDataModule):
    """Data module serving COCO-format train/validation sets for YOLOX.

    Both datasets are built eagerly in ``__init__`` from annotation files
    located under ``data_dir`` and their sizes are printed.

    Samples are run through YOLOX's ``TrainTransform`` with the random flip
    and HSV augmentations disabled. Per upstream YOLOX this yields a float
    ``(3, H, W)`` image padded to ``img_size`` and a zero-padded
    ``(max_labels, 5)`` label array ``[class, cx, cy, w, h]``, which is the
    layout the network's training forward pass consumes.

    Args:
        data_dir: Root directory of the dataset.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Worker processes used by both loaders.
    """

    def __init__(self, data_dir, batch_size=1, num_workers=4):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        # Retained for backward compatibility; nothing in this class applies it.
        self.transform = T.Resize(512)

        self.coco_train = COCODataset(
            data_dir=os.path.join(data_dir, './'),  # Path to COCO train dataset
            json_file=os.path.join(data_dir, "coco_format_12_4/project_building_properties_right-coco-1/annotations/instances_default.json"),
            name="coco_train_dataset",
            img_size=(512, 512),  # Input image size
            # Layout conversion only: augmentations are switched off.
            preproc=TrainTransform(max_labels=50, flip_prob=0.0, hsv_prob=0.0),
        )
        self.coco_val = COCODataset(
            data_dir=os.path.join(data_dir, './'),  # Path to COCO validation dataset
            img_size=(512, 512),  # Input image size
            json_file=os.path.join(data_dir, "coco_format_12_4/project_building_properties_left-coco-1/annotations/instances_default.json"),
            name="coco_val_dataset",
            # TrainTransform (not ValTransform) so that labels are kept and a
            # validation loss can be computed.
            preproc=TrainTransform(max_labels=50, flip_prob=0.0, hsv_prob=0.0),
        )
        print(len(self.coco_train), len(self.coco_val))

    @staticmethod
    def my_collate_fn(batch):
        """Stack per-sample images and labels into two batch tensors.

        Args:
            batch: List of dataset samples; element 0 of each sample is the
                image array and element 1 the label array. Any further
                elements are ignored.

        Returns:
            Tuple ``(images, targets)`` of stacked tensors.
        """
        images = torch.stack([torch.as_tensor(sample[0]) for sample in batch])
        targets = torch.stack([torch.as_tensor(sample[1]) for sample in batch])
        return images, targets

    def train_dataloader(self):
        """Return the shuffled DataLoader over the training set."""
        return DataLoader(
            self.coco_train,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            collate_fn=self.my_collate_fn,
            shuffle=True,
        )

    def val_dataloader(self):
        """Return the unshuffled DataLoader over the validation set."""
        return DataLoader(
            self.coco_val,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            collate_fn=self.my_collate_fn,
            shuffle=False,
        )


# Root folder of the COCO-format dataset (relative to the current working directory).
data_dir = 'mapillary_images'

model = YOLOXModel()
datamod = DataModule_COCO(data_dir)
# Upper bound on the number of training epochs; raise or lower as needed.
trainer = Trainer(max_epochs=20)
print("training")
trainer.fit(model=model, datamodule=datamod)
# Evaluation is currently disabled:
#trainer.test(model, datamodule=datamod) #.val_loader)