# Predicting the number of rebar in an image
After training the model, we can use it to predict the number of rebar in an image.

In [24]:
import os
import torch
import torchvision
import pytorch_lightning as pl
import pandas as pd
from transformers import DetrForObjectDetection, DetrFeatureExtractor
from sklearn.metrics import mean_squared_error

In [19]:
# Create the Data loader to train the model
class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, feature_extractor, mode='train'):
        assert mode in ['train', 'val', 'test'],  f'Unknown mode: {mode}'
        ann_file = os.path.join(img_folder, f"annotations/{mode}.json")
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        # read in PIL image and target in COCO format
        img, target = super(CocoDetection, self).__getitem__(idx)
        
        # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.feature_extractor(images=img, annotations=target, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension
        target = encoding["labels"][0] # remove batch dimension

        return pixel_values, target


# We define our model based on DETR
class Detr(pl.LightningModule):

     def __init__(self, lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4, num_queries=500, num_labels=2, pretrained_model="facebook/detr-resnet-50"):
         super().__init__()
         # replace COCO classification head with custom head
         self.model = DetrForObjectDetection.from_pretrained(pretrained_model, 
                                                             num_labels=num_labels,
                                                             num_queries=num_queries,
                                                             ignore_mismatched_sizes=True)
         self.lr = lr
         self.lr_backbone = lr_backbone
         self.weight_decay = weight_decay

     def forward(self, pixel_values, pixel_mask):
       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

       return outputs
     
     def common_step(self, batch, batch_idx):
       pixel_values = batch["pixel_values"]
       pixel_mask = batch["pixel_mask"]
       labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

       loss = outputs.loss
       loss_dict = outputs.loss_dict

       return loss, loss_dict

     def training_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)     
        # logs metrics for each training_step,
        # and the average across the epoch
        self.log("training_loss", loss)
        for k,v in loss_dict.items():
          self.log("train_" + k, v.item())

        return loss

     def validation_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)     
        self.log("validation_loss", loss)
        for k,v in loss_dict.items():
          self.log("validation_" + k, v.item())

        return loss

     def configure_optimizers(self):
        param_dicts = [
              {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
              {
                  "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                  "lr": self.lr_backbone,
              },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                  weight_decay=self.weight_decay)
        
        return optimizer
     # 
     # def train_dataloader(self):
     #    return train_dataloader
     # 
     # def val_dataloader(self):
     #    return val_dataloader

In [21]:
# Set up the device
model1 = Detr
model = Detr.load_from_checkpoint("./model/detr.ckpt")
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# Read the model from file
model.to(device)
model.eval()

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DetrForObjectDetection were not initialized from the model checkpoin

Detr(
  (model): DetrForObjectDetection(
    (model): DetrModel(
      (backbone): DetrConvModel(
        (conv_encoder): DetrConvEncoder(
          (model): FeatureListNet(
            (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
            (bn1): DetrFrozenBatchNorm2d()
            (act1): ReLU(inplace=True)
            (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
            (layer1): Sequential(
              (0): Bottleneck(
                (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (bn1): DetrFrozenBatchNorm2d()
                (act1): ReLU(inplace=True)
                (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (bn2): DetrFrozenBatchNorm2d()
                (drop_block): Identity()
                (act2): ReLU(inplace=True)
                (aa): Identity()
                (conv3): Conv2d(64, 256

In [25]:
# Set up pre-trained model
pretrained_model = "facebook/detr-resnet-50"
img_folder = "RebarDSC/images"

feature_extractor = DetrFeatureExtractor.from_pretrained(pretrained_model)

test_dataset = CocoDetection(img_folder=f'{img_folder}', feature_extractor=feature_extractor, mode='test')

loading annotations into memory...
Done (t=0.54s)
creating index...
index created!


In [26]:
def count_rebar(outputs, threshold=0.7):
  # keep only predictions with confidence >= threshold
    probas = outputs.logits.softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

  # return the count of rebar
    return len(probas[keep])

In [27]:
# Get count from the CSV file
# res = pd.read_csv("RebarDSC/images/annotations/test.csv", header=None)
# res.columns = ["image_name", "bbox"]
# res["image_id"] = res["image_name"].apply(lambda x: int(x.split("_")[1]))
# # Get the count of rebar per image_id
# res = res.groupby("image_id").count().reset_index()
# res = res[["image_id", "bbox"]]
# res.columns = ["image_id", "count"]
# res

Unnamed: 0,image_id,count
0,1,245
1,3,46
2,5,232
3,9,199
4,16,272
...,...,...
995,2117,149
996,2118,280
997,2121,174
998,2123,266


In [60]:
# Get the predictions
pred_res = pd.DataFrame(columns=["image_id", "pred_count", "act_count"])
for it in iter(range(len(test_dataset))):
    if it % 20 == 0:
        print(f'{it}/{len(test_dataset)}')
# it = iter(range(len(test_dataset)))
    pixel_values, target = test_dataset[it]
    act_count = target['class_labels'].sum().tolist()
    
    pixel_values = pixel_values.unsqueeze(0).to(device)
    # print(pixel_values.shape)
    outputs = model(pixel_values=pixel_values, pixel_mask=None)
    image_id = target['image_id'].item()
    # image = test_dataset.coco.loadImgs(image_id)[0]
    pred_count = count_rebar(outputs, threshold=0.6)
    
    pred_res.loc[len(pred_res)] = [image_id, pred_count, act_count]
    
pred_res

0/1000
20/1000
40/1000
60/1000
80/1000
100/1000
120/1000
140/1000
160/1000
180/1000
200/1000
220/1000
240/1000
260/1000
280/1000
300/1000
320/1000
340/1000
360/1000
380/1000
400/1000
420/1000
440/1000
460/1000
480/1000
500/1000
520/1000
540/1000
560/1000
580/1000
600/1000
620/1000
640/1000
660/1000
680/1000
700/1000
720/1000
740/1000
760/1000
780/1000
800/1000
820/1000
840/1000
860/1000
880/1000
900/1000
920/1000
940/1000
960/1000
980/1000


Unnamed: 0,image_id,pred_count,act_count
0,1,475,245
1,3,0,46
2,5,343,232
3,9,247,199
4,16,499,272
...,...,...,...
995,2117,0,149
996,2118,496,280
997,2121,491,174
998,2123,402,266


In [59]:
# processor = DetrFeatureExtractor.from_pretrained(model)
pixel_values, target = test_dataset[10]
pixel_values = pixel_values.unsqueeze(0).to(device)
outputs = model(pixel_values=pixel_values, pixel_mask=None)

# print(outputs.logits)
probas = outputs.logits.softmax(-1)[0, :, :-1]
probas
keep = probas.max(-1).values > 0.6

  # return the count of rebar
# len(probas[keep])
# pixel_values
# it
probas.shape

torch.Size([500, 2])

In [54]:
pred_res["act_count"].iloc[0].tolist()[0]

TypeError: 'int' object is not subscriptable

In [ ]:
# Merge the predictions with the ground truth
# res = res.merge(pred_res, on="image_id")
# res

In [61]:
# Get the MSE (Mean Squared Error)
print("MSE:", mean_squared_error(pred_res["act_count"], pred_res["pred_count"]))

MSE: 29205.638


In [63]:
# Compare the MSE to a naive model
avg_count = pred_res["act_count"].mean()
print("MSE naive:", mean_squared_error(pred_res["act_count"], [avg_count]*len(pred_res)))

MSE naive: 6179.065775
