# Predicting the number of rebar in an image
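After training the model, we can use it to predict the number of rebar in each image. This notebook loads the trained DETR model, counts the detections above a confidence threshold for every test image, and compares the predicted counts with the ground-truth counts using the mean squared error.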

In [None]:
import os

import pandas as pd
import torch
import torchvision
from sklearn.metrics import mean_squared_error
from transformers import DetrForObjectDetection, DetrFeatureExtractor

In [ ]:
# Use the MPS backend when torch reports it as available; otherwise run on the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# Read the trained model from ./model/detr_model and move it to the selected device
# (a single .to(device) is enough; the model is moved in place).
model = DetrForObjectDetection.from_pretrained('./model/detr_model').to(device)
# Inference only: switch the module to evaluation mode.
model.eval()

In [ ]:
# Dataset that yields (pixel_values, target) pairs preprocessed for DETR
class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset preprocessed by a feature extractor.

    Annotations are read from ``<img_folder>/annotations/<mode>.json``.

    Args:
        img_folder: Root folder holding the images and the ``annotations``
            sub-folder.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=..., annotations=..., return_tensors="pt")``
            whose result provides ``"pixel_values"`` and ``"labels"``.
        mode: Which split to load; one of ``'train'``, ``'val'``, ``'test'``.
    """

    def __init__(self, img_folder, feature_extractor, mode='train'):
        assert mode in ['train', 'val', 'test'],  f'Unknown mode: {mode}'
        ann_file = os.path.join(img_folder, f"annotations/{mode}.json")
        super().__init__(img_folder, ann_file)
        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at position ``idx``."""
        # read in PIL image and target in COCO format
        img, target = super().__getitem__(idx)

        # Wrap the raw annotations together with the image id, then let the
        # feature extractor preprocess both the image and the target.
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.feature_extractor(images=img, annotations=target, return_tensors="pt")

        # Drop only the leading batch dimension; a bare squeeze() would also
        # remove any other dimension that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]  # remove batch dimension

        return pixel_values, target


In [ ]:
# Checkpoint used to configure the feature extractor, and the dataset root folder
img_folder = "RebarDSC/images"
pretrained_model = "facebook/detr-resnet-50"

feature_extractor = DetrFeatureExtractor.from_pretrained(pretrained_model)

# Test split of the dataset, preprocessed with the feature extractor above
test_dataset = CocoDetection(img_folder, feature_extractor, mode='test')

In [ ]:
def count_rebar(outputs, threshold=0.7):
    """Count the predictions whose best class score is strictly above ``threshold``.

    Args:
        outputs: Object exposing a ``logits`` tensor that is indexed here as
            ``[batch, query, class]``; only the first batch item is counted.
        threshold: A prediction is kept only when its highest class
            probability is greater than (not equal to) this value.

    Returns:
        int: Number of predictions kept.
    """
    # Softmax over the class axis, then drop the last class column
    # (presumably DETR's "no object" class -- confirm against the model config).
    probas = outputs.logits.softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # Sum the boolean mask directly instead of materializing the kept rows.
    return int(keep.sum())

In [ ]:
# Build the ground-truth table from the CSV file (it has no header row)
csv_path = "RebarDSC/images/annotations/test.csv"
res = pd.read_csv(csv_path, header=None)
res.columns = ["image_name", "bbox"]
# The image id is the integer found after the first "_" in the image name
res["image_id"] = res["image_name"].apply(lambda name: int(name.split("_")[1]))
# One row per image_id holding the number of non-null bbox entries
res = res.groupby("image_id")["bbox"].count().reset_index(name="count")
res

In [ ]:
# Get the predictions: one (image_id, pred_count) row per test image
rows = []
# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    for idx in range(len(test_dataset)):
        pixel_values, target = test_dataset[idx]

        # Add the batch dimension the model call expects and move to the device.
        pixel_values = pixel_values.unsqueeze(0).to(device)
        outputs = model(pixel_values=pixel_values, pixel_mask=None)

        image_id = target['image_id'].item()
        pred_count = count_rebar(outputs, threshold=0.8)
        rows.append((image_id, pred_count))

# Build the frame once instead of growing it row by row, which also lets
# pandas infer integer dtypes for both columns.
pred_res = pd.DataFrame(rows, columns=["image_id", "pred_count"])

In [ ]:
# Sanity check: run the first test image through the model and show the raw outputs.
pixel_values, target = test_dataset[0]
pixel_values = pixel_values.unsqueeze(0).to(device)
# Inference only: disable autograd for the forward pass.
with torch.no_grad():
    outputs = model(pixel_values=pixel_values, pixel_mask=None)
outputs

In [ ]:
# Join ground-truth counts and predicted counts on their shared image_id column
res = pd.merge(res, pred_res, on="image_id")
res

In [ ]:
# Mean squared error between the true counts and the predicted counts
mse = mean_squared_error(res["count"], res["pred_count"])
print("MSE:", mse)

In [ ]:
# Baseline for comparison: predict the mean true count for every image
avg_count = res["count"].mean()
naive_pred = [avg_count] * len(res)
print("MSE naive:", mean_squared_error(res["count"], naive_pred))