In [36]:
import os
import pandas as pd
import numpy as np
import cv2
import torch
import torchvision
import sklearn.model_selection

In [40]:
# Load the annotation table: one row per image with its ground-truth stair type.
df = pd.read_csv('../data/stairs_dataset_annotation.csv')

# Merge the 'curved' and 'spiral' labels into a single 'bent' class.
# The result is assigned back to the column instead of calling
# `df['GT'].replace(..., inplace=True)`: that form mutates an intermediate
# column selection, emits a FutureWarning on pandas 2.x, and leaves `df`
# unchanged once Copy-on-Write is enabled. Assigning back also keeps the cell
# idempotent on re-run.
df['GT'] = df['GT'].replace({'curved': 'bent', 'spiral': 'bent'})

df

Unnamed: 0,filename,GT
0,stairs_001,bent
1,stairs_002,straight
2,stairs_003,bent
3,stairs_004,angular
4,stairs_005,straight
...,...,...
192,stairs_193,angular
193,stairs_194,bent
194,stairs_195,angular
195,stairs_196,straight


In [11]:
# Class balance after merging labels: number of images per ground-truth class.
df['GT'].value_counts()

GT
bent        74
straight    63
angular     60
Name: count, dtype: int64

In [15]:
# Directory holding the raw stair images.
IMAGE_PATH = '../data/stairs_dataset_20231124'

# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# paths are sorted to make `filepaths` identical across machines and re-runs.
filepaths = sorted(os.path.join(IMAGE_PATH, fp) for fp in os.listdir(IMAGE_PATH))
len(filepaths)

197

## Image sizes

In [18]:
# Inspect image size.
# cv2.imread() does not raise on a missing, corrupt or non-image file; it
# returns None. Check for that explicitly so the failure names the offending
# files instead of surfacing as "'NoneType' object has no attribute 'shape'".
sizes = []
unreadable = []
for filepath in filepaths:
    image = cv2.imread(filepath)
    if image is None:
        unreadable.append(filepath)
    else:
        sizes.append(image.shape)

if unreadable:
    raise ValueError(f'cv2.imread could not decode {len(unreadable)} file(s): {unreadable}')

# Shape tuples compare element by element, so this orders by the first
# dimension, then the second, in descending order.
sorted_sizes = sorted(sizes, reverse=True)

In [20]:
# First and last entries of the descending sort. Shape tuples are compared
# lexicographically, so these are the extremes by the first shape dimension
# (rows/height in OpenCV's (rows, cols, channels) layout), not necessarily
# by pixel area.
sorted_sizes[0], sorted_sizes[-1]

((5312, 2988, 3), (251, 200, 3))

## ResNet instantiation

In [26]:
# Build a ResNet-50 with the ImageNet-pretrained 'IMAGENET1K_V2' weights and put
# it in evaluation mode straight away (eval() returns the module itself).
# The same model can also be obtained through torch.hub:
#   torch.hub.load("pytorch/vision", "resnet50", weights="IMAGENET1K_V2")
model = torchvision.models.resnet50(weights='IMAGENET1K_V2').eval()

In [27]:
# Preprocessing that the pretrained weights expect.
# The model does NOT apply these transforms itself: they must be run on each
# image before it is passed to `model`.
# The weights are named explicitly (rather than via `DEFAULT`) so this always
# matches the 'IMAGENET1K_V2' weights the model was built with, even if
# torchvision changes what `DEFAULT` points to.
torchvision.models.ResNet50_Weights.IMAGENET1K_V2.transforms()

ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

From the torchvision documentation: the inference transforms are available at `ResNet50_Weights.IMAGENET1K_V2.transforms` and perform the following preprocessing operations. They accept `PIL.Image`, batched `(B, C, H, W)` and single `(C, H, W)` image `torch.Tensor` objects. The images are resized to `resize_size=[232]` using `interpolation=InterpolationMode.BILINEAR`, followed by a central crop of `crop_size=[224]`. Finally, the values are first rescaled to `[0.0, 1.0]` and then normalized using `mean=[0.485, 0.456, 0.406]` and `std=[0.229, 0.224, 0.225]`.
Source: https://pytorch.org/vision/main/models/generated/torchvision.models.resnet50.html

In [23]:
# Dummy batch for a smoke test of the network: 16 RGB images of 512x512 pixels
# drawn from a standard normal distribution.
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# following cell reads it.
input = torch.randn(16, 3, 512, 512)

In [30]:
# This is an inference-only forward pass, so autograd is disabled: without
# no_grad() PyTorch keeps every intermediate activation for a backward pass
# that never happens, which is a lot of memory for a 16x3x512x512 batch.
with torch.no_grad():
    output = model(input)

# Expected: torch.Size([16, 1000])
output.shape

torch.Size([16, 1000])

## Train-Val-Test split

In [53]:
# Stratified 80/20 split into train+val and test sets.
# train_test_split returns its outputs grouped per input array, i.e.
# (X_train, X_test, y_train, y_test). The unpacking order below must follow
# that, otherwise `y_train_val` receives test filenames and `X_test` receives
# training labels.
# random_state is fixed so the split is reproducible across kernel restarts.
X_train_val, X_test, y_train_val, y_test = sklearn.model_selection.train_test_split(
    df['filename'].to_numpy(),
    df['GT'].to_numpy(),
    test_size=0.2,
    stratify=df['GT'].to_numpy(),
    random_state=42,
)

In [55]:
# Class distribution of the test labels: returns (sorted unique labels,
# number of occurrences of each), used to check the stratified split.
np.unique(y_test, return_counts=True)

(array(['angular', 'bent', 'straight'], dtype=object), array([12, 15, 13]))