In [24]:
import os
import pandas as pd
import numpy as np
import cv2
import torch
import torchvision
import sklearn.model_selection
from collections import Counter
import matplotlib.pyplot as plt
import albumentations as A

In [7]:
os.getcwd()

'/teamspace/studios/this_studio'

In [8]:
df = pd.read_csv('data/stairs_dataset_annotation.csv')
df['GT'].replace({'curved': 'bent', 'spiral': 'bent'}, inplace=True)

df

Unnamed: 0,filename,GT
0,stairs_001,bent
1,stairs_002,straight
2,stairs_003,bent
3,stairs_004,angular
4,stairs_005,straight
...,...,...
192,stairs_193,angular
193,stairs_194,bent
194,stairs_195,angular
195,stairs_196,straight


In [9]:
df.GT.value_counts()

GT
bent        74
straight    63
angular     60
Name: count, dtype: int64

In [10]:
IMAGE_PATH = 'data/stairs_dataset_20231124'
filepaths = sorted([os.path.join(IMAGE_PATH, fp) for fp in os.listdir(IMAGE_PATH)])
len(filepaths)

197

## Image sizes

In [11]:
# Inspect image size
sizes = [cv2.imread(filepath).shape for filepath in filepaths]
sorted_sizes = sorted(sizes, reverse=True)

In [12]:
# Larger and smaller images
sorted_sizes[0], sorted_sizes[-1]

((5312, 2988, 3), (251, 200, 3))

## ResNet instantiation

In [13]:
model = torchvision.models.resnet50(weights='IMAGENET1K_V2')
# model = torch.hub.load("pytorch/vision", "resnet50", weights="IMAGENET1K_V2")
model = model.eval()

In [14]:
# Transforms applied by the model
torchvision.models.ResNet50_Weights.DEFAULT.transforms()

ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

The inference transforms are available at ResNet50_Weights.IMAGENET1K_V2.transforms and perform the following preprocessing operations: Accepts PIL.Image, batched (B, C, H, W) and single (C, H, W) image torch.Tensor objects. The images are resized to resize_size=[232] using interpolation=InterpolationMode.BILINEAR, followed by a central crop of crop_size=[224]. Finally the values are first rescaled to [0.0, 1.0] and then normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].
https://pytorch.org/vision/main/models/generated/torchvision.models.resnet50.html

In [15]:
# Random data
input = torch.randn((16, 3, 512, 512))

In [16]:
output = model(input)
output.shape

# torch.Size([16, 1000])

torch.Size([16, 1000])

## Train-Val-Test split

In [17]:
X_train_val, X_test, y_train_val, y_test = sklearn.model_selection.train_test_split(
    df['filename'].to_numpy(), 
    df['GT'].to_numpy(), 
    test_size=0.15, 
    stratify=df['GT'].to_numpy(),
    random_state=42
)

X_train_val.shape, y_train_val.shape, X_test.shape, y_test.shape

((167,), (167,), (30,), (30,))

In [18]:
X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
    X_train_val, 
    y_train_val, 
    test_size=0.2, 
    stratify=y_train_val,
    random_state=42
)

X_train.shape, y_train.shape, X_val.shape, y_val.shape

((133,), (133,), (34,), (34,))

In [19]:
Counter(y_val), Counter(y_test)

(Counter({'bent': 13, 'straight': 11, 'angular': 10}),
 Counter({'bent': 11, 'straight': 10, 'angular': 9}))

In [20]:
val_df = pd.DataFrame({'filename': X_val, 'GT': y_val})

In [21]:
# Probably cleaner approach
df1, df2 = sklearn.model_selection.train_test_split(df, test_size=0.2)

df1.shape, df2.shape

((157, 2), (40, 2))

In [None]:
gts = df2.GT.to_list()
gts

In [81]:
from sklearn.preprocessing import LabelEncoder

In [84]:
encoder = LabelEncoder()
encoder.fit_transform(gts)

array([2, 1, 0, 2, 2, 2, 0, 2, 0, 2, 0, 0, 1, 0, 2, 1, 0, 2, 2, 0, 1, 0,
       2, 1, 2, 1, 1, 0, 2, 0, 1, 2, 1, 0, 2, 1, 1, 0, 1, 2])

In [93]:
encoded = [0 if x=='angular' else 1 if x=='bent' else 2 if x=='straight' else ValueError for x in gts]

In [95]:
torch.nn.functional.one_hot(torch.tensor(encoded[0]))

tensor([0, 0, 1])

In [99]:
x = 'angular'

0 if x=='angular' else 1 if x=='bent' else 2 if x=='straight' else ValueError

0

In [103]:
def from_gt_to_ohe(gt):
    label = 0 if gt =='angular' else 1 if gt =='bent' else 2 if gt =='straight' else ValueError
    ohe = torch.nn.functional.one_hot(torch.tensor(label), num_classes=3)
    return ohe

In [104]:
from_gt_to_ohe('angular'), from_gt_to_ohe('bent'), from_gt_to_ohe('straight')

(tensor([1, 0, 0]), tensor([0, 1, 0]), tensor([0, 0, 1]))

## Transforms

In [25]:
transforms = A.Compose([
    A.LongestMaxSize(max_size=224, interpolation=3, p=1.0),
    A.PadIfNeeded(min_height=224, min_width=224, border_mode=0, value=0, mask_value=0, p=1.0),

])

In [26]:
image = cv2.imread(filepaths[1])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image.shape

(1080, 716, 3)

In [None]:
transformed_image = transforms(image=image)['image']
print(transformed_image.shape)
plt.imshow(transformed_image)
plt.show()

In [28]:
# train_transforms = A.Compose([
#     # Dual transforms
#     A.Resize(height=256, width=256, interpolation=3, always_apply=True),
#     A.Affine(
#         scale = (0.8, 1.2),
#         rotate = (-360, 360),
#         shear = (-20, 20),
#         p = 0.5
#     ),
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.5),
#     # Image only transforms
#     A.ColorJitter(
#         brightness = 0.5,
#         contrast = 0.5,
#         saturation = 0.5,
#         hue = 0,
#         p = 0.5
#     ),
#     A.CLAHE(p=0.5),
#     # A.Normalize(mean=(0.4456, 0.4436, 0.4018), std=(0.2220, 0.2154, 0.2298), p=1) # mean and std computed on this dataset.    
# ])

# val_transforms = A.Compose([
#     A.Resize(height=256, width=256, interpolation=3, always_apply=True)

In [62]:
transforms1 = A.Compose([
    A.LongestMaxSize(max_size=224, interpolation=3, p=1.0),
    A.PadIfNeeded(min_height=224, min_width=224, border_mode=0, value=0, mask_value=0, p=1.0),
    
    # Geometric
    A.Affine(
        scale = (0.8, 1.2),
        rotate = (-360, 360),
        shear = (-20, 20),
        p = 0.5
    ),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
   
    # Color
    A.ColorJitter(
        brightness = 0.7,
        contrast = 0.7,
        saturation = 0.7,
        hue = 0.7,
        p = 0.5
    ),
    A.CLAHE(p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], p=1.0) # normalization values the ResNet was trained to. torchvision.models.ResNet50_Weights.DEFAULT.transforms()
])

In [None]:
transformed_image = transforms1(image=image)['image']
print(transformed_image.shape)
plt.imshow(transformed_image)
plt.axis('off')
plt.show()

In [31]:
transformed_image.min(), transformed_image.max()

(-2.117904, -0.3403921)

In [32]:
resnet_transform_info = torchvision.models.ResNet50_Weights.DEFAULT.transforms()
mean, std = resnet_transform_info.__dict__['mean'], resnet_transform_info.__dict__['std']

mean, std

([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])