In [1]:
import sys
import os

In [2]:
## adding scripts to path
sys.path.append("../scripts")

In [141]:
import pandas as pd
from torchvision.io import decode_image
import numpy as np
import torch
from torchvision import utils
import matplotlib.pyplot as plt
## customs
import config as config
from _utils import get_labelencoder, get_transforms, get_test_transforms, get_means_and_stds
from datasets import ImageDataset
from model import GCDDDetector
from torchvision import tv_tensors
from torchvision.transforms import v2 as T

In [4]:
## paths
ANNOT_FP = "../data/Train.csv"
IMGS_PATH = "../data/images"

In [5]:
## other constants
labelencoder = get_labelencoder(ANNOT_FP, "class")
inference_transforms = get_test_transforms()
train_transforms=get_transforms()

In [6]:
##### FUNCTIONS
## function to get image and it's bounding boxes
def get_sample(df, img_id):
    image = decode_image(os.path.join(IMGS_PATH, img_id))
    img_df = df[df["Image_ID"] == img_id]
    target = {}
    target["boxes"] = torch.from_numpy(img_df[["xmin", "ymin", "xmax", "ymax"]].values.astype(np.float32))
    target["boxes"] = tv_tensors.BoundingBoxes(target["boxes"], format="XYXY", canvas_size=image.shape[-2:])
    target["labels"] = torch.tensor(labelencoder.transform(img_df["class"]), dtype=torch.int64)

    return image, target

def get_fmap_fpn_layer(fmap_dict, layer_name):
    fmap = fmap_dict[layer_name].squeeze(0)
    fmap = normalize_fmap(fmap)
    return fmap.mean(dim=0)
    
def normalize_fmap(fmap):
    fmin = fmap.min(dim=-1, keepdim=True)[0].min(dim=-2, keepdim=True)[0]
    fmax = fmap.max(dim=-1, keepdim=True)[0].max(dim=-2, keepdim=True)[0]

    fmap_norm = (fmap-fmin)/(fmax-fmin)
    return fmap_norm

def draw_bboxes(image, bbox_dict, width=4, color="blue", font_size=50):
    font = "/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf"
    boxes = bbox_dict["boxes"]
    labels = bbox_dict["labels"]
    return utils.draw_bounding_boxes(image, boxes, [str(label.item()) for label in labels], width=width, font=font, font_size=font_size, colors=[color] * len(labels))

In [7]:
## reading csv
df = pd.read_csv(ANNOT_FP)
images = df["Image_ID"].unique()

In [8]:
image1 = images[0]
image2 = images[1]

In [9]:
sample1 = get_sample(df, image1)
sample2 = get_sample(df, image2)

In [10]:
cutmix = T.CutMix(num_classes=24)
mixup = T.MixUp(num_classes=24)
cutmix_or_mixup = T.RandomChoice([cutmix, mixup])

In [11]:
cutmix_or_mixup([sample1, sample2], labels_getter)

NameError: name 'labels_getter' is not defined

## Outlier Removal Inspect
Looking into why removing outliers makes model perform worse

In [66]:
def clean_bbox_data(df):
    width = df["xmax"] - df["xmin"]
    height = df["ymax"] - df["ymin"]
    aspect = width/height

    ## handling outliers
    min_dim = 1
    max_dim = 700
    min_ratio = 0.5
    max_ratio = 2
    df = df[((width >= min_dim) & (width <= max_dim)) &
             ((height >= min_dim) & (height <= max_dim)) &
             ((aspect >= min_ratio) & (aspect <= max_ratio))]
    
    return df

In [75]:
def get_outliers(df):
    width = df["xmax"] - df["xmin"]
    height = df["ymax"] - df["ymin"]
    aspect = width/height

    ## handling outliers
    min_dim = 1
    max_dim = 700
    min_ratio = 0.5
    max_ratio = 2
    df = df[((width < min_dim) | (width > max_dim)) |
             ((height < min_dim) | (height > max_dim)) |
             ((aspect < min_ratio) | (aspect > max_ratio))]
    
    return df 

In [76]:
df_no_dups = pd.read_csv("../data/Train_no_dup_bboxes.csv")

In [77]:
outlier_df = get_outliers(df_no_dups)

In [80]:
outlier_df["class"].value_counts()

class
Corn_Streak                  1241
Corn_Common_Rust             1205
Corn_Healthy                  926
Tomato_Healthy                593
Tomato_Late_Blight            565
Corn_Cercospora_Leaf_Spot     478
Tomato_Septoria               458
Pepper_Leaf_Curl              351
Pepper_Healthy                278
Pepper_Leaf_Mosaic            240
Tomato_Fusarium               230
Pepper_Fusarium               212
Pepper_Septoria               168
Tomato_Early_Blight           161
Tomato_Leaf_Curl               75
Pepper_Bacterial_Spot          74
Pepper_Late_Blight             73
Corn_Northern_Leaf_Blight      71
Pepper_Leaf_Blight             69
Tomato_Bacterial_Spot          34
Pepper_Cercospora              12
Tomato_Mosaic                   8
Pepper_Early_Blight             4
Name: count, dtype: int64

In [82]:
df_no_dups["class"].value_counts()

class
Corn_Cercospora_Leaf_Spot    6535
Tomato_Septoria              5311
Tomato_Late_Blight           3608
Corn_Streak                  3179
Tomato_Healthy               2872
Pepper_Leaf_Mosaic           2109
Pepper_Bacterial_Spot        1942
Tomato_Early_Blight          1896
Corn_Common_Rust             1680
Corn_Healthy                 1571
Pepper_Leaf_Curl             1527
Pepper_Septoria              1133
Tomato_Fusarium               878
Pepper_Healthy                736
Pepper_Leaf_Blight            507
Tomato_Bacterial_Spot         484
Pepper_Fusarium               471
Pepper_Cercospora             471
Tomato_Leaf_Curl              389
Pepper_Late_Blight            279
Corn_Northern_Leaf_Blight     157
Tomato_Mosaic                  84
Pepper_Early_Blight            46
Name: count, dtype: int64

In [71]:
df_out_removed = clean_bbox_data(df_no_dups)

In [72]:
df_out_removed["width"] =  df_out_removed["xmax"] - df_out_removed["xmin"]
df_out_removed["height"] = df_out_removed["ymax"] - df_out_removed["ymin"]
df_out_removed["aspect"] = (df_out_removed["width"] / df_out_removed["height"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_out_removed["width"] =  df_out_removed["xmax"] - df_out_removed["xmin"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_out_removed["height"] = df_out_removed["ymax"] - df_out_removed["ymin"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_out_removed["aspect"] = (df_out_removed["width"] / d

In [74]:
df_out_removed.shape

(30339, 10)

In [89]:
df[df["class"].isin(["Pepper_Septoria", "Tomato_Septoria"])].sort_values("xmax")

Unnamed: 0,Image_ID,confidence,class,ymin,xmin,ymax,xmax
29095,id_wfh2h9.jpg,1.0,Tomato_Septoria,681.925466,0.000000,743.783644,17.225673
31011,id_y7w9sj.jpg,1.0,Tomato_Septoria,1037.164538,0.000000,1127.652774,22.388060
15901,id_hr9j3c.jpg,1.0,Tomato_Septoria,1220.741483,7.675351,1248.582164,34.343687
21789,id_o8u5ma.jpg,1.0,Tomato_Septoria,1328.918086,0.000000,1469.567322,54.965252
21838,id_o8u5ma.jpg,1.0,Pepper_Septoria,1328.918086,0.000000,1469.567322,54.965252
...,...,...,...,...,...,...,...
7015,id_8k82kz.jpg,1.0,Tomato_Septoria,1343.713302,3892.119647,1482.267535,4032.000000
40675,id_pnuh5x.jpg,1.0,Tomato_Septoria,2793.395854,3759.895379,3024.000000,4032.000000
8542,id_a4xb7c.jpg,1.0,Tomato_Septoria,486.679734,3688.848798,976.638089,4032.000000
37859,id_g5ah21.jpg,1.0,Pepper_Septoria,1742.379259,3701.356499,1893.988790,4032.000000


In [97]:
df[df["class"].isin(["Pepper_Septoria", "Tomato_Septoria"])].groupby(["xmin", "ymin", "xmax", "ymax"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Image_ID,confidence,class
xmin,ymin,xmax,ymax,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.000000,340.124249,166.197408,618.274299,2,2,2
0.000000,443.460187,102.903981,510.462529,1,1,1
0.000000,448.659420,81.226708,569.715321,1,1,1
0.000000,655.570078,269.394816,1022.503257,2,2,2
0.000000,681.925466,17.225673,743.783644,1,1,1
...,...,...,...,...,...,...
3920.560799,2.600410,4003.403863,94.251025,1,1,1
3928.014047,192.278176,4009.681738,277.344775,1,1,1
3932.259219,315.327357,4026.925154,433.148053,1,1,1
3951.355575,2621.448258,4032.000000,2913.800717,1,1,1


In [None]:
from ultralytics import YOLO
import torchvision.transforms.v2.functional as F


model = YOLO()
model.train()
img = F.to_dtype(sample1[0], scale=True)
img = F.resize(img, 640)
model((img.unsqueeze(0)))

In [138]:
F.resize(img, 640)

tensor([[[0.4196, 0.4123, 0.4064,  ..., 0.3701, 0.3657, 0.3608],
         [0.4245, 0.4187, 0.4128,  ..., 0.3686, 0.3642, 0.3608],
         [0.4289, 0.4249, 0.4191,  ..., 0.3695, 0.3656, 0.3623],
         ...,
         [0.3255, 0.3279, 0.3318,  ..., 0.7994, 0.7917, 0.7853],
         [0.3446, 0.3471, 0.3509,  ..., 0.7849, 0.7750, 0.7676],
         [0.3569, 0.3593, 0.3623,  ..., 0.7721, 0.7603, 0.7529]],

        [[0.4863, 0.4789, 0.4730,  ..., 0.3652, 0.3578, 0.3529],
         [0.4912, 0.4854, 0.4795,  ..., 0.3637, 0.3563, 0.3529],
         [0.4971, 0.4931, 0.4877,  ..., 0.3616, 0.3548, 0.3515],
         ...,
         [0.2696, 0.2721, 0.2759,  ..., 0.7828, 0.7760, 0.7696],
         [0.2858, 0.2882, 0.2921,  ..., 0.7678, 0.7593, 0.7520],
         [0.2980, 0.3005, 0.3034,  ..., 0.7549, 0.7446, 0.7373]],

        [[0.4235, 0.4113, 0.4025,  ..., 0.2686, 0.2598, 0.2549],
         [0.4284, 0.4208, 0.4119,  ..., 0.2671, 0.2583, 0.2549],
         [0.4343, 0.4303, 0.4215,  ..., 0.2656, 0.2568, 0.

In [173]:
def merge_state_dicts(path1, path2):
    model1 = torch.load(path1, map_location="cpu")
    model2 = torch.load(path2, map_location="cpu")

    for key in model1["state_dict"]:
        model2["state_dict"][key] = (model1["state_dict"][key] + model2["state_dict"][key])/2

    return model2

In [175]:
path1 = "../checkpoints/epoch-epoch=00_map@50=map_50=0.38.ckpt"
path2 = "../checkpoints/epoch-epoch=06_map@50=map_50=0.38.ckpt"
merged = merge_state_dicts(path1, path2)
torch.save(merged, "../misc/test.ckpt")

del merged

In [182]:
from torchvision.models import MobileNet_V3_Large_Weights, mobilenet_v3_large

In [180]:
MobileNet_V3_Large_Weights.DEFAULT

MobileNet_V3_Large_Weights.IMAGENET1K_V2

In [194]:
ws = torch.load(config.BACKBONE_LOAD_PATH, map_location="cpu", weights_only=True)

In [195]:
backbone = mobilenet_v3_large()
backbone.features.load_state_dict(ws)

<All keys matched successfully>

In [197]:
ws

OrderedDict([('0.0.weight',
              tensor([[[[-3.6929e-03, -3.1163e-02,  1.0689e-01],
                        [-1.2609e-01,  2.1324e-01,  6.1509e-01],
                        [-9.4463e-02, -5.7836e-02,  2.8862e-01]],
              
                       [[ 1.2992e-01,  3.6972e-02,  1.8772e-01],
                        [-4.5728e-02,  1.3826e-01,  5.0121e-01],
                        [ 9.1598e-02,  8.6534e-02,  5.1759e-01]],
              
                       [[-1.2517e-02, -3.4389e-02, -5.8012e-02],
                        [-1.6744e-01,  8.8139e-02,  2.0874e-01],
                        [-9.0333e-02, -6.2588e-02,  9.8352e-02]]],
              
              
                      [[[-2.2733e-01,  5.1791e-02,  5.6069e-02],
                        [-3.7581e-02,  1.2240e+00, -1.0634e+00],
                        [ 4.4243e-02,  1.7041e+00, -1.7452e+00]],
              
                       [[-4.4531e-01,  2.2176e-01, -5.8311e-02],
                        [-3.4977e-02,  2.2690e+

In [201]:
from ultralytics import YOLO

m = YOLO()