#### This notebook is based on https://www.kaggle.com/its7171/2class-object-detection-inference
#### Here is another version with filtering: https://www.kaggle.com/artkulak/2class-object-detection-inference-with-filtering

A day ago I released a notebook with a filter that removes some of the false positives by keeping only predictions that are present in both the "Endzone" and "Sideline" views. Here is one more filtering idea, which is similar but achieves a slightly higher public LB score. Don't forget to properly validate your solutions before adding these extra postprocessing steps to your pipelines.

##### Please upvote if this was helpful to you. Pressing "fork" is one click, pressing "upvote" is just one extra click which shouldn't take a lot of your time :)

In [1]:
import pandas as pd
# Load the player-tracking file that ships with the test set.
d = pd.read_csv('../input/nfl-impact-detection/test_player_tracking.csv')
# Any shape other than (19269, 12) sets the flag; presumably (19269, 12) is the
# shape of the public test file -- TODO confirm.
IS_PRIVATE = d.shape != (19269, 12)
print(IS_PRIVATE)

# NOTE: unconditional override -- every IS_PRIVATE branch below always runs,
# regardless of what the shape check above printed.
IS_PRIVATE = True

False


In [2]:
# Offline setup: install timm from a local wheel and unpack the bundled packages
# into the working directory.
if IS_PRIVATE:
    !pip install ../input/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz ../input/nfl-lib/pkgs.tgz
    # for pytorch1.6: sed rewrites the first " / " on each line of effdet/bench.py to " // "
    cmd = "sed -i -e 's/ \/ / \/\/ /' timm-efficientdet-pytorch/effdet/bench.py"
    !$cmd

Processing /kaggle/input/nfl-lib/timm-0.1.26-py3-none-any.whl
Installing collected packages: timm
Successfully installed timm-0.1.26


In [3]:
# NOTE: the packages are only unpacked when IS_PRIVATE is True; if it is False, this cell fails with "No module named 'effdet'".

import sys
sys.path.insert(0, "./timm-efficientdet-pytorch")
sys.path.insert(0, "./omegaconf")
sys.path.insert(0, "../input/weightedboxesfusion")

import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
import pandas as pd
import gc
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchEval
from effdet.efficientdet import HeadNet
import warnings
from tqdm import tqdm
from PIL import Image
from ensemble_boxes import *

warnings.filterwarnings("ignore")

# Directory the extracted test frames are written to and read back from.
DATA_ROOT_PATH = 'test_images'
SEED = 42


def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and force deterministic cuDNN.

    Args:
        seed: integer seed applied to every generator and exported as
            PYTHONHASHSEED.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


seed_everything(SEED)

In [4]:
#################
# SET CONSTANTS
#################

# Minimum fused detector score a helmet box needs to be kept for classification.
DETECTION_THRESHOLD = 0.15
# Minimum averaged classifier score for a box to be kept in the results.
CLASSIFICATION_THRESHOLD = 0.72
# Margin added on each side of a box before cropping, as a multiple of the box size.
CROP_RATIO = 1.0
# Frames closer than this to the start or end of a video are skipped by the classifier stage.
CUT_FRAME = 10

In [5]:
def mk_images(video_name, video_labels, video_dir, out_dir, only_with_impact=True):
    """Decode a video and write its frames to ``out_dir`` as PNG files.

    Frames are numbered from 1. Each written file is named after the video with
    ``.mp4`` replaced by ``_<frame>.png``.

    Args:
        video_name: file name of the video inside ``video_dir``.
        video_labels: DataFrame queried on its ``video``, ``frame`` and
            ``impact`` columns; only read when ``only_with_impact`` is True.
        video_dir: directory that contains the video.
        out_dir: existing directory the PNG files are written to.
        only_with_impact: when True, write only frames that have at least one
            label row with ``impact == 1.0``.

    Returns:
        The number of frames read from the video.
    """
    video_path = f"{video_dir}/{video_name}"
    video_name = os.path.basename(video_path)
    vidcap = cv2.VideoCapture(video_path)
    if only_with_impact:
        boxes_all = video_labels.query("video == @video_name")
        print(video_path, boxes_all[boxes_all.impact == 1.0].shape[0])
    else:
        print(video_path)
    frame = 0
    try:
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break
            frame += 1
            if only_with_impact:
                boxes = video_labels.query("video == @video_name and frame == @frame")
                if boxes[boxes.impact == 1.0].shape[0] == 0:
                    continue
            image_path = f'{out_dir}/{video_name}'.replace('.mp4', f'_{frame}.png')
            cv2.imwrite(image_path, img)
    finally:
        # Release the decoder even if reading or writing raised.
        vidcap.release()
    return frame

In [6]:
# Maps each test video file name to the number of frames extracted from it.
max_frame_dict = {}

if IS_PRIVATE:
    out_dir = DATA_ROOT_PATH
    # Pure-Python equivalent of `mkdir -p`: no error if the directory already exists.
    os.makedirs(out_dir, exist_ok=True)
    video_dir = '/kaggle/input/nfl-impact-detection/test'
    uniq_video = [path.split('/')[-1] for path in glob(f'{video_dir}/*.mp4')]
    for video_name in uniq_video:
        # No labels exist for the test set, so an empty frame is passed and every frame is written.
        frame = mk_images(video_name, pd.DataFrame(), video_dir, out_dir, only_with_impact=False)
        max_frame_dict[video_name] = frame

/kaggle/input/nfl-impact-detection/test/57906_000718_Sideline.mp4
/kaggle/input/nfl-impact-detection/test/57906_000718_Endzone.mp4
/kaggle/input/nfl-impact-detection/test/57995_000109_Endzone.mp4
/kaggle/input/nfl-impact-detection/test/58102_002798_Sideline.mp4
/kaggle/input/nfl-impact-detection/test/57995_000109_Sideline.mp4
/kaggle/input/nfl-impact-detection/test/58102_002798_Endzone.mp4


In [7]:
# Show how many frames were read from each video.
max_frame_dict

{'57906_000718_Sideline.mp4': 440,
 '57906_000718_Endzone.mp4': 434,
 '57995_000109_Endzone.mp4': 529,
 '58102_002798_Sideline.mp4': 366,
 '57995_000109_Sideline.mp4': 529,
 '58102_002798_Endzone.mp4': 366}

In [8]:
def get_valid_transforms():
    """Build the inference transform: resize to 1280x1280, then convert to a tensor."""
    steps = [
        A.Resize(height=1280, width=1280, interpolation=cv2.INTER_CUBIC, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [9]:
class DatasetRetriever(Dataset):
    """Dataset that loads extracted frames from ``DATA_ROOT_PATH`` by file name.

    Args:
        image_ids: sequence (list or numpy array) of image file names located
            under ``DATA_ROOT_PATH``.
        transforms: optional callable invoked as ``transforms(image=...)`` that
            returns a dict with an ``'image'`` entry.
    """

    def __init__(self, image_ids, transforms=None):
        super().__init__()
        self.image_ids = image_ids
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, image_id)``; the image is RGB float32 scaled to [0, 1].

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index]
        image_path = f'{DATA_ROOT_PATH}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            sample = self.transforms(image=image)
            image = sample['image']
        return image, image_id

    def __len__(self) -> int:
        # len() works for numpy arrays and plain sequences alike.
        return len(self.image_ids)

In [10]:
def load_net(checkpoint_path):
    """Build the single-class EfficientDet-D5 detector and load its weights.

    Args:
        checkpoint_path: path to a checkpoint whose ``'model_state_dict'`` entry
            holds the network weights.

    Returns:
        The network wrapped in ``DetBenchEval``, in eval mode, moved to CUDA.
    """
    cfg = get_efficientdet_config('tf_efficientdet_d5')
    model = EfficientDet(cfg, pretrained_backbone=False)
    # The config is changed only after the network is constructed; the class
    # head is then rebuilt from the modified config.
    cfg.num_classes = 1
    cfg.image_size = 1280
    model.class_net = HeadNet(cfg, num_outputs=cfg.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['model_state_dict'])
    bench = DetBenchEval(model, cfg)
    bench.eval()
    return bench.cuda()
# Load the detector; `net` is the module-level model that make_predictions() calls.
# The commented-out lines are alternative checkpoints.
if IS_PRIVATE:
#     net = load_net('../input/1effdet-1280-1alass/best-checkpoint-003epoch.bin')
#     net = load_net('../input/2-effdet-noim/best-checkpoint-001epoch.bin')
    net = load_net('../input/3-effdet-noim/fold0-002epoch.bin')

In [11]:
# Dataset over every PNG in DATA_ROOT_PATH, identified by bare file name.
dataset = DatasetRetriever(
    image_ids=np.array([path.split('/')[-1] for path in glob(f'{DATA_ROOT_PATH}/*.png')]),
    transforms=get_valid_transforms()
)

def collate_fn(batch):
    """Transpose a list of per-sample tuples into a tuple of per-field tuples."""
    columns = zip(*batch)
    return tuple(columns)

# Sequential (unshuffled) loader over all frames; collate_fn keeps images and
# ids as tuples, so make_predictions() stacks the images itself.
data_loader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn
)

In [12]:
# def make_predictions(images, filter_num, score_threshold=0.5):
#     images = torch.stack(images).cuda().float()
#     box_list = []
#     score_list = []
#     with torch.no_grad():
#         det = net(images, torch.tensor([1]*images.shape[0]).float().cuda())
#         for i in range(images.shape[0]):
#             boxes = det[i].detach().cpu().numpy()[:,:4]    
#             scores = det[i].detach().cpu().numpy()[:,4]   
#             label = det[i].detach().cpu().numpy()[:,5]
#             # useing only label = 2
#             indexes = np.where((scores > score_threshold) & (label == filter_num))[0]
#             boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
#             boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
#             box_list.append(boxes[indexes])
#             score_list.append(scores[indexes])
#     return box_list, score_list


def make_predictions(images, filter_num, score_threshold=0.25):
    """Run the detector on a batch with horizontal-flip test-time augmentation.

    Uses the module-level ``net``. Each detection row is read as
    ``[x, y, w, h, score, label]`` and converted to ``[x1, y1, x2, y2]``.

    Args:
        images: sequence of equally sized image tensors; they are stacked and
            the last dimension is flipped for the second pass.
        filter_num: only detections whose label equals this value are kept.
        score_threshold: detections must score strictly above this value.

    Returns:
        A list with one entry per TTA pass (original, flipped). Each entry is a
        list with one ``{'boxes': ..., 'scores': ...}`` dict per image; boxes
        from the flipped pass are mirrored back into the original frame.
    """
    with torch.no_grad():
        images = torch.stack(images).float().cuda()
        # Width comes from the batch itself so the un-mirroring below is not
        # tied to one input size.
        image_width = images.shape[3]
        img_scales = torch.tensor([1] * images.shape[0]).float().cuda()
        predictions = []
        for tta_index in range(2):
            flipped = tta_index == 1
            # torch.flip already returns a copy, so no extra clone is needed there.
            target_images = images.flip(3) if flipped else images.clone()
            det = net(target_images, img_scales)

            result = []
            for i in range(target_images.shape[0]):
                # Move the detections to the host once instead of once per column.
                det_i = det[i].detach().cpu().numpy()
                scores = det_i[:, 4]
                label = det_i[:, 5]
                indexes = np.where((scores > score_threshold) & (label == filter_num))[0]
                # Indexing with an index array copies, so det_i is left untouched.
                boxes = det_i[indexes, :4]
                boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
                if flipped:
                    # Mirror x back; x1 and x2 swap roles under the flip.
                    boxes[:, [0, 2]] = image_width - boxes[:, [2, 0]]
                result.append({
                    'boxes': boxes,
                    'scores': scores[indexes],
                })
            predictions.append(result)
    return predictions


def run_wbf(predictions, image_index, image_size=1280, iou_thr=0.4, skip_box_thr=0.3, weights=None):
    """Fuse the per-TTA predictions for one image with weighted boxes fusion.

    Args:
        predictions: output of ``make_predictions`` (one list per TTA pass).
        image_index: index of the image inside the batch.
        image_size: side length used to normalise box coordinates before fusion
            and to scale them back afterwards.
        iou_thr: IoU threshold passed to ``weighted_boxes_fusion``.
        skip_box_thr: score threshold passed to ``weighted_boxes_fusion``.
        weights: optional per-pass weights passed to ``weighted_boxes_fusion``
            (previously accepted but ignored).

    Returns:
        ``(boxes, scores, labels)`` with boxes in pixel coordinates.
    """
    boxes = [(prediction[image_index]['boxes']/(image_size-1)).tolist() for prediction in predictions]
    scores = [prediction[image_index]['scores'].tolist() for prediction in predictions]
    # Single-class problem: every box gets label 1.
    labels = [np.ones(prediction[image_index]['scores'].shape[0]).astype(int).tolist() for prediction in predictions]
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes*(image_size-1)
    return boxes, scores, labels

In [13]:
def iou(bbox1, bbox2):
    """Intersection-over-union of two ``(x0, y0, x1, y1)`` boxes.

    Returns 0 when the boxes do not overlap with positive area.
    """
    ax0, ay0, ax1, ay1 = (float(v) for v in bbox1)
    bx0, by0, bx1, by1 = (float(v) for v in bbox2)

    # Extent of the overlap rectangle along each axis.
    overlap_w = min(ax1, bx1) - max(ax0, bx0)
    overlap_h = min(ay1, by1) - max(ay0, by0)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    intersection = overlap_w * overlap_h
    union = area_a + area_b - intersection
    return intersection / union

In [14]:
from torch import nn
# Settings for the crop classifiers.
CFG = {
    'model_arch': '',    # passed to the classifier constructors
    'img_size': 112,     # side length each helmet crop is resized to
    'device': 'cuda:0'   # device the classifiers and their inputs are moved to
}

def get_img(path):
    """Read an image file and return it in RGB channel order.

    The result is a reversed-channel view of the BGR array returned by OpenCV.

    Raises:
        FileNotFoundError: if the file cannot be read.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # cv2.imread returns None on failure instead of raising.
        raise FileNotFoundError(f'Could not read image: {path}')
    im_rgb = im_bgr[:, :, ::-1]
    return im_rgb


from torchvision.models.resnet import resnet18
class Classifier2d(nn.Module):
    """ResNet-18 classifier over a stack of frames concatenated along channels.

    Args:
        model_arch: unused; kept so existing call sites keep working.
        n_class: number of output logits (previously ignored, always 2).
        pretrained: forwarded to ``resnet18`` (previously ignored, always
            False). The first convolution is replaced either way.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        self.backbone = resnet18(pretrained=pretrained, progress=False)

        # 27 input channels; the inference loop in this notebook feeds 9 RGB
        # crops concatenated along the channel axis.
        num_in_channels = 27
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=512, out_features=4096),
        )
        self.logit = nn.Linear(4096, out_features=n_class)

    def forward(self, x):
        """Return raw logits of shape ``(batch, n_class)``."""
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        # The backbone's own fc layer is bypassed in favour of head + logit.
        x = self.head(x)
        x = self.logit(x)
        return x


from torchvision.models.video import r3d_18
class Classifier3d(nn.Module):
    """R3D-18 video classifier over a clip of frames.

    Args:
        model_arch: unused; kept so existing call sites keep working.
        n_class: number of output logits (previously ignored, always 2).
        pretrained: forwarded to ``r3d_18`` (previously ignored, always False).
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()

        self.backbone = r3d_18(pretrained=pretrained, progress=False)

        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=512, out_features=4096),
        )
        self.logit = nn.Linear(4096, out_features=n_class)

    def forward(self, x):
        """Return raw logits of shape ``(batch, n_class)``."""
        x = self.backbone.stem(x)
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        # The backbone's own fc layer is bypassed in favour of head + logit.
        x = self.head(x)
        x = self.logit(x)

        return x

In [15]:
# Shared softmax over the class dimension.
softmax = nn.Softmax(dim=1)


def pred_classification(model, imgs):
    """Run ``model`` on ``imgs`` and return class probabilities.

    The batch is moved to ``CFG['device']`` and cast to float first.
    """
    batch = imgs.to(CFG['device']).float()
    logits = model(batch)
    return softmax(logits)

# Checkpoints loaded into Classifier2d, one per entry.
net_2dmodel_path_list = [
    '../input/9-resnet18-9-viscon-noim-batch32/fold_0_epoch_4_0.95336',
    '../input/9-resnet18-9-viscon-noim-batch32/fold_1_epoch_1_0.96258',
    '../input/9-resnet18-9-viscon-noim-batch32/fold_2_epoch_8_0.97242',
    '../input/9-resnet18-9-viscon-noim-batch32/fold_3_epoch_8_0.95914',
    '../input/9-resnet18-9-viscon-noim-batch32/fold_4_epoch_5_0.97043'
]
# Checkpoints loaded into Classifier3d; the last one comes from a different dataset directory.
net_3dmodel_path_list = [
    '../input/11-resnet18-3d-4fold/fold_0_epoch_0_0.96528',
    '../input/11-resnet18-3d-4fold/fold_1_epoch_9_0.97242',
    '../input/11-resnet18-3d-4fold/fold_2_epoch_6_0.96614',
    '../input/12-resnet18-3d-fold3/fold_3_epoch_1_0.96545',
]

def _load_classifiers(model_cls, checkpoint_paths):
    """Instantiate ``model_cls`` once per checkpoint, load its weights, set eval mode.

    Args:
        model_cls: classifier class called as ``model_cls(CFG['model_arch'], 2)``.
        checkpoint_paths: paths to state dicts readable by ``torch.load``.

    Returns:
        List of models on ``CFG['device']`` in eval mode, in checkpoint order.
    """
    models = []
    for checkpoint_path in checkpoint_paths:
        model = model_cls(CFG['model_arch'], 2).to(CFG['device'])
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        models.append(model)
    return models


net_2dmodel_list = _load_classifiers(Classifier2d, net_2dmodel_path_list)
net_3dmodel_list = _load_classifiers(Classifier3d, net_3dmodel_path_list)

In [16]:
def imcrop(img, bbox):
    """Crop ``img`` to ``bbox = (x1, y1, x2, y2)``, padding when the box leaves the image."""
    x1, y1, x2, y2 = bbox
    fits_inside = x1 >= 0 and y1 >= 0 and x2 <= img.shape[1] and y2 <= img.shape[0]
    if not fits_inside:
        # Pad the image and shift the box so that plain slicing works below.
        img, x1, x2, y1, y2 = pad_img_to_fit_bbox(img, x1, x2, y1, y2)
    return img[y1:y2, x1:x2, :]

def pad_img_to_fit_bbox(img, x1, x2, y1, y2):
    """Pad ``img`` with a constant border so the box lies inside it.

    Returns:
        ``(img, x1, x2, y1, y2)`` with the box shifted into the padded image's
        coordinates.
    """
    pad_top = -min(0, y1)
    pad_bottom = max(y2 - img.shape[0], 0)
    pad_left = -min(0, x1)
    pad_right = max(x2 - img.shape[1], 0)
    img = cv2.copyMakeBorder(img, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT)
    # Top/left padding moves the origin, so both corners shift by that amount.
    return img, x1 + pad_left, x2 + pad_left, y1 + pad_top, y2 + pad_top

# opencv
def crop_area(image, x1, y1, x2, y2):
    """Cut a square region around a box, enlarged by ``CROP_RATIO`` per side.

    The box is grown by ``CROP_RATIO`` times its width/height on each side,
    then the shorter dimension is extended to make the crop square. Parts that
    fall outside the image are padded by ``imcrop``.

    Args:
        image: array with three dimensions (height, width, channels).
        x1, y1, x2, y2: box corners in pixels.

    Returns:
        The cropped (and possibly padded) image region.
    """
    # Unpacking requires exactly three dimensions, as the original did.
    _height, _width, _channels = image.shape

    margin_x = (x2 - x1) * CROP_RATIO
    margin_y = (y2 - y1) * CROP_RATIO

    left = int(round(x1 - margin_x))
    top = int(round(y1 - margin_y))
    right = int(round(x2 + margin_x))
    bottom = int(round(y2 + margin_y))

    crop_w = right - left
    crop_h = bottom - top
    diff = abs(crop_w - crop_h)
    if crop_w > crop_h:
        # Wider than tall: extend vertically, split between top and bottom.
        extra_top = int(round(diff / 2))
        top -= extra_top
        bottom += diff - extra_top
    elif crop_w < crop_h:
        # Taller than wide: extend horizontally, split between left and right.
        extra_right = int(round(diff / 2))
        left -= diff - extra_right
        right += extra_right

    return imcrop(image, [left, top, right, bottom])

In [17]:
# #check prediction
# import matplotlib.pyplot as plt

# cnt = 0
# for images, image_ids in data_loader:
#     predictions = make_predictions(images, 1, score_threshold=0.2)
    
#     for i in range(len(images)):
#         boxes, scores, labels = run_wbf(predictions, image_index=i, skip_box_thr=0.2)
#         boxes = boxes.astype(np.int32).clip(min=0, max=1279)
#         sample = images[i].permute(1,2,0).cpu().numpy()
#         sample = cv2.resize(sample , (int(1280), int(720)))
        
#         if len(scores) >= 1:
#             fig, ax = plt.subplots(1, 1, figsize=(16, 8))
            
#             for box,score in zip(boxes,scores):
#                 box[0] = box[0]
#                 box[1] = box[1] * 720 / 1280
#                 box[2] = box[2]
#                 box[3] = box[3] * 720 / 1280
#                 cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (1, 0, 0), 3)
#             ax.set_axis_off()
#             ax.imshow(sample)
#             cnt += 1
#     if cnt >= 10:
#         break



In [18]:
# Run the helmet detector over every extracted frame and collect the fused boxes
# as integer (left, top, width, height) rows.
result_image_ids = []
results_boxes = []
results_scores = []
for images, image_ids in tqdm(data_loader):
    predictions = make_predictions(images, 1, score_threshold=0.2)
    for i, image_id in enumerate(image_ids):
        boxes, scores, labels = run_wbf(predictions, image_index=i, skip_box_thr=0.2)

        # Only y is rescaled (1280 -> 720); x is used as-is.
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * 720 / 1280
        # Convert (x1, y1, x2, y2) to (left, top, width, height).
        boxes[:, 2] -= boxes[:, 0]
        boxes[:, 3] -= boxes[:, 1]
        boxes = boxes.astype(np.int32)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(min=0, max=1280-1)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(min=0, max=720-1)

        result_image_ids.extend([image_id] * len(boxes))
        results_boxes.append(boxes)
        results_scores.append(scores)


100%|██████████| 167/167 [16:57<00:00,  6.09s/it]


In [19]:
# One row per detected helmet: score, source image and box geometry.
box_columns = ['left', 'top', 'width', 'height']
box_df = pd.DataFrame(np.concatenate(results_boxes), columns=box_columns)
score_df = pd.DataFrame({'scores':np.concatenate(results_scores), 'image_name':result_image_ids})
test_df = pd.concat([score_df, box_df], axis=1)

# Keep only detections above the detector threshold.
above_threshold = test_df.scores > DETECTION_THRESHOLD
test_df = test_df[above_threshold].reset_index(drop=True)
test_df

Unnamed: 0,scores,image_name,left,top,width,height
0,0.683944,57995_000109_Sideline_336.png,524,399,12,12
1,0.669834,57995_000109_Sideline_336.png,663,337,11,11
2,0.594288,57995_000109_Sideline_336.png,27,235,13,12
3,0.589849,57995_000109_Sideline_336.png,900,207,12,12
4,0.579833,57995_000109_Sideline_336.png,733,130,10,12
...,...,...,...,...,...,...
51737,0.579655,57906_000718_Endzone_248.png,815,293,17,20
51738,0.403737,57906_000718_Endzone_248.png,481,439,17,18
51739,0.356750,57906_000718_Endzone_248.png,475,418,14,16
51740,0.267391,57906_000718_Endzone_248.png,732,110,19,26


In [20]:
# Check every detected helmet: re-score each box with the 2D and 3D classifier
# ensembles, using crops from a 9-frame window centred on the detection frame.
new_result_list = []

for image_index, image_name in enumerate(tqdm(test_df["image_name"].unique())):
    # Dropping the trailing "_<frame>.png" and appending ".mp4" gives the
    # key used in max_frame_dict.
    this_video_name = image_name.rsplit('_',1)[0] + '.mp4'
    this_max_frame = max_frame_dict[this_video_name]

    # The image name is split on "_" into gameKey, playID, view and frame.
    gameKey = image_name.split('_')[0]
    playID = image_name.split('_')[1]
    view = image_name.split('_')[2]
    frame = int(image_name.split('_')[3].replace('.png',''))

    # Skip frames within CUT_FRAME of the start or end of the video.
    if frame < CUT_FRAME or frame > this_max_frame-CUT_FRAME:
        continue


    # Load the frames from frame-4 to frame+4; a missing file is replaced
    # by a black 720x1280 image.
    image_list = []
    for frame_diff in [-4,-3,-2,-1,0,1,2,3,4]:
        target_frame = frame+frame_diff
        target_image_name = f"{DATA_ROOT_PATH}/{gameKey}_{playID}_{view}_{target_frame}.png"

        if not os.path.exists(target_image_name):
            img = np.zeros([720,1280,3], dtype=np.uint8)
            image_list.append(img)
        else:
            img = get_img(target_image_name)
            image_list.append(img)


    # All detections that belong to this image.
    target_df = test_df[test_df["image_name"] == image_name]

    # Build classifier inputs twice: index 0 is unflipped, index 1 is
    # horizontally flipped (test-time augmentation).
    tta_2dcropped_list = []
    tta_3dcropped_list = []
    for tta_index in range(2):
        cropped_2dlist = []
        cropped_3dlist = []

        for row in target_df.values:
            # NOTE: this rebinds image_name; the value is unchanged because
            # target_df was filtered on it.
            score, image_name, x1, y1, w, h = row
            x2, y2 = x1+w, y1+h

            cropped_2dimage_list = []
            cropped_3dimage_list = []
            for img in image_list:
                # The same box is cropped from every frame of the window.
                cropped_img = crop_area(img, x1, y1, x2, y2)
                cropped_img = cv2.resize(cropped_img,(CFG["img_size"],CFG["img_size"]),interpolation = cv2.INTER_CUBIC)
                if tta_index == 1:
                    cropped_img = A.HorizontalFlip(p=1)(image=cropped_img)['image']
                cropped_img = A.Normalize(p=1)(image=cropped_img)['image']
                cropped_2dimage_list.append(cropped_img)
                cropped_3dimage_list.append(ToTensorV2(p=1)(image=cropped_img)['image'])

            # 2D input: the 9 crops concatenated along the channel axis.
            cropped_2dimg = np.concatenate(cropped_2dimage_list,2)
            cropped_2dimg = ToTensorV2(p=1)(image=cropped_2dimg)['image']
            cropped_2dlist.append(cropped_2dimg)

            # 3D input: the 9 crop tensors stacked along a new dimension 1.
            cropped_3dimg = torch.stack(cropped_3dimage_list, 1)
            cropped_3dlist.append(cropped_3dimg)

        tta_2dcropped_list.append(cropped_2dlist)
        tta_3dcropped_list.append(cropped_3dlist)



    # Per model: average column 1 of the softmax output over both TTA variants.
    folds_2dimage_preds_list = []
    folds_3dimage_preds_list = []
    with torch.no_grad():

        for net_model in net_2dmodel_list:
            tta_2dimage_preds = []
            for cropped_list in tta_2dcropped_list:
                img = torch.stack(cropped_list)
                image_preds = pred_classification(net_model, img)
                tta_2dimage_preds.append(image_preds.detach().cpu().numpy()[:,1])
            tta_2dimage_preds = np.mean(tta_2dimage_preds,0)
            folds_2dimage_preds_list.append(tta_2dimage_preds)

        for net_model in net_3dmodel_list:
            tta_3dimage_preds = []
            for cropped_list in tta_3dcropped_list:
                img = torch.stack(cropped_list)
                image_preds = pred_classification(net_model, img)
                tta_3dimage_preds.append(image_preds.detach().cpu().numpy()[:,1])
            tta_3dimage_preds = np.mean(tta_3dimage_preds,0)
            folds_3dimage_preds_list.append(tta_3dimage_preds)


    # Average over the models of each family, then blend 2D and 3D equally.
    folds_2dimage_preds_list = np.mean(folds_2dimage_preds_list,0)
    folds_3dimage_preds_list = np.mean(folds_3dimage_preds_list,0)
    # The outer np.mean runs over a one-element list, so it leaves the blend unchanged.
    image_preds = np.mean([folds_2dimage_preds_list*0.5 + folds_3dimage_preds_list*0.5],0)

    # One output row per box: classifier score, image name, then the box columns.
    preds_list = np.concatenate([image_preds.reshape(-1,1),np.array([image_name]*image_preds.shape[0]).reshape(-1,1),target_df.values[:,2:]],1)

    new_result_list.extend(preds_list.tolist())

100%|██████████| 2664/2664 [1:34:28<00:00,  2.13s/it]


In [21]:
# Rebuild the results with classifier scores and keep only confident impacts.
result_columns = ['scores', 'image_name', 'left', 'top', 'width', 'height']
test_df = pd.DataFrame(new_result_list, columns=result_columns)
is_impact = test_df.scores > CLASSIFICATION_THRESHOLD
test_df = test_df[is_impact].reset_index(drop=True)
test_df

Unnamed: 0,scores,image_name,left,top,width,height
0,0.783200,57995_000109_Endzone_65.png,472,284,17,17
1,0.750351,57906_000718_Sideline_109.png,783,335,13,15
2,0.874615,58102_002798_Endzone_249.png,952,248,24,27
3,0.866899,57995_000109_Sideline_59.png,462,412,14,15
4,0.798738,57995_000109_Sideline_59.png,475,418,12,11
...,...,...,...,...,...,...
179,0.955984,57906_000718_Endzone_30.png,628,266,17,23
180,0.880026,57906_000718_Endzone_30.png,647,267,14,15
181,0.959732,58102_002798_Sideline_48.png,396,339,17,20
182,0.754605,58102_002798_Sideline_48.png,429,354,18,21


In [22]:
# Derive frame number and video file name from the image name.
# regex=False makes '.png' a literal match (as a regex the dot matches any
# character); n=1 is passed by keyword because newer pandas makes it keyword-only.
test_df['frame'] = test_df.image_name.str.split('_').str[3].str.replace('.png', '', regex=False).astype(int)
test_df['video'] = test_df.image_name.str.rsplit('_', n=1).str[0] + '.mp4'
# Sort by frame so the following step walks each video's detections in time order.
test_df = test_df.sort_values('frame').reset_index(drop = True)
test_df

Unnamed: 0,scores,image_name,left,top,width,height,frame,video
0,0.880026,57906_000718_Endzone_30.png,647,267,14,15,30,57906_000718_Endzone.mp4
1,0.955984,57906_000718_Endzone_30.png,628,266,17,23,30,57906_000718_Endzone.mp4
2,0.819211,57906_000718_Endzone_31.png,613,274,19,15,31,57906_000718_Endzone.mp4
3,0.966593,57906_000718_Sideline_31.png,763,334,10,14,31,57906_000718_Sideline.mp4
4,0.961537,57906_000718_Sideline_31.png,756,336,11,13,31,57906_000718_Sideline.mp4
...,...,...,...,...,...,...,...,...
179,0.856686,58102_002798_Endzone_259.png,939,241,25,22,259,58102_002798_Endzone.mp4
180,0.743822,57995_000109_Sideline_271.png,891,184,13,12,271,57995_000109_Sideline.mp4
181,0.732943,58102_002798_Endzone_277.png,886,361,18,21,277,58102_002798_Endzone.mp4
182,0.802520,58102_002798_Endzone_300.png,966,384,20,26,300,58102_002798_Endzone.mp4


In [23]:
# Compress the results: a box that persists across several nearby frames is
# merged into a single track, and one representative box is kept per track.
# Assumes test_df is already sorted by frame.

frame_diff = 4      # a box joins a track if its frame is less than this far from the track's last box
iou_thresh = 0.35   # ...and its IoU with that last box exceeds this value

all_selected_index_list = []

for video_name in test_df["video"].unique():

    target_df = test_df[test_df["video"] == video_name]

    # Each entry is one track: a list of [index label, score, frame, x1, y1, x2, y2].
    bbox_list = []
    for df_index, row in target_df.iterrows():
        frame = row["frame"]
        x1, y1 = row["left"], row["top"]
        new_box = [df_index, row["scores"], frame, x1, y1, x1 + row["width"], y1 + row["height"]]

        # NOTE(review): a box is appended to every matching track, not only the
        # first one; kept as-is to preserve the results.
        append_flag = False
        for bboxes in bbox_list:
            last_box = bboxes[-1]
            if iou(last_box[3:], new_box[3:]) > iou_thresh and abs(last_box[2] - frame) < frame_diff:
                bboxes.append(new_box)
                append_flag = True
        if not append_flag:
            bbox_list.append([new_box])

    # Keep the temporally middle box of every track.
    all_selected_index_list.extend(bboxes[len(bboxes) // 2][0] for bboxes in bbox_list)

# iterrows() yields index labels, so select with .loc rather than the positional .iloc.
test_df = test_df.loc[all_selected_index_list].reset_index(drop = True)
test_df

Unnamed: 0,scores,image_name,left,top,width,height,frame,video
0,0.961641,57906_000718_Endzone_31.png,645,265,16,17,31,57906_000718_Endzone.mp4
1,0.923834,57906_000718_Endzone_32.png,627,263,18,21,32,57906_000718_Endzone.mp4
2,0.769508,57906_000718_Endzone_34.png,616,273,17,14,34,57906_000718_Endzone.mp4
3,0.972546,57906_000718_Endzone_33.png,816,276,19,17,33,57906_000718_Endzone.mp4
4,0.892349,57906_000718_Endzone_33.png,802,270,17,16,33,57906_000718_Endzone.mp4
...,...,...,...,...,...,...,...,...
67,0.940661,58102_002798_Sideline_53.png,380,395,17,25,53,58102_002798_Sideline.mp4
68,0.900165,58102_002798_Sideline_54.png,394,401,16,17,54,58102_002798_Sideline.mp4
69,0.922564,58102_002798_Sideline_135.png,594,370,15,19,135,58102_002798_Sideline.mp4
70,0.840171,58102_002798_Sideline_136.png,605,377,13,15,136,58102_002798_Sideline.mp4


In [24]:
# Submission format:
#gameKey,playID,view,video,frame,left,width,top,height
#57590,3607,Endzone,57590_003607_Endzone.mp4,1,1,1,1,1
name_parts = test_df.image_name.str.split('_')
test_df['gameKey'] = name_parts.str[0].astype(int)
test_df['playID'] = name_parts.str[1].astype(int)
test_df['view'] = name_parts.str[2]
# regex=False makes '.png' a literal match; n=1 is passed by keyword because
# newer pandas makes it keyword-only.
test_df['frame'] = name_parts.str[3].str.replace('.png', '', regex=False).astype(int)
test_df['video'] = test_df.image_name.str.rsplit('_', n=1).str[0] + '.mp4'
test_df = test_df[["gameKey","playID","view","video","frame","left","width","top","height"]]
test_df

Unnamed: 0,gameKey,playID,view,video,frame,left,width,top,height
0,57906,718,Endzone,57906_000718_Endzone.mp4,31,645,16,265,17
1,57906,718,Endzone,57906_000718_Endzone.mp4,32,627,18,263,21
2,57906,718,Endzone,57906_000718_Endzone.mp4,34,616,17,273,14
3,57906,718,Endzone,57906_000718_Endzone.mp4,33,816,19,276,17
4,57906,718,Endzone,57906_000718_Endzone.mp4,33,802,17,270,16
...,...,...,...,...,...,...,...,...,...
67,58102,2798,Sideline,58102_002798_Sideline.mp4,53,380,17,395,25
68,58102,2798,Sideline,58102_002798_Sideline.mp4,54,394,16,401,17
69,58102,2798,Sideline,58102_002798_Sideline.mp4,135,594,15,370,19
70,58102,2798,Sideline,58102_002798_Sideline.mp4,136,605,13,377,15


In [25]:
# Save a copy of the submission rows in the working directory.
test_df.to_csv('sub_test_df.csv', index=False)

In [26]:
# clearing working dir
# be careful when running this code on local environment!
# !rm -rf *
# Everything in the working directory is moved to /tmp, including
# sub_test_df.csv written in the previous cell.
!mv * /tmp/

In [27]:
# Submit through the competition's nflimpact environment.
import nflimpact
env = nflimpact.make_env()

if IS_PRIVATE:
    env.predict(test_df) # df is a pandas dataframe of your entire submission file
else:
    # Otherwise submit the sample submission unchanged.
    sub = pd.read_csv('../input/nfl-impact-detection/sample_submission.csv')
    env.predict(sub)

In [28]:
# #すべてのヘルメットを確認
# new_result_list = []

# for image_index, image_name in enumerate(tqdm(test_df["image_name"].unique())):
# #     if image_index == 300:
# #         break
# #     print("image_name:",image_name)

#     this_video_name = image_name.rsplit('_',1)[0] + '.mp4'
#     this_max_frame = max_frame_dict[this_video_name]
# #     print("this_video_name:",this_video_name)
# #     print("this_max_frame:",this_max_frame)

#     gameKey = image_name.split('_')[0]
#     playID = image_name.split('_')[1]
#     view = image_name.split('_')[2]
#     frame = int(image_name.split('_')[3].replace('.png',''))
# #     print("frame:",frame)
    
#     if frame < CUT_FRAME or frame > this_max_frame-CUT_FRAME:
# #         print("continue")
# #         raise
#         continue
    
    
#     image_list = []
#     for frame_diff in [-4,-3,-2,-1,0,1,2,3,4]:
#         target_frame = frame+frame_diff
#         target_image_name = f"{DATA_ROOT_PATH}/{gameKey}_{playID}_{view}_{target_frame}.png"
        
#         if not os.path.exists(target_image_name):
#             img = np.zeros([720,1280,3], dtype=np.uint8)
#             image_list.append(img)
#         else:
#             img = get_img(target_image_name)
#             image_list.append(img)
            
# #             fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# #             ax.set_axis_off()
# #             ax.imshow(img)
# #     raise
    
# #     fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# #     ax.set_axis_off()
# #     ax.imshow(img[:,:,24:])
# #     raise


#     target_df = test_df[test_df["image_name"] == image_name]
# #     print("target_df:",target_df)    

#     tta_cropped_list = []
#     #tta times
#     for tta_index in range(2):
#         cropped_list = []
#         for row in target_df.values:
#     #         print("row",row)
#             score, image_name, x1, y1, w, h = row
#             x2, y2 = x1+w, y1+h

#             cropped_image_list = []
#             for img in image_list:
#                 cropped_img = crop_area(img, x1, y1, x2, y2)
#                 cropped_img = cv2.resize(cropped_img,(CFG["img_size"],CFG["img_size"]),interpolation = cv2.INTER_CUBIC)
                
#                 if tta_index == 1:
#                     cropped_img = A.HorizontalFlip(p=1)(image=cropped_img)['image']
#                 cropped_img = A.Normalize(p=1)(image=cropped_img)['image']
                
#                 cropped_image_list.append(cropped_img)
                

#     #             for img in cropped_image_list:
#     #                 fig, ax = plt.subplots(1, 1, figsize=(16, 8))
#     #                 ax.set_axis_off()
#     #                 ax.imshow(img)
#     #             raise   
                
#             cropped_img = np.concatenate(cropped_image_list,2)
#             cropped_img = ToTensorV2(p=1)(image=cropped_img)['image']
#             cropped_list.append(cropped_img)
#         tta_cropped_list.append(cropped_list)
        
        
#     image_preds_list = []
#     with torch.no_grad():
#         for net_model in net_model_list:
#             tta_image_preds = []
#             for cropped_list in tta_cropped_list:
#                 img = torch.stack(cropped_list)
#                 image_preds = pred_classification(net_model, img)
#                 tta_image_preds.append(image_preds.detach().cpu().numpy()[:,1])
# #             print("tta_image_preds:",tta_image_preds)
#             tta_image_preds = np.mean(tta_image_preds,0)
#             image_preds_list.append(tta_image_preds)

# #     print("image_preds_list:",image_preds_list)
#     image_preds = np.mean(image_preds_list,0)
# #     print("image_preds:",image_preds)
# #     raise
    
#     preds_list = np.concatenate([image_preds.reshape(-1,1),np.array([image_name]*image_preds.shape[0]).reshape(-1,1),target_df.values[:,2:]],1)

#     new_result_list.extend(preds_list.tolist())