# Inference

In [None]:
import gc
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random
from pathlib import Path
from argparse import Namespace

In [None]:
class Classifier(nn.Module):
    """Two-class image classifier: EfficientNetV2-S backbone plus a linear head.

    The backbone is instantiated with the IMAGENET1K_V1 pretrained weights;
    the head ``fc`` maps its 1000 outputs to 2 class scores.
    """

    def __init__(self):
        super().__init__()
        # Backbone: torchvision EfficientNetV2-S with ImageNet-pretrained weights.
        pretrained = models.EfficientNet_V2_S_Weights.IMAGENET1K_V1
        self.cnn = models.efficientnet_v2_s(weights=pretrained)
        # Head: 1000 backbone outputs -> 2 class scores.
        self.fc = nn.Linear(1000, 2)

    def forward(self, x):
        """Run ``x`` through the backbone and then the linear head."""
        return self.fc(self.cnn(x))

In [None]:
# Size handed to transforms.Resize for every frame.
# An alternative setting kept for reference: (440, 310).
# NOTE(review): torchvision's Resize interprets a 2-tuple as (height, width) -
# confirm that (220, 105) is the intended orientation.
image_size = (220, 105)

# Inference-time pipeline: resize, then convert the PIL image to a tensor.
test_tfm = transforms.Compose(
    [
        transforms.Resize(size=image_size),
        transforms.ToTensor(),
    ]
)

# Training-time pipeline: the same resize followed by random augmentations
# (sharpness, horizontal flip, small rotation, colour jitter) before the
# tensor conversion.
train_tfm = transforms.Compose(
    [
        transforms.Resize(size=image_size),
        transforms.RandomAdjustSharpness(sharpness_factor=1.5, p=0.5),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
        transforms.ToTensor(),
    ]
)

In [None]:
# Assumed size of the raw video frames, in pixels (x = width, y = height).
img_sizex = 1280
img_sizey = 720

# Margins removed from each frame before resizing:
# *t values are where the kept region starts (left / top),
# *d values are trimmed from the opposite edge (right / bottom).
crop_xt = 200
crop_yt = 100
crop_xd = 200
crop_yd = 0

# The cropped frame is shrunk by this factor along both axes.
resize_factor = 4
new_sizex = int((img_sizex - (crop_xt + crop_xd)) / resize_factor)
new_sizey = int((img_sizey - (crop_yt + crop_yd)) / resize_factor)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
import cv2

def get_video_frame(vid_path, tfm=test_tfm):
    """Decode a video into a stack of cropped, downscaled, transformed frames.

    Every decoded frame is cropped with the module-level ``crop_*`` /
    ``img_size*`` margins, resized to ``(new_sizex, new_sizey)`` with area
    interpolation, converted from BGR to an RGB PIL image and finally passed
    through ``tfm``.

    Args:
        vid_path: Location of the video file (``str`` or path-like object).
        tfm: Callable applied to each PIL frame. It must return a tensor so
            that the frames can be stacked. Defaults to ``test_tfm``.

    Returns:
        The per-frame tensors stacked along a new leading dimension.

    Raises:
        RuntimeError: If no frame could be read (missing, unreadable or empty
            video). The message names the offending path.
    """
    # cv2.VideoCapture expects a plain string, so path-like inputs are coerced.
    vid_cap = cv2.VideoCapture(str(vid_path))
    frames = []

    try:
        while vid_cap.isOpened():
            ret, frame = vid_cap.read()
            if not ret:
                break

            # Keep only the region of interest, then shrink it.
            frame = frame[crop_yt : img_sizey - crop_yd, crop_xt : img_sizex - crop_xd]
            frame = cv2.resize(frame, (new_sizex, new_sizey), interpolation=cv2.INTER_AREA)
            # OpenCV decodes to BGR; the transform pipeline works on RGB PIL images.
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frames.append(tfm(frame))
    finally:
        # Always free the decoder handle, even if a transform raises.
        vid_cap.release()

    if not frames:
        # torch.stack([]) would raise an opaque RuntimeError; fail with context instead.
        raise RuntimeError(f"No frames could be decoded from {str(vid_path)!r}")

    return torch.stack(frames, 0)

In [None]:
class VideoDataset(torch.utils.data.IterableDataset):
    """Iterable dataset that yields every frame of every video under ``root``.

    ``root`` is expected to contain one sub-directory per video, each holding
    a file named ``<directory name>.mp4``. Videos are visited in sorted order
    so that repeated runs produce the same sequence.
    """

    def __init__(self, root, frame_transform=None, video_transform=None):
        """Collect the video file paths found under ``root``.

        Args:
            root: Directory containing one sub-directory per video.
            frame_transform: Optional callable forwarded to ``get_video_frame``
                as its ``tfm`` argument. When ``None`` the default of
                ``get_video_frame`` is used.
            video_transform: Optional callable applied to the stacked frames
                of a whole video before the frames are yielded one by one.
        """
        super().__init__()
        self.frame_transform = frame_transform
        self.video_transform = video_transform

        # Only directories describe videos; stray files under root are ignored.
        video_dirs = sorted(entry for entry in Path(root).glob('*') if entry.is_dir())
        self.filename = [str(video_dir / f"{video_dir.name}.mp4") for video_dir in video_dirs]

    def __iter__(self):
        """Yield one dict per frame with keys ``'path'``, ``'pts'`` and ``'img'``.

        ``'path'`` is the video file name, ``'pts'`` the 1-based frame index
        within that video, and ``'img'`` the frame itself.
        """
        # With DataLoader(num_workers > 0) every worker gets a copy of this
        # dataset; give each worker a disjoint slice of videos so frames are
        # not emitted more than once.
        worker = torch.utils.data.get_worker_info()
        if worker is None:
            video_paths = self.filename
        else:
            video_paths = self.filename[worker.id :: worker.num_workers]

        for video_path in video_paths:
            if self.frame_transform is None:
                video = get_video_frame(video_path)
            else:
                video = get_video_frame(video_path, tfm=self.frame_transform)
            if self.video_transform is not None:
                video = self.video_transform(video)

            video_name = Path(video_path).name
            for pts, frame in enumerate(video, start=1):
                yield {
                    'path': video_name,
                    'pts': pts,
                    'img': frame,
                }

In [None]:
# Build the frame dataset from the Kaggle input directory and wrap it in a
# DataLoader that groups the yielded frames into batches of 32.
inference_dataset = VideoDataset(root="/kaggle/input/badminton/inference/inference")
inference_loader = DataLoader(dataset=inference_dataset, batch_size=32)


In [None]:
# Path of the fine-tuned checkpoint to load into the model.
# NOTE(review): the previously commented-out load used f"{_exp_name}_best.ckpt",
# but `_exp_name` is not defined in this notebook. Point CKPT_PATH at the trained
# checkpoint before trusting any prediction produced below.
CKPT_PATH = None
# Upper bound on the number of batches to process (handy for a quick smoke
# test, e.g. 1). None runs inference over the whole loader.
MAX_BATCHES = None

model_best = Classifier().to(device)
if CKPT_PATH is not None:
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
    model_best.load_state_dict(torch.load(CKPT_PATH, map_location=device))
else:
    print("WARNING: no checkpoint loaded - the classification head is randomly "
          "initialised, so the predicted labels are not meaningful.")
model_best.eval()

# Per-frame predictions, kept as three parallel lists.
preds = {'labels': [],
         'paths': [],
         'pts': [],
        }

with torch.no_grad():
    for batch_idx, data_dic in enumerate(tqdm(inference_loader)):
        if MAX_BATCHES is not None and batch_idx >= MAX_BATCHES:
            break

        # The model lives on `device`, so the batch has to be moved there too.
        logits = model_best(data_dic['img'].to(device))

        preds['labels'] += logits.argmax(dim=1).cpu().tolist()
        preds['paths'] += list(data_dic['path'])
        preds['pts'] += data_dic['pts'].tolist()

# Summarise instead of dumping one entry per frame.
print(f"{len(preds['labels'])} frames classified, "
      f"{sum(preds['labels'])} predicted as class 1")



0it [00:00, ?it/s]

{'labels': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'paths': ['00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4', '00114.mp4'], 'pts': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]}


In [None]:
# Column layout of the result table. 'HitFrame' was previously misspelled
# as 'HitFrmae'.
RESULT_COLUMNS = ['VideoName', 'ShotSeq', 'HitFrame', 'Hitter', "RoundHead", "Backhand",
                  "BallHeight", "LandingX", "LandingY", "HitterLocationX", "HitterLocationY",
                  "DefenderLocationX", "DefenderLocationY", "BallType", "Winner"]

# One row per frame predicted as class 1. ShotSeq counts those frames per
# video, starting at 1; every column after HitFrame is filled with a constant
# placeholder value.
rows = []
shots_per_video = {}
for label, video_name, frame_idx in zip(preds['labels'], preds['paths'], preds['pts']):
    if label != 1:
        continue
    shot_seq = shots_per_video.get(video_name, 0) + 1
    shots_per_video[video_name] = shot_seq
    rows.append([video_name, shot_seq, frame_idx, "A", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 'X'])

# Build the frame once from the list of rows: each inner list is one row, which
# avoids both the shape error of passing a flat list and the quadratic cost of
# concatenating inside the loop.
result = pd.DataFrame(rows, columns=RESULT_COLUMNS)
print(result)

Index(['VideoName', 'ShotSeq', 'HitFrmae', 'Hitter', 'RoundHead', 'Backhand',
       'BallHeight', 'LandingX', 'LandingY', 'HitterLocationX',
       'HitterLocationY', 'DefenderLocationX', 'DefenderLocationY', 'BallType',
       'Winner'],
      dtype='object')
['00114.mp4', 1, 1, 'A', 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 'X']


ValueError: Shape of passed values is (15, 1), indices imply (15, 15)