In [None]:
import pandas as pd
import glob
import seaborn as sns

df = pd.read_csv("../input/dfl-bundesliga-data-shootout/train.csv")
video_paths = glob.glob('../input/dfl-bundesliga-data-shootout/train/*.mp4')

In [None]:
df.head(10)

In [None]:
video_paths[:4]

## ffprobe



In [None]:
!ffprobe ../input/dfl-bundesliga-data-shootout/train/35bd9041_0.mp4

In [None]:
!ffprobe -of json ../input/dfl-bundesliga-data-shootout/train/35bd9041_0.mp4

In [None]:
# List every frame of the video stream(s) (-select_streams v) with its
# presentation timestamp and picture type (I/P/B); -v quiet hides the banner and log output.
!ffprobe -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -v quiet -select_streams v -show_entries frame=pkt_pts_time,pict_type

## ffmpeg

### 提取关键帧

In [None]:
# Extract I-frames: keep only frames whose picture type is I and write them as numbered JPEGs
!ffmpeg -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -vf "select=eq(pict_type\,I)"  -vsync vfr -qscale:v 2 -f image2 ./08fd33_2-I-%08d.jpg

# Extract P-frames: same command with the filter selecting picture type P
!ffmpeg -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -vf "select=eq(pict_type\,P)"  -vsync vfr -qscale:v 2 -f image2 ./08fd33_2-P-%08d.jpg

# Extract B-frames: same command with the filter selecting picture type B
!ffmpeg -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -vf "select=eq(pict_type\,B)"  -vsync vfr -qscale:v 2 -f image2 ./08fd33_2-B-%08d.jpg

### 均匀抽帧

In [None]:
# -r sets the extraction frame rate, i.e. how many images are taken from each
# second of video; 1 means one frame per second.
!ffmpeg -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -r 1 -q:v 2 -f image2 ./%08d.000000.jpg

### 抽取指定时间帧

In [None]:
!ffmpeg -ss 00:00:10 -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -frames:v 1 -q:v 2 output.jpg

### 提取视频片段

In [None]:
!ffmpeg -ss 00:00:01 -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4  -to 00:00:04  -c copy output.mp4

### 视频缩放

In [None]:
# Down-scale one clip to 384x216: decode with h264_cuvid and encode with
# h264_nvenc (both need an NVIDIA GPU build of ffmpeg), drop the audio track
# (-an) and overwrite any existing output.mp4 (-y).
!ffmpeg -y -c:v h264_cuvid -i ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4 -an -s 384x216 -c:v h264_nvenc output.mp4

In [None]:
!ls -lh ../input/dfl-bundesliga-data-shootout/clips/08fd33_2.mp4

In [None]:
!ls -lh output.mp4

## 视频分类思路

### event 统计

In [None]:
import pandas as pd
import glob
import seaborn as sns

df = pd.read_csv("../input/dfl-bundesliga-data-shootout/train.csv")
video_paths = glob.glob('../input/dfl-bundesliga-data-shootout/train/*.mp4')

df.head(10)

In [None]:
# Walk the annotation rows in order and emit one record per 'start'...'end'
# interval: [video_id, event, duration, start, end]. The event name is taken
# from the most recent row that is neither a 'start' nor an 'end' marker.
event_df = []
for _, record in df.iterrows():
    kind = record.event
    if kind == 'start':
        start = record.time
    elif kind == 'end':
        end = record.time
        event_df.append([record.video_id, event, end - start, start, end])
    else:
        # Remember the labelled event until the closing 'end' row arrives
        event = kind

In [None]:
event_df = pd.DataFrame(event_df, columns=['video_id', 'event', 'ts', 'start', 'end'])

In [None]:
event_df.head()

In [None]:
event_df.groupby('event')['ts'].mean()

In [None]:
event_df['event'].value_counts().plot(kind='bar')

In [None]:
sns.boxplot(x='event', y='ts', data=event_df)

In [None]:
sns.kdeplot(x='ts', data=event_df, hue='event')

In [None]:
event_df['video_id'].value_counts()

### 视频缩放

In [None]:
import pathlib
import os
import subprocess
import glob

In [None]:
# Create the output tree for the down-scaled videos.
# makedirs(exist_ok=True) is idempotent and also repairs a half-created tree
# ('resize_video' present but 'resize_video/train' missing), which the
# previous exists-check + two mkdir calls silently skipped.
os.makedirs(os.path.join('resize_video', 'train'), exist_ok=True)

video_paths = glob.glob('../input/dfl-bundesliga-data-shootout/train/*.mp4')

In [None]:
# Down-scale every training video with GPU decode/encode (audio dropped).
# 2.5GB -> roughly 500MB (384 * 216)
# 2.5GB -> roughly 170MB (192 * 108)
os.makedirs(os.path.join('resize_video', 'train'), exist_ok=True)
for path in video_paths:
    basename = os.path.basename(path)
    aimpath = os.path.join('resize_video', 'train', basename)
    # Transcoding is expensive: skip videos already converted by an earlier
    # run instead of disabling the whole loop, so a fresh kernel still
    # produces the files the later cells read.
    if os.path.exists(aimpath):
        continue
    # Argument list (no shell) so paths with spaces or shell metacharacters are safe
    subprocess.check_output([
        'ffmpeg', '-y', '-c:v', 'h264_cuvid', '-i', path,
        '-an', '-s', '192x108', '-c:v', 'h264_nvenc', aimpath,
    ])

### 数据集构建

In [None]:
from torchvision.io import read_video, read_video_timestamps

In [None]:
# glob returns files in arbitrary (filesystem-dependent) order; sort so that
# positional indexing and the later train/validation split
# (resize_video_paths[:-1] / resize_video_paths[-1:]) are reproducible.
resize_video_paths = sorted(glob.glob(os.path.join('resize_video', 'train', '*.mp4')))
# Sanity check: decode the first second of one video and look at the frame tensor shape
vframes = read_video(resize_video_paths[3], 0, 1, pts_unit='sec')
vframes[0].shape

In [None]:
import cv2

def get_video_framecount(filename):
    """Return the frame count OpenCV reports for a video file.

    Args:
        filename: Path of the video to inspect.

    Returns:
        The value of ``cv2.CAP_PROP_FRAME_COUNT`` for the opened capture,
        exactly as returned by ``VideoCapture.get``.
    """
    video = cv2.VideoCapture(filename)
    try:
        return video.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Always free the decoder/file handle; this function is called once
        # per video and previously leaked every capture it opened.
        video.release()

In [None]:
# Duration in seconds for each resized video, keyed by file name without the
# '.mp4' suffix; derived from the frame count assuming 25 frames per second.
resize_video_duration = {
    os.path.basename(video_path)[:-4]: get_video_framecount(video_path) / 25.0
    for video_path in resize_video_paths
}

In [None]:
resize_video_duration

In [None]:
read_video('resize_video/train/9a97dae4_1.mp4', 3000, 3001, pts_unit='sec')[0].shape

In [None]:
event_df.head(3)

In [None]:
from torch.utils.data.dataset import Dataset
import numpy as np
import torch

# Maps each label name to the integer class index used as the training target.
# 'background' is the label DFLVideoDataset assigns to randomly sampled clips
# that have no annotated event starting near them.
CLASS_DICT = {
    'background': 0,
    'challenge': 1,
    'play': 2,
    'throwin': 3
}

class DFLVideoDataset(Dataset):
    """Randomly sampled, fixed-length clips from the down-scaled match videos.

    Every ``__getitem__`` call ignores the requested index and draws a fresh
    random sample, so the dataset behaves like an endless sampler with a
    nominal length of 1000.

    Relies on module-level names defined elsewhere in the notebook:
    ``resize_video_duration`` (video id -> seconds), ``event_df`` (annotated
    intervals), ``CLASS_DICT`` (label -> class index) and ``read_video``.

    NOTE(review): sampling uses NumPy's global RNG; with ``num_workers > 0``
    the workers may need distinct NumPy seeds (``worker_init_fn``) to avoid
    drawing identical samples - confirm for the PyTorch version in use.

    Args:
        video_paths: Paths of the video files to sample from.
        clip_ts: Clip length in seconds.
        transform: Optional callable invoked as ``transform(image=clips)``
            that returns a mapping whose ``'image'`` entry is the
            transformed clip.
    """

    def __init__(self, video_paths, clip_ts=1, transform=None):
        self.video_paths = video_paths
        self.clip_ts = clip_ts
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(clip, label)`` for one randomly drawn clip.

        ``clip`` holds at most ``clip_ts * 25`` frames scaled from [0, 255]
        to [-0.5, 0.5]; ``label`` is a 0-d tensor with the ``CLASS_DICT`` index.
        """
        randvideo = np.random.choice(self.video_paths)
        randvideo_name = os.path.basename(randvideo)[:-4]
        maxts = resize_video_duration[randvideo_name]

        # randint(0, 10) yields 0..9, so this branch is taken for 6..9 (~40%)
        if np.random.randint(0, 10) > 5:
            # Random position in the video; labelled by any event starting
            # within half a second of the clip window, else 'background'.
            # The 200 s lower bound presumably skips pre-match footage - TODO confirm.
            rand_start = np.random.uniform(200, maxts-self.clip_ts-2)
            clips = read_video(randvideo, rand_start, rand_start+self.clip_ts, pts_unit='sec')[0]

            randvideo_ts = event_df.loc[
                (event_df['video_id'] == randvideo_name) &
                (event_df['start'] >= rand_start-0.5) &
                (event_df['start'] <= rand_start+self.clip_ts+0.5)
            ]['event']
            if len(randvideo_ts) == 0:
                label = 'background'
            else:
                label = randvideo_ts.iloc[0]
        else:
            # Clip that begins exactly at the start of a randomly chosen annotated event
            randvideo_ts = event_df.loc[
                (event_df['video_id'] == randvideo_name)
            ].sample(1)

            rand_start = randvideo_ts['start'].iloc[0]
            clips = read_video(randvideo, rand_start, rand_start+self.clip_ts, pts_unit='sec')[0]
            label = randvideo_ts['event'].iloc[0]

        if self.transform is not None:
            # Apply the transform to the decoded clip. The previous code read
            # an undefined name (`img`) and raised as soon as a transform was supplied.
            clips = self.transform(image=clips)['image']

        # Diagnostic: report reads that came back with a single frame
        if clips.shape[0] == 1:
            print(randvideo, maxts, rand_start, rand_start+self.clip_ts)

        return clips[:self.clip_ts * 25] / 255.0 - 0.5, torch.from_numpy(np.array(CLASS_DICT[label]))

    def __len__(self):
        # Nominal epoch size; samples are drawn randomly, not enumerated
        return 1000

In [None]:
train_loader = torch.utils.data.DataLoader(
    DFLVideoDataset(resize_video_paths[:-1]),
    batch_size=10, shuffle=True, num_workers=4, pin_memory=False
)

# Smoke test: pull a single batch and convert it to the model's input layout
for data in train_loader:
    # (B, T, H, W, C) -> (B, C, T, H, W). transpose(1, 4) only swapped the
    # time and channel axes, leaving time in the last (width) position.
    data[0] = data[0].permute(0, 4, 1, 2, 3)
    break

In [None]:
from torchvision.models.video import r3d_18

model = r3d_18(pretrained=True)
# Swap the pretrained classification head for one sized from the label map,
# so the layer stays correct if CLASS_DICT or the backbone changes.
model.fc = torch.nn.Linear(model.fc.in_features, len(CLASS_DICT))

model.cuda()
criterion = torch.nn.CrossEntropyLoss().cuda()
# Plain SGD with learning rate 0.005
optimizer = torch.optim.SGD(model.parameters(), 0.005)

In [None]:
def train(train_loader, model, criterion, optimizer):
    """Run one training epoch and return the mean batch loss.

    Args:
        train_loader: Iterable of ``(clips, target)`` batches where ``clips``
            is laid out as (B, T, H, W, C).
        model: Network taking (B, C, T, H, W) input.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepping ``model``'s parameters.

    Returns:
        Sum of the per-batch losses divided by ``len(train_loader)``.
    """
    model.train()
    # Follow the model's device instead of hard-coding .cuda(), so the loop
    # also runs on CPU-only machines.
    device = next(model.parameters()).device
    train_loss = 0.0
    for i, (clips, target) in enumerate(train_loader):
        # (B, T, H, W, C) -> (B, C, T, H, W). transpose(1, 4) only swapped
        # time and channels, which left time in the width position.
        clips = clips.permute(0, 4, 1, 2, 3)
        clips = clips.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(clips)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        # Progress log every 10 batches
        if i%10 == 0:
            print(i, loss.item())

    return train_loss/len(train_loader)


def validate(val_loader, model, criterion):
    """Return classification accuracy of ``model`` over ``val_loader``.

    Args:
        val_loader: Loader yielding ``(clips, target)`` batches with
            ``clips`` laid out as (B, T, H, W, C); must expose ``.dataset``.
        model: Network taking (B, C, T, H, W) input.
        criterion: Unused; kept so existing call sites keep working.

    Returns:
        Number of correct arg-max predictions divided by
        ``len(val_loader.dataset)``.
    """
    model.eval()
    device = next(model.parameters()).device

    val_acc = 0.0

    with torch.no_grad():
        for clips, target in val_loader:
            # Same layout conversion as in train(): (B, T, H, W, C) -> (B, C, T, H, W)
            clips = clips.permute(0, 4, 1, 2, 3).to(device)
            target = target.to(device)

            # compute output
            output = model(clips)

            val_acc += (output.argmax(1) == target).sum().item()

    return val_acc / len(val_loader.dataset)

In [None]:
def _build_loader(paths):
    """Wrap the given video paths in a shuffled, 4-worker DataLoader of batch size 10."""
    return torch.utils.data.DataLoader(
        DFLVideoDataset(paths),
        batch_size=10, shuffle=True, num_workers=4, pin_memory=False
    )


# Every video except the last one is used for training; the last is held out
train_loader = _build_loader(resize_video_paths[:-1])
val_loader = _build_loader(resize_video_paths[-1:])

# Train for 5 epochs, checkpointing whenever validation accuracy improves
best_acc = 0
for _ in range(5):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc = validate(val_loader, model, criterion)

    improved = val_acc > best_acc
    if improved:
        torch.save(model.state_dict(), 'model.pth')
        best_acc = val_acc

    print(train_loss, val_acc)