## Загрузка данных

In [1]:
# List the GPUs visible to this runtime.
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-ea88df3d-7c92-6b20-a14c-d9cb59726531)


In [2]:
%%capture
# Environment setup; the output of every command in this cell is captured (hidden).
# NOTE(review): CLIP is installed from the repository head without a pinned commit.
!pip install git+https://github.com/openai/CLIP.git
# Download the annotation spreadsheet and save it as "разметка.xlsx".
!wget -O разметка.xlsx https://drive.google.com/uc?id=1UgUS15ZjHgCqxzUCSqxn-GnwNXePORAH
# Download data.rar and extract it into the working directory
# (presumably the "Онлайн этап" video folder read later — confirm).
!gdown -O data.rar https://drive.google.com/uc?id=1QrhoG_HEcl9B-BHGFjLEFKOpkdgMKKZy
!unrar x data.rar

## Препроцессинг

In [3]:
import pandas as pd
import numpy as np
import cv2
from tqdm.notebook import tqdm
from collections import defaultdict
import os
import random

In [4]:
# Prepare the annotation table: drop incomplete rows and store video names as int32.
data_targets = pd.read_excel("разметка.xlsx").dropna()
data_targets["Название"] = data_targets["Название"].astype(np.int32)

# Only rows flagged as accidents ("ДТП" == 1) get their time columns converted.
accident_rows = data_targets["ДТП"] == 1


def _clock_to_number(value):
    """Collapse a time-like value into ``minute + hour * 60``."""
    return value.minute + value.hour * 60


def _clock_to_number_or_missing(value):
    """Like ``_clock_to_number`` but passes the ``-1`` placeholder through unchanged."""
    return value.minute + value.hour * 60 if value != -1 else -1


# Column name -> converter, applied in the same order as the columns are listed.
for column, converter in (
    ("Начало дтп", _clock_to_number),
    ("Конец ДТП", _clock_to_number),
    ("Время столкновения", _clock_to_number_or_missing),
):
    data_targets.loc[accident_rows, column] = data_targets.loc[accident_rows, column].apply(converter)

In [82]:
# For convenient downstream use, each video is represented as a set of individual frame images.
def convert2img(path2read: str, path2save: str, n: int = 10):
    """Decode a video and save every frame as a 224x224 PNG.

    Files are written to ``path2save`` as
    ``{video name without extension}_{frame number}.png``; frame numbers start at 1.

    Args:
        path2read: Path to the source video.
        path2save: Directory that receives the PNG files. It is not created here.
        n: Kept only for backward compatibility; it no longer affects the output.
            In the earlier version, frames whose number was a multiple of ``n``
            (or all frames when ``n`` was falsy) were written at native
            resolution while the rest were resized, which produced a mix of
            image sizes. Every frame is now resized.

    Returns:
        The base name of the source video (for example ``"1.mp4"``).
    """
    filename = os.path.basename(path2read)
    # splitext handles any extension length, unlike slicing off the last 4 characters.
    stem = os.path.splitext(filename)[0]
    size = (224, 224)

    vidcap = cv2.VideoCapture(path2read)
    try:
        count = 1
        while True:
            success, image = vidcap.read()
            if not success:
                # End of stream, or the video could not be read.
                break
            image = cv2.resize(image, size)
            # Name format: {source video name}_{frame number}.png
            cv2.imwrite(os.path.join(path2save, f"{stem}_{count}.png"), image)
            count += 1
    finally:
        # Release the capture even if resizing or writing raises.
        vidcap.release()

    print(f"Done: {filename[:100]}")
    return filename

In [6]:
# Recreate images/ from scratch so frames from earlier runs do not accumulate.
!rm -rf images
!mkdir images

In [7]:
# Convert every annotated video into per-frame PNG files under images/.
for filename in tqdm(data_targets["Название"]):
    video_path = f"Онлайн этап/{filename}.mp4"
    convert2img(path2read=video_path, path2save="images/")

  0%|          | 0/50 [00:00<?, ?it/s]

Done: 1.mp4
Done: 2.mp4
Done: 3.mp4
Done: 4.mp4
Done: 5.mp4
Done: 6.mp4
Done: 7.mp4
Done: 8.mp4
Done: 9.mp4
Done: 10.mp4
Done: 11.mp4
Done: 12.mp4
Done: 13.mp4
Done: 14.mp4
Done: 15.mp4
Done: 16.mp4
Done: 17.mp4
Done: 18.mp4
Done: 19.mp4
Done: 20.mp4
Done: 21.mp4
Done: 22.mp4
Done: 23.mp4
Done: 24.mp4
Done: 25.mp4
Done: 26.mp4
Done: 27.mp4
Done: 28.mp4
Done: 29.mp4
Done: 30.mp4
Done: 31.mp4
Done: 32.mp4
Done: 33.mp4
Done: 34.mp4
Done: 35.mp4
Done: 36.mp4
Done: 37.mp4
Done: 38.mp4
Done: 39.mp4
Done: 40.mp4
Done: 41.mp4
Done: 42.mp4
Done: 43.mp4
Done: 44.mp4
Done: 45.mp4
Done: 46.mp4
Done: 47.mp4
Done: 48.mp4
Done: 49.mp4
Done: 50.mp4


In [8]:
# Per-video lookup: video name -> [annotation columns 2, 3, 4], unpacked below as
# (flag, start, stop). Columns are addressed by position, so this depends on the sheet layout.
data_targets_bads = {row[0]: [row[2], row[3], row[4]] for row in data_targets.values}

data = defaultdict(list)
framerate = 30
for filename in os.listdir("images"):
    # File names have the form "{video name}_{frame number}.png".
    parts = filename.split("_")
    video_id = int(parts[0])
    frame_no = int(parts[1][:-4])
    flag, start, stop = data_targets_bads[video_id]
    # Scale the annotated bounds by the frame rate; the upper bound gets 15 extra frames.
    start = start * framerate
    stop = stop * framerate + 15

    if not flag:
        # Videos without an accident contribute only "neutral" frames.
        data["neutral"].append(filename)
        continue

    if start <= frame_no <= stop:
        bucket = "accident"
    elif frame_no < start:
        bucket = "before_accident"
    elif frame_no > stop:
        bucket = "after_accident"
    else:
        # No comparison held (e.g. non-comparable bounds): the frame is left unassigned.
        continue
    data[bucket].append(filename)

# Report how many frames ended up in each bucket.
for key, files in data.items():
    print(key, len(files))

neutral 21697
after_accident 7068
accident 12088
before_accident 2403


In [9]:
# Positive class (label 1): frames during and after an accident.
data_prep = [[frame, 1] for frame in data["accident"] + data["after_accident"]]
random.seed(42)
random.shuffle(data_prep)
n = len(data_prep)

# Negative class (label 0): at most as many frames as there are positives.
for element in ["neutral"]:  # before_accident
    candidates = data[element]
    if len(candidates) > n:
        # Too many negatives: draw a reproducible sample of size n.
        random.seed(42)
        candidates = random.sample(candidates, n)
    data_prep.extend([frame, 0] for frame in candidates)

In [10]:
# Total number of labelled frames (positives plus sampled negatives).
len(data_prep)

38312

## Обучение

In [11]:
import pandas as pd 
import numpy as np
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import datasets, models, transforms

from PIL import Image
import cv2

from sklearn.metrics import f1_score, accuracy_score
import random
import clip

In [12]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load CLIP ViT-B/32 together with its preprocessing transform, then keep only
# the visual encoder, cast to float32.
model, preprocess = clip.load("ViT-B/32", device=device)
model = model.visual.float()

# Freeze every parameter of the visual encoder.
for param in model.parameters():
    param.requires_grad = False

100%|███████████████████████████████████████| 338M/338M [00:05<00:00, 60.6MiB/s]


In [13]:
class AccidentDataset(Dataset):
    """Dataset of ``[image file name, label]`` records stored in one folder.

    Args:
        data_prep: Indexable collection of records; element 0 of a record is the
            image file name and element 1 is its label.
        transform: Optional callable applied to the opened image.
        folder: Directory that the image file names are relative to.
    """

    def __init__(self, data_prep, transform=None,
                 folder: str = "/content/"):
        self.data_prep = data_prep
        self.transform = transform
        self.folder = folder

    def __len__(self):
        """Return the number of records."""
        return len(self.data_prep)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for record ``idx``; the label is an int64 tensor."""
        record = self.data_prep[idx]
        image_name, label = record[0], record[1]
        image_path = os.path.join(self.folder, image_name)
        image = Image.open(image_path)
        # A falsy transform (e.g. None) leaves the opened image untouched.
        image = self.transform(image) if self.transform else image
        return image, torch.tensor(label).long()

In [14]:
# Training and validation both use the preprocessing transform returned with the CLIP model.
train_transform = valid_transform = preprocess

In [15]:
# Validate on videos that are absent from the training set.
val_videos = [6, 2, 23]
train_df = []
valid_df = []
for filename, label in data_prep:
    # The video name is the part of the file name before the first underscore.
    video_id = int(filename.split("_")[0])
    split = valid_df if video_id in val_videos else train_df
    split.append([filename, label])

In [16]:
# Shuffle both splits once with a fixed seed. The DataLoaders in this notebook are
# built without shuffle=True, so this fixes the sample order for every epoch.
random.seed(42)
random.shuffle(train_df)
random.shuffle(valid_df)

In [18]:
# Number of frames in the training and validation splits.
len(train_df), len(valid_df)

(35472, 2840)

In [19]:
# Both datasets read their frames from images/, overriding the class default folder.
train_dataset = AccidentDataset(train_df, train_transform, folder="images")
valid_dataset = AccidentDataset(valid_df, valid_transform, folder="images")

In [20]:
# Settings shared by both loaders. No shuffle flag is passed, so samples are
# served in dataset order.
loader_kwargs = dict(batch_size=128, pin_memory=True, num_workers=2)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **loader_kwargs)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, **loader_kwargs)

In [21]:
def train(model, criterion, optimizer, train_dataloader,
          test_dataloader, num_epoch=15, path="/content/",
          score_function=None):
    """Train ``model``, evaluating and checkpointing it after every epoch.

    Args:
        model: Module to optimise. Batches are moved to the device of its first
            parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(model(imgs), labels)``.
        optimizer: Optimizer that is stepped once per training batch.
        train_dataloader: Iterable of ``(imgs, labels)`` training batches.
        test_dataloader: Iterable of ``(imgs, labels)`` validation batches.
        num_epoch: Number of passes over ``train_dataloader``.
        path: Directory for the per-epoch checkpoints; created if missing.
        score_function: Callable ``(y_true, y_pred) -> score`` taking two NumPy
            arrays. Defaults to plain accuracy. (The previous default accepted
            a single argument and therefore always raised ``TypeError``.)

    Returns:
        List with one validation score per epoch.
    """
    if score_function is None:
        def score_function(y_true, y_pred):
            # Fraction of predictions equal to the ground truth.
            return float(np.mean(y_true == y_pred))
        score_name = "accuracy"
    else:
        # Callables such as functools.partial objects have no __name__.
        score_name = getattr(score_function, "__name__", type(score_function).__name__)

    # Follow the model's device instead of assuming CUDA is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Fail before training, not after the first epoch, if the directory is unusable.
    os.makedirs(path, exist_ok=True)

    val_acc_log = []

    for epoch in tqdm(range(num_epoch)):
        model.train()
        for imgs, labels in tqdm(train_dataloader):
            optimizer.zero_grad()
            imgs = imgs.to(device)
            labels = labels.to(device)
            y_pred = model(imgs)
            loss = criterion(y_pred, labels)
            loss.backward()
            optimizer.step()

        val_pred = []
        val_true = []
        model.eval()
        with torch.no_grad():
            for imgs, labels in tqdm(test_dataloader):
                imgs = imgs.to(device)
                labels = labels.to(device)

                pred = model(imgs)
                # The predicted class is the index of the largest logit.
                val_pred.extend(pred.argmax(1).cpu().numpy())
                val_true.extend(labels.cpu().numpy())

        val_acc_log.append(score_function(np.array(val_true), np.array(val_pred)))
        print('Epoch', epoch + 1, f'Val {score_name}:', val_acc_log[-1])

        # One checkpoint per epoch, named after the model class.
        full_path = os.path.join(path, f"{model.__class__.__name__}_epoch_{epoch + 1}.pth")
        torch.save(model.state_dict(), full_path)
        print(f"Model saved to: {full_path}")
    return val_acc_log

In [22]:
class ViT_B_32(nn.Module):
    """Feature extractor followed by a small two-class classification head.

    The head is ``Linear(512, 384) -> LayerNorm(384) -> Linear(384, 2)``, so the
    wrapped ``model`` must produce 512-dimensional features.

    Args:
        model: Backbone module. It is moved to the GPU when CUDA is available
            and to the CPU otherwise (previously CUDA was required).
    """

    def __init__(self, model):
        super().__init__()
        # Same result as .cuda() on a GPU runtime, but does not crash on a CPU-only one.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.vit = model.to(device)
        self.new_emb = nn.Linear(512, 384).to(device)
        self.norm = nn.LayerNorm(384).to(device)
        self.fc = nn.Linear(384, 2).to(device)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` as float32."""
        # Cast the backbone output to float32 before it enters the float32 head.
        h = self.vit(x).float()
        h = self.new_emb(h)
        h = self.norm(h)
        return self.fc(h).float()

In [23]:
# Seed every random number generator in use before the new layers are initialised.
SEED = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(SEED)

# Wrap the previously loaded backbone with the classification head. `model` is rebound
# to the wrapper, so running this cell twice would wrap the wrapper again.
model = ViT_B_32(model)
criterion = torch.nn.CrossEntropyLoss()

In [24]:
# AdamW over all parameters of the wrapped model, with a learning rate of 0.003.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.003)

In [25]:
# Train for two epochs, scoring each epoch on the held-out videos with accuracy.
# `path` is left at its default, so checkpoints are written to /content/.
val_acc_log = train(model,
                    criterion,
                    optimizer,
                    train_loader,
                    valid_loader,
                    num_epoch=2,
                    score_function=accuracy_score)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/278 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch 1 Val accuracy_score: 1.0
Model saved to: /content/ViT_B_32_epoch_1.pth


  0%|          | 0/278 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch 2 Val accuracy_score: 1.0
Model saved to: /content/ViT_B_32_epoch_2.pth


### Сохранение

In [94]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [95]:
import shutil

In [99]:
# Copy the epoch-2 checkpoint into the Ivision folder on Drive. The source path is
# relative, so this relies on the working directory being where train() saved it.
shutil.copy("ViT_B_32_epoch_2.pth", "/content/drive/MyDrive/Ivision")

'/content/drive/MyDrive/Ivision/ViT_B_32_epoch_2.pth'