In [8]:
!pip install imutils

In [9]:
import argparse
import os
import time

import cv2
import easyocr
import imutils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytesseract
import seaborn as sns
import torch
import torch.nn as nn
from imutils.video import FPS
from imutils.video import VideoStream
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet50, resnet152
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

%matplotlib inline

In [11]:
# Folder holding both training clips and the label table.
TRAIN_DIR = "../input/irkutsk/train_dataset_train/train"

vs = cv2.VideoCapture(f"{TRAIN_DIR}/train1.avi")
vs2 = cv2.VideoCapture(f"{TRAIN_DIR}/train2.avi")
# cv2.VideoCapture does not raise on a bad path; it hands back an unopened
# capture, so fail here rather than on the first frame read.
for clip_name, capture in (("train1.avi", vs), ("train2.avi", vs2)):
    if not capture.isOpened():
        raise FileNotFoundError(f"Could not open video {TRAIN_DIR}/{clip_name}")
# Label table; later cells read its 'date', 'employee' and 'action' columns.
train = pd.read_csv(f"{TRAIN_DIR}/train.csv")

In [12]:
# Sanity check: decode one frame from the first clip and display it.
ret, frame = vs.read()
if not ret:
    # read() signals failure through `ret`; `frame` is None in that case.
    raise RuntimeError("vs.read() returned no frame")

# OpenCV decodes to BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.show()

In [19]:
# One counter per labelled timestamp; the extraction loops flip it to 1 once
# a frame for that timestamp has been saved.
train_dates = {stamp: 0 for stamp in train['date']}

In [20]:
!mkdir train_images

In [89]:
# Checkpoint used for the TrOCR fallback in the frame-extraction loops.
model_version = "microsoft/trocr-base-printed"
# Load the matching processor and encoder-decoder weights, in that order.
processor, model_ved = (
    TrOCRProcessor.from_pretrained(model_version),
    VisionEncoderDecoderModel.from_pretrained(model_version),
)

In [22]:
# EasyOCR reader restricted to English; first-choice OCR for the first clip.
reader = easyocr.Reader(lang_list=['en'])

In [23]:
# Step through the first clip one second at a time, OCR the text in the
# top-left crop and keep the frame when that text is a labelled timestamp.
second = 0

while True:
    vs.set(cv2.CAP_PROP_POS_MSEC, second*1000)
    second += 1
    ret, frame = vs.read()
    if not ret:
        # No frame could be decoded at this position: the clip is exhausted.
        break
    frame = imutils.resize(frame, width=1024)

    # Crop that is handed to the OCR engines.
    frame_crop = frame[35:75, 0:300]

    try:
        # readtext() yields (box, text, confidence) tuples; take the first text.
        date = reader.readtext(frame_crop)[0][1]
    except Exception:
        # Nothing detected (or EasyOCR failed): let the TrOCR fallback handle it.
        # `Exception` rather than a bare except so Ctrl-C still stops the loop.
        date = ''
    if len(date) != 23:
        # Unexpected length -> re-read the crop with TrOCR.
        print('___')
        pixel_values = processor(frame_crop, return_tensors="pt").pixel_values
        generated_ids = model_ved.generate(pixel_values)
        date = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Rearrange the OCR text into the key format of `train_dates`
    # (presumably 'YYYY-MM-DD HH:MM:SS' -- confirm against train.csv).
    date = date[6:10] + '-' + date[0:5] + ' ' + date[15:23]
    print(date)
    # Save only the first frame seen for each labelled timestamp.
    if date in train_dates and train_dates[date] == 0:
        train_dates[date] += 1
        cv2.imwrite(f'./train_images/{date}.jpg', frame)

In [104]:
# Same one-frame-per-second extraction for the second clip, with Tesseract
# as the first-choice OCR and TrOCR as the fallback.
second = 0

while True:
    if second == 100:
        # Progress probe: how many frames have been saved so far.
        print(len(os.listdir('./train_images')))
    vs2.set(cv2.CAP_PROP_POS_MSEC, second*1000)
    second += 1
    ret, frame = vs2.read()
    if not ret:
        # No frame could be decoded at this position: the clip is exhausted.
        break
    frame = imutils.resize(frame, width=1024)

    # Crop that is handed to the OCR engines.
    frame_crop = frame[20:60, 0:310]

    try:
        date = pytesseract.image_to_string(frame_crop)
    except Exception:
        # Tesseract failed: let the TrOCR fallback handle it.
        # `Exception` rather than a bare except so Ctrl-C still stops the loop.
        date = ''
    if len(date) != 25:
        # Unexpected length -> re-read the crop with TrOCR.
        print('___')
        pixel_values = processor(frame_crop, return_tensors="pt").pixel_values
        generated_ids = model_ved.generate(pixel_values)
        date = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Rearrange the OCR text into the key format of `train_dates`
    # (presumably 'YYYY-MM-DD HH:MM:SS' -- confirm against train.csv).
    date = date[6:10] + '-' + date[0:5] + ' ' + date[15:23]
    print(date)
    # Save only the first frame seen for each labelled timestamp.
    if date in train_dates and train_dates[date] == 0:
        train_dates[date] += 1
        cv2.imwrite(f'./train_images/{date}.jpg', frame)

In [26]:
class ImageDataset(Dataset):
    """Labelled images stored in one folder as ``<date>.<ext>`` files.

    Args:
        img_path: Directory containing the image files.
        data: DataFrame with 'date', 'employee' and 'action' columns; the
            'date' value must equal the file name without its extension.
        transform: Callable applied to each RGB PIL image.

    Each item is ``{'image': ..., 'labels': {'label_employee': int,
    'label_action': int}}``.
    """

    def __init__(self, img_path, data, transform):
        self.path = img_path
        # Sorted so the sample order does not depend on the filesystem.
        self.folder = sorted(os.listdir(img_path))
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.folder)

    def _labels_for(self, file_name):
        """Return ``(employee, action)`` for an image file name.

        Raises:
            KeyError: if ``data`` has no row for the file's date. KeyError is
                used on purpose: an IndexError escaping ``__getitem__`` would
                be read as end-of-sequence by plain ``for`` iteration.
        """
        # splitext instead of [:-4] so extensions of any length are handled.
        date = os.path.splitext(file_name)[0]
        rows = self.data[self.data['date'] == date]
        if rows.empty:
            raise KeyError(f'no label row for date {date!r}')
        # First matching row wins when a date occurs more than once.
        return int(rows['employee'].values[0]), int(rows['action'].values[0])

    def __getitem__(self,idx):
        file_name = self.folder[idx]
        img_loc = os.path.join(self.path, file_name)
        image = Image.open(img_loc).convert('RGB')
        image = self.transform(image)
        label_e, label_a = self._labels_for(file_name)

        sample = {'image': image, 'labels': {'label_employee': label_e, 'label_action': label_a}}
        return sample

In [106]:
# Preprocessing for every training frame: resize to 384x384, convert to a
# tensor, then normalise each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((384,384)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])
train_ds = ImageDataset('./train_images/', train, transform)
# Shuffle so each epoch sees differently composed batches instead of the
# same files in directory-listing order.
train_loader = DataLoader(dataset=train_ds, shuffle=True, batch_size=8)

In [153]:
class MultilabelClassifier(nn.Module):
    """Shared ResNet-50 trunk feeding two independent classification heads.

    Args:
        n_employee: Number of output logits of the 'employee' head.
        n_action: Number of output logits of the 'action' head.

    ``forward`` returns ``{'employee': logits, 'action': logits}``.
    """

    def __init__(self, n_employee, n_action):
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        # Keep every child module of the ResNet except the last one, so the
        # trunk ends before the network's own classification layer.
        trunk_layers = list(self.resnet.children())[:-1]
        self.resnet_model = nn.Sequential(*trunk_layers)

        self.employee = self._make_head(n_employee)
        self.action = self._make_head(n_action)

    @staticmethod
    def _make_head(n_classes):
        """Dropout followed by a linear layer over the 2048 trunk features."""
        return nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=2048, out_features=n_classes)
        )

    def forward(self, x):
        # Flatten everything after the batch dimension before the heads.
        features = torch.flatten(self.resnet_model(x), 1)
        heads = {'employee': self.employee, 'action': self.action}
        return {name: head(features) for name, head in heads.items()}

In [168]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
# Two heads: 12 'employee' logits and 20 'action' logits.
model = MultilabelClassifier(n_employee=12, n_action=20)
model = model.to(device)

In [30]:
def criterion(loss_func,outputs,pictures):
    """Sum ``loss_func`` over every head present in ``outputs``.

    Args:
        loss_func: Callable ``(logits, targets) -> loss``.
        outputs: Mapping of head name to logits, e.g. ``{'employee': ...}``.
        pictures: Batch dict; targets for head ``k`` are read from
            ``pictures['labels'][f'label_{k}']``.

    Returns:
        The summed loss, or ``0`` when ``outputs`` is empty.
    """
    total = 0
    for key, logits in outputs.items():
        # Move the targets to wherever the logits live instead of relying on
        # a module-level `device` variable.
        targets = pictures['labels'][f'label_{key}'].to(logits.device)
        total += loss_func(logits, targets)
    return total

def training(model,device,lr_rate,num_epochs,train_loader):
    """Train ``model`` with Adam and a summed cross-entropy loss.

    Args:
        model: Network whose forward pass returns a dict of logits per head.
        device: Device the input images are moved to.
        lr_rate: Learning rate handed to Adam.
        num_epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of batches with 'image' and 'labels' entries.

    Returns:
        One value per epoch: the mean of all step losses recorded since the
        start of training (a running mean, not a per-epoch mean).
    """
    losses = []
    checkpoint_losses = []

    optimizer = torch.optim.Adam(model.parameters(), lr=lr_rate)
    n_total_steps = len(train_loader)

    loss_func = nn.CrossEntropyLoss()

    # The inference cell switches the model to eval mode; make sure dropout
    # and batch-norm are back in training mode if this is re-run afterwards.
    model.train()

    for epoch in range(num_epochs):
        for i, pictures in enumerate(train_loader):
            images = pictures['image'].to(device)

            outputs = model(images)

            loss = criterion(loss_func, outputs, pictures)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report once per epoch, on the last step.
            if (i+1) % n_total_steps == 0:
                checkpoint_loss = torch.tensor(losses).mean().item()
                checkpoint_losses.append(checkpoint_loss)
                print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {checkpoint_loss:.4f}')
    return checkpoint_losses

In [None]:
# 20 epochs at a learning rate of 1e-4.
checkpoint_losses = training(
    model=model,
    device=device,
    lr_rate=1e-4,
    num_epochs=20,
    train_loader=train_loader,
)

In [38]:
# for img_name in os.listdir('./train_images'):
#     img = cv2.imread(f'./train_images/{img_name}')
#     plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#     plt.show()

In [32]:
vs_test = cv2.VideoCapture("../input/irkutsk/test_dataset_test/test/test.avi")
# cv2.VideoCapture does not raise on a bad path; it hands back an unopened
# capture, so fail here rather than on the first frame read.
if not vs_test.isOpened():
    raise FileNotFoundError("Could not open video ../input/irkutsk/test_dataset_test/test/test.avi")

In [33]:
# Table of test timestamps; later cells read its 'date' column.
test = pd.read_csv(filepath_or_buffer='../input/irkutsk/test_dataset_test/test/test.csv')

In [34]:
# One counter per requested timestamp; the extraction loop flips it to 1
# once a frame for that timestamp has been saved.
test_dates = {stamp: 0 for stamp in test['date']}

In [35]:
!mkdir test_images

In [43]:
# Step through the test clip one second at a time, OCR the text in the
# top-left crop and keep the frame when that text is a requested timestamp.
second = 0
while True:
    vs_test.set(cv2.CAP_PROP_POS_MSEC, second*1000)
    second += 1
    ret, frame = vs_test.read()
    if not ret:
        # No frame could be decoded at this position: the clip is exhausted.
        break
    frame = imutils.resize(frame, width=1024)

    # Crop that is handed to Tesseract.
    frame_crop = frame[25:55, 0:300]

    date = pytesseract.image_to_string(frame_crop)
    # Rearrange the OCR text into the key format of `test_dates`
    # (presumably 'YYYY-MM-DD HH:MM:SS' -- confirm against test.csv).
    date = date[6:10] + '-' + date[0:5] + ' ' + date[15:23]
    print(date)
    # Save only the first frame seen for each requested timestamp.
    if date in test_dates and test_dates[date] == 0:
        test_dates[date] += 1
        cv2.imwrite(f'./test_images/{date}.jpg', frame)

In [49]:
class TestDataset(Dataset):
    """Unlabelled images read from a single folder.

    Args:
        img_path: Directory containing the image files.
        transform: Callable applied to each RGB PIL image.

    Each item is ``{'image': transformed_image}``; items follow the order
    returned by ``os.listdir`` at construction time.
    """

    def __init__(self, img_path, transform):
        self.path = img_path
        self.folder = list(os.listdir(img_path))
        self.transform = transform

    def __len__(self):
        return len(self.folder)

    def __getitem__(self,idx):
        file_name = self.folder[idx]
        rgb_image = Image.open(os.path.join(self.path, file_name)).convert('RGB')
        return {'image': self.transform(rgb_image)}

In [156]:
# Same preprocessing as for the training frames: 384x384 resize, tensor
# conversion, per-channel normalisation with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
test_ds = TestDataset(img_path='./test_images/', transform=transform)
# One image per batch, in dataset order.
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False)

In [None]:
# Predicted class index per test image, in test_loader order.
pred_actions = []
pred_employee = []
model.eval()
# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for pictures in test_loader:
        images = pictures['image'].to(device)
        outputs = model(images)
        # argmax over the class dimension; extend() keeps this correct for
        # any batch size, not just batch_size=1.
        pred_employee.extend(outputs['employee'].argmax(dim=1).tolist())
        pred_actions.extend(outputs['action'].argmax(dim=1).tolist())
print(pred_employee, pred_actions)

In [None]:
# Take the file names from the dataset that produced the predictions instead
# of listing the folder again, so each row is guaranteed to pair a prediction
# with the image it came from.
predictions = pd.DataFrame({'date': list(test_ds.folder), 'employee': pred_employee, 'action': pred_actions})

In [None]:
# Remove the '.jpg' suffix written by the extraction loop. Matching the suffix
# (rather than cutting four characters) keeps the cell safe to re-run.
predictions['date'] = predictions['date'].str.replace(r'\.jpg$', '', regex=True)

In [None]:
# Left join keeps every row of `test`; timestamps without a prediction get
# NaN in the prediction columns.
submit = pd.merge(test, predictions, how='left', on='date')

In [None]:
# Distribution of predicted employee classes.
predictions['employee'].value_counts()

In [69]:
# Distribution of employee labels in the training table.
train['employee'].value_counts()

In [None]:
# Distribution of predicted action classes.
predictions['action'].value_counts()

In [70]:
# Distribution of action labels in the training table.
train['action'].value_counts()

In [163]:
# Timestamps without a prediction fall back to fixed classes (employee 11,
# action 13). Filling on the frame itself, rather than in place on a column
# selection, is guaranteed to update `submit`; under pandas Copy-on-Write the
# chained in-place form leaves `submit` unchanged.
submit = submit.fillna({'employee': 11, 'action': 13})

In [164]:
# The 'date' column is not part of the submission file.
submit.drop(columns=['date'], inplace=True)

In [165]:
# Store both prediction columns with pandas' nullable integer dtype.
submit = submit.astype({column: "Int64" for column in ("employee", "action")})

In [166]:
# Display the final submission frame as the cell output.
submit

In [167]:
# Write the submission without the index column.
submit.to_csv(path_or_buf='submit.csv', index=False)