In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from ocr.utils.dataset import OCRDataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch
torch.backends.cudnn.enabled=False

In [3]:
device = torch.device("cuda:1")

In [4]:
dataset = OCRDataset('/data/data/', './labels.json')

In [5]:
img, label = dataset[1500]

In [6]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np


def imshow(inp, gt_boxes=[], predict_boxes=[], random=False):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    inp = np.clip(inp, 0, 1)
    fig, ax = plt.subplots(1, figsize=(20, 10))
    
    ax.imshow(inp)
    for i, box in enumerate(gt_boxes):
        rect = patches.Rectangle((box[0], box[1]), box[2] - box[0],
                                 box[3] - box[1], linewidth=2, edgecolor='r', facecolor='none')
        # Add the patch to the Axes
        ax.add_patch(rect)

    color = 'b'
    for i, box in enumerate(predict_boxes):
        if random:
            color = np.random.rand(3)
        rect = patches.Rectangle((box[0], box[1]), box[2] - box[0],
                                 box[3] - box[1], linewidth=1, edgecolor=color, facecolor='none')
        # Add the patch to the Axes
        ax.add_patch(rect)
    ax.imshow(inp)

In [7]:
from torchvision.models.squeezenet import SqueezeNet
full_model = SqueezeNet()
feature_model = nn.Sequential(*list(full_model.features.children())[:-1])
feature_model.to(device)



Sequential(
  (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (3): Fire(
    (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
    (squeeze_activation): ReLU(inplace)
    (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
    (expand1x1_activation): ReLU(inplace)
    (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (expand3x3_activation): ReLU(inplace)
  )
  (4): Fire(
    (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
    (squeeze_activation): ReLU(inplace)
    (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
    (expand1x1_activation): ReLU(inplace)
    (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (expand3x3_activation): ReLU(inplace)
  )
  (5): Fire(
    (squeeze): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
    (squeeze_activation): ReLU(inplace

In [8]:
result = feature_model(img.unsqueeze(0).to(device))

In [9]:
result.shape

torch.Size([1, 512, 18, 340])

In [10]:
img.shape

torch.Size([3, 300, 5450])

In [11]:
def _create_anchors(feature_height, feature_width, feat_stride):
    shift_x = np.arange(0, feature_width) * feat_stride
    shift_y = np.array([0] *  feature_width)

    shifts = np.vstack((shift_x, shift_y, shift_x , shift_y)).T
    # generate shifted anchors
    first_anchor = np.array([0, 0, feature_height * feat_stride , feature_height* feat_stride])
    # move to specific gpu.
    # self._anchors = self._anchors.type_as(gt_boxes)
    all_anchors = first_anchor + shifts
    # add bbox deltas to shifted anchors to get proposal
    
    return all_anchors


def np_to_tensor(x, is_cuda=True, dtype=torch.FloatTensor):
    v = torch.from_numpy(x).type(dtype)
    if is_cuda:
        v = v.to(torch.device("cuda"))
    return v

In [12]:
anchors = _create_anchors(18, 340, 16.)

In [13]:
anchors.shape

(340, 4)

In [14]:
index_column = np.zeros((340,1))

In [15]:
anchors = np.hstack((index_column, anchors))

In [16]:
class Lang:
    def __init__(self):
        self.char2index = {}
        self.char2count = {}
        self.index2char = {0: "SOS", 1: "EOS"}
        self.n_characters = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for char in sentence:
            self.addChar(char)

    def addChar(self, character):
        if character not in self.char2index:
            self.char2index[character] = self.n_characters
            self.char2count[character] = 1
            self.index2char[self.n_characters] = character
            self.n_characters += 1
        else:
            self.char2count[character] += 1

In [17]:
import json
import os
with open('data/labels.json') as f:
    data = list(json.load(f).items())
lang = Lang()

In [18]:
SOS_token = 0
EOS_token = 1

In [19]:
for idx, current_data in enumerate(data):
    lang.addSentence(current_data[1])

In [20]:
def indexesFromSentence(lang, sentence):
    return [lang.char2index[char] for char in sentence]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(np.array(indexes), dtype=torch.float, device=device).view(-1, 1)



In [21]:
target_tensor = tensorFromSentence(lang, "Thôn Thọ Vực, Xã Đồng Tháp, Huyện Đan Phượng, Hà Nội")

In [22]:
from torchvision.layers.roi_pool import ROIPool

In [23]:
dtype = torch.float32
x = torch.rand(1, 10, 10, 10, dtype=dtype, device=device)
rois = torch.tensor([[0, 0, 0, 4, 4]],  # format is (xyxy)
                    dtype=dtype, device=device)

pool_h, pool_w = (5, 5)
roi_pool = ROIPool((7, 7), 1.0 / 16)
y = roi_pool(x, rois)

In [24]:
y.shape

torch.Size([1, 10, 7, 7])

In [25]:
result.shape

torch.Size([1, 512, 18, 340])

In [28]:
rs = roi_pool(result, np_to_tensor(anchors, device))

RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /tmp/pytorch/aten/src/THC/generic/THCTensorCopy.cpp:20

In [27]:
rs.shape

NameError: name 'rs' is not defined

In [28]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.linear = nn.Linear(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        print(input.shape)
        embedded = input.view(1, 1, -1)
        print(embedded.shape)
        output = self.linear(embedded)
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [29]:
import torch.nn.functional as F
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [30]:
encoder = EncoderRNN(512 * 7 * 7, 1000)
encoder.to(device)

RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /tmp/pytorch/aten/src/THC/generic/THCTensorCopy.cpp:20

In [31]:
input_length = rs.shape[0]
input_tensor = rs
encoder_outputs = torch.zeros(100, encoder.hidden_size, device=device)
encoder_hidden = encoder.initHidden().to(device)

for ei in range(input_length):
    encoder_output, encoder_hidden = encoder(
        input_tensor[ei], encoder_hidden)
    encoder_outputs[ei] = encoder_output[0, 0]

RuntimeError: CUDA error: an illegal memory access was encountered

In [35]:
encoder_outputs.shape

torch.Size([100, 1000])

In [41]:
decoder = DecoderRNN(1000, lang.n_characters).to(device)

In [45]:
decoder_input = torch.tensor([[SOS_token]], device=device)
decoder_hidden = encoder_hidden
print(decoder_input)



tensor([[0]], device='cuda:0')


In [43]:
for di in range(target_tensor.shape[0]):
    decoder_output, decoder_hidden = decoder(
        decoder_input, decoder_hidden)
    decoder_input = target_tensor[di]  # Teacher forcing

RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)

In [None]:
decoder_output