In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import cv2
from collections import Counter
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np
import torch
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
import torch.optim as torch_optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import random
import os

In [24]:
class Encoder(nn.Module):
    """Image encoder: a pretrained ResNet-34 trunk followed by a linear projection.

    The trunk output is rectified, globally average-pooled, flattened, projected
    to ``emb_dim`` features and batch-normalised. Per the original author's
    note, the resulting vector is meant to be fed to an LSTM.

    Args:
        emb_dim: Number of features in the embedding returned by ``forward``.
    """

    def __init__(self, emb_dim):
        super().__init__()
        backbone = models.resnet34(pretrained=True)
        # Keep only the first eight child modules of the ResNet; its own
        # pooling / classification head is replaced by the layers below.
        trunk = list(backbone.children())[:8]
        # The trunk is stored as two sequential groups (children 0-5 and 6-7).
        self.features1 = nn.Sequential(*trunk[:6])
        self.features2 = nn.Sequential(*trunk[6:])
        # Collapse each feature map to a single value, whatever its spatial size.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # Project from the width the ResNet classifier expected to emb_dim.
        self.linear = nn.Linear(backbone.fc.in_features, emb_dim)
        self.bn = nn.BatchNorm1d(emb_dim, momentum=0.01)

    def forward(self, x):
        """Encode a batch of inputs into embedding vectors.

        Args:
            x: Input batch; presumably images shaped (batch, 3, H, W) -- confirm
                against the data loader.

        Returns:
            Tensor of shape (batch, emb_dim).
        """
        feature_maps = self.features2(self.features1(x))
        pooled = self.pool(F.relu(feature_maps))
        # Flatten everything except the batch dimension before the projection.
        flat = pooled.view(pooled.shape[0], -1)
        return self.bn(self.linear(flat))