In [38]:
# import package

# model
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import timm

# dataset and transformation
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models

# display images
from torchvision import utils
import matplotlib.pyplot as plt
%matplotlib inline

# utils
import numpy as np
from glob import glob
import os
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt


device=torch.device('cpu')
if torch.backends.mps.is_available():
    device=torch.device('mps')
elif torch.cuda.is_available():
    device=torch.device('cuda')
print(f'Using {device} for inference')

Using cuda for inference


In [39]:
# train test split

import csv
import shutil
import pandas as pd

train_list = []
test_list = []

train_csv=pd.read_csv('train.csv',index_col=0)
train_list=train_csv['video_id'].values.tolist()

test_csv=pd.read_csv('test.csv',index_col=0)
test_list=test_csv['video_id'].values.tolist()
#full_data_path = './data/'



print(f'csv len: {len(test_list) + len(train_list)}')
print("train len: ", len(train_list), "\ntest len: ", len(test_list))
# print(f'full data len: {len(os.listdir("./data"))}')

# for test in tqdm(test_list):
#   if os.path.exists('./medium_15287/' + test + '.jpg'):  
#     img_path = glob('./medium_15287/' + test + '.jpg')[0]
#     shutil.copyfile(img_path, './test/' + test + '.jpg')
  
#   else:
#     print(test)

# for train in tqdm(train_list):
#   if os.path.exists('./medium_15287/' + train + '.jpg'):  
#     img_path = glob('./medium_15287/' + train + '.jpg')[0]
#     shutil.copyfile(img_path, './train/' + train + '.jpg')
#   else:
#     print(train)




csv len: 15287
train len:  13758 
test len:  1529


In [40]:
print(f'test len: {len(os.listdir("./test"))}')
print(f'train len: {len(os.listdir("./train"))}')

test len: 1529
train len: 13758


In [41]:
from PIL import Image
from torch.utils.data import Dataset, DataLoader


class SampleDataset(Dataset):
    def __init__(self, phase='test'):
        # self.path = './sample_data'
        self.phase = phase
        
        self.img_list = glob(self.phase + '/*')

        self.transform =transforms.Compose([
            transforms.Resize(size=272, interpolation=torchvision.transforms.InterpolationMode.BICUBIC, max_size=None, antialias=True),
            transforms.CenterCrop(size=(240, 240)),
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor([0.5000, 0.5000, 0.5000]), std=torch.tensor([0.5000, 0.5000, 0.5000]))
        ])
    
    def __len__(self):
        return len(self.img_list)
    
    def __getitem__(self, idx):
        img_path = self.img_list[idx]

        img = Image.open(img_path)
        img = self.transform(img)

        vid = img_path.split('/')[-1][:-4]
        
        return img, vid

In [42]:
# Prepare sample input data.

batch_size = 64

test_dataset = SampleDataset(phase='test')
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train_dataset = SampleDataset(phase='train')
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

In [43]:
import copy
import logging
import timm
import pathlib

_logger = logging.getLogger(__name__)

def my_create_timm_model(**init_args):
    # HACK: fix the bug for feature_only=True and checkpoint_path != ""
    # https://github.com/rwightman/pytorch-image-models/issues/488
    if init_args.get("checkpoint_path", "") != "" and init_args.get("features_only", True):
        init_args = copy.deepcopy(init_args)
        full_model_name = init_args["model_name"]
        modules = timm.models.list_modules()
        # find the mod which has the longest common name in model_name
        mod_len = 0
        for m in modules:
            if m in full_model_name:
                cur_mod_len = len(m)
                if cur_mod_len > mod_len:
                    mod = m
                    mod_len = cur_mod_len
        if mod_len >= 1:
            if hasattr(timm.models.__dict__[mod], "default_cfgs"):
                ckpt_path = init_args.pop("checkpoint_path")
                ckpt_url = pathlib.Path(ckpt_path).resolve().as_uri()
                _logger.warning(f"hacking model pretrained url to {ckpt_url}")
                timm.models.__dict__[mod].default_cfgs[full_model_name]["url"] = ckpt_url
                init_args["pretrained"] = True
        else:
            raise ValueError(f"model_name {full_model_name} has no module in timm")

    backbone = timm.create_model(**init_args)
    return backbone

In [44]:
#train encoder
# import os
# os.environ['TORCH_HOME'] = './'

efficientnet = my_create_timm_model(model_name='efficientnet_b1_pruned', pretrained=True, features_only=True, checkpoint_path='./effnetb1_pruned-bea43a3a.pth')

test_input=torch.randn(1,3,240,240)
print(test_input.shape)
o=efficientnet(test_input)
for f in o:
    print(f.shape)
layers=0
for child in efficientnet.children():
    for param in child.parameters():
        param.requires_grad=False
    layers+=1
print(layers)

hacking model pretrained url to file:///C:/Users/iblue/OneDrive/%EB%B0%94%ED%83%95%20%ED%99%94%EB%A9%B4/OneDrive%20-%20%EC%84%B1%EA%B7%A0%EA%B4%80%EB%8C%80%ED%95%99%EA%B5%90/%ED%95%99%EA%B5%90%EA%B3%BC%EC%A0%9C/%EC%9D%B8%EC%A7%80%ED%94%84/feature_autoencoder.ipynb/effnetb1_pruned-bea43a3a.pth


torch.Size([1, 3, 240, 240])
torch.Size([1, 16, 120, 120])
torch.Size([1, 12, 60, 60])
torch.Size([1, 35, 30, 30])
torch.Size([1, 67, 15, 15])
torch.Size([1, 320, 8, 8])
4


In [45]:
reduced_vector=1000

class DeepAutoencoder(torch.nn.Module):
    def __init__(self):
        super().__init__()        
        self.efficientnet = efficientnet

        self.encoder=torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(320*8*8,reduced_vector*4),
            torch.nn.ReLU(),
            torch.nn.Linear(reduced_vector*4, reduced_vector)
        )
          
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(reduced_vector, reduced_vector*4),
            torch.nn.ReLU(),
            torch.nn.Linear(reduced_vector*4, 320*8*8),
        )
  
    def forward(self, x):
        feature_map=self.efficientnet(x)[-1]
        encoded = self.encoder(feature_map)
        decoded = self.decoder(encoded)
        return decoded, feature_map
  
# Instantiating the model and hyperparameters


In [46]:
model = DeepAutoencoder()
model.to(device)
criterion = torch.nn.MSELoss()
params_to_update=[]

for name, param in model.encoder.named_parameters():
    if param.requires_grad==True:
        params_to_update.append(param)

for name, param in model.decoder.named_parameters():
    if param.requires_grad==True:
        params_to_update.append(param)

In [47]:
optimizer = torch.optim.Adam(params_to_update, lr=0.0001)

In [48]:
num_epochs=500

In [49]:
best_loss = 1e9
train_losses = []
val_losses = []

for epoch in range(num_epochs):    
    model.train()
    train_loss = 0

    with tqdm(train_dataloader, unit="batch") as batch:
        for img, vid in batch:
            batch.set_description(f"Epoch {epoch+1}")
            img=img.to(device)
            preds, feat_map=model(img)

            preds=preds.flatten()
            feat_map=feat_map.flatten()
            
            batch_loss=criterion(preds.to(torch.float32),feat_map.to(torch.float32))
            train_loss += batch_loss.item()
            
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            batch.set_postfix(loss=batch_loss.item())

    train_losses.append(train_loss / len(train_dataloader))

    model.eval()
    val_loss = 0

    with torch.no_grad():
        with tqdm(test_dataloader, unit="batch") as batch:
            for img, vid in batch:
                batch.set_description(f"Test Epoch {epoch+1}")
                img=img.to(device)
                preds, feat_map=model(img)

                preds=preds.flatten()
                feat_map=feat_map.flatten()
                
                batch_loss=criterion(preds.to(torch.float32),feat_map.to(torch.float32))
                val_loss += batch_loss.item()
                
                batch.set_postfix(loss=batch_loss.item())

    loss = val_loss / len(test_dataloader)
    val_losses.append(loss)

    # graw loss graph
    x = range(epoch + 1)
    plt.plot(x, train_losses, x, val_losses)
    plt.legend(['train_loss', 'val_loss'])
    plt.savefig('loss_graph.png')

    # save log
    with open('log.txt', '+a') as f:
        f.write(f'Epoch: {epoch} | loss: {loss}\n')

        if loss < best_loss:
            torch.save(model, f'autoencoder_best.pickle')
            f.write(f'Save Model in epoch {epoch}\n')
        


Epoch 1: 100%|██████████| 215/215 [06:48<00:00,  1.90s/batch, loss=7.67]
Test Epoch 1: 100%|██████████| 24/24 [00:37<00:00,  1.54s/batch, loss=7.83]
Epoch 2: 100%|██████████| 215/215 [01:17<00:00,  2.78batch/s, loss=6.66]
Test Epoch 2: 100%|██████████| 24/24 [00:07<00:00,  3.13batch/s, loss=7.03]
Epoch 3: 100%|██████████| 215/215 [01:19<00:00,  2.72batch/s, loss=6.08]
Test Epoch 3: 100%|██████████| 24/24 [00:08<00:00,  2.99batch/s, loss=6.61]
Epoch 4: 100%|██████████| 215/215 [01:19<00:00,  2.71batch/s, loss=5.68]
Test Epoch 4: 100%|██████████| 24/24 [00:08<00:00,  2.93batch/s, loss=6.34]
Epoch 5: 100%|██████████| 215/215 [01:19<00:00,  2.70batch/s, loss=5.38]
Test Epoch 5: 100%|██████████| 24/24 [00:08<00:00,  2.83batch/s, loss=6.16]
Epoch 6: 100%|██████████| 215/215 [01:19<00:00,  2.70batch/s, loss=5.17]
Test Epoch 6: 100%|██████████| 24/24 [00:07<00:00,  3.02batch/s, loss=6.03]
Epoch 7: 100%|██████████| 215/215 [01:20<00:00,  2.68batch/s, loss=4.99]
Test Epoch 7: 100%|██████████| 24

KeyboardInterrupt: 

In [None]:
torch.save(model, 'autoencoder.pickle')

In [None]:
##END OF CODE

In [None]:
# save_data = {}

# with torch.no_grad():
#   for img, vid in tqdm(train_dataloader):
#     out = model.efficientnet(img)
#     out = model.encoder(out)
#     for b in range(batch_size):
#       if b < len(vid):
#         save_data[vid[b]] = out[b]

# print(f'train len: {len(train_list)}')
# print(f'data len: {len(save_data)}')

# with open('train.pickle', 'wb') as f:
#   pickle.dump(save_data, f, pickle.HIGHEST_PROTOCOL)

  0%|          | 0/215 [00:00<?, ?it/s]


RuntimeError: Mismatched Tensor types in NNPack convolutionOutput

In [None]:

# with torch.no_grad():
#   for img, vid in tqdm(test_dataloader):
#     out = model.efficientnet(img)
#     out = model.encoder(out)
#     print(out, out.shape)
#     for b in range(batch_size):
#       if b < len(vid):
#         save_data[vid[b]] = out[b]

# print(f'test len: {len(train_list)}')
# print(f'data len: {len(save_data)}')

# with open('test.pickle', 'wb') as f:
#   pickle.dump(save_data, f, pickle.HIGHEST_PROTOCOL)

데이터 로더 안쓰는 버전

In [None]:
# # without dataloader ver

# from PIL import Image
# import pickle

# save_data = {}

# transform = transform=transforms.Compose([
#                                transforms.ToTensor(),
#                                transforms.Pad(padding=(0, 140), padding_mode='reflect'),
#                                transforms.Resize((224, 224))
#                       ])

# for vid in tqdm(test_list):
#   img_path = './test/' + vid + '.jpg'

#   img = Image.open(img_path)
#   img = transform(img).unsqueeze(0)

#   # print(img.shape)

#   features = efficientnet.extract_features(img)
#   # print(features.shape)

#   save_data[vid] = features

# with open('test.pickle', 'wb') as f:
#   pickle.dump(save_data, f, pickle.HIGHEST_PROTOCOL)