In [11]:
import numpy as np
import json
import pandas as pd
from tqdm import trange
import argparse
from modules.gem import GeM
from utils.train_util import set_seed
from torch.utils.data import DataLoader
from datasets.dl import GeMData
from datasets.config import GeMConfig
import torch
import os
from tqdm import tqdm
import gc
import torch.nn.functional as F
from matplotlib import pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
from sklearn.metrics import roc_auc_score
# InteractiveShell.ast_node_interactivity = "all"

In [12]:
# Instantiate the network from a default-constructed configuration object.
model_cfg = GeMConfig()
model = GeM(model_cfg)

In [13]:
# Synthetic training fixture: random arrays stand in for real data.
cfg = GeMConfig()
# 1000 uint8 arrays of shape (3, 224, 224) with values drawn from [0, 256).
cfg.pic_matrix = np.random.randint(0, 256, (1000, 3, 224, 224), dtype=np.uint8)
# 128 rows of 6 integers drawn from [0, 1000) -- presumably row indices into
# pic_matrix; confirm against GeMData.
cfg.dataset = np.random.randint(0, 1000, (128, 6))

In [14]:
# Wrap the synthetic training config in a dataset and a shuffled loader.
train_dataset = GeMData(cfg)
train_data_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# All-zero class targets, one per sample of a full batch. The length is read
# from the loader so it cannot drift from the batch size configured above.
input_label = torch.zeros(train_data_loader.batch_size, dtype=torch.long)

In [15]:
# Adam over every parameter of the model, learning rate 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [16]:
# Train for three epochs over the training loader, showing the latest batch
# loss in the progress-bar description.
for epoch in range(3):
    model.train()
    model.zero_grad()
    index = 0  # batches processed so far in this epoch
    steps_one_epoch = len(train_data_loader)
    enum_dataloader = tqdm(train_data_loader, total=steps_one_epoch, desc="EP-{} train".format(epoch))
    for data in enum_dataloader:
        # Scale the raw batch values by 1/256 before the forward pass.
        data = data / 256.0
        # Second argument is 224 -- presumably the image side length; confirm in GeM.
        pred = model(data, 224)

        # All-zero class targets sized to this batch's predictions. Building them
        # per batch (instead of reusing a fixed-length tensor) keeps cross_entropy
        # from failing on a short final batch or after a batch-size change.
        batch_label = torch.zeros(pred.size(0), dtype=torch.long, device=pred.device)
        loss = F.cross_entropy(pred, batch_label)

        loss.backward()
        optimizer.step()
        # Clear gradients so the next batch does not accumulate onto this one.
        model.zero_grad()

        enum_dataloader.set_description("EP-{} train loss: {}".format(epoch, loss.item()))
        enum_dataloader.refresh()
        index += 1

    print('epoch {} end'.format(epoch))

EP-0 train loss: 168.6593017578125: 100%|█████████████████████████████████████████████████| 4/4 [00:59<00:00, 14.94s/it]


epoch 0 end


EP-1 train loss: 17.996931076049805: 100%|████████████████████████████████████████████████| 4/4 [00:42<00:00, 10.54s/it]


epoch 1 end


EP-2 train loss: 8.267402648925781: 100%|█████████████████████████████████████████████████| 4/4 [00:42<00:00, 10.51s/it]

epoch 2 end





In [17]:
# valid dataset
# Synthetic validation fixture: random arrays plus random index pairs, each
# row ending in a random binary label.
vcfg = GeMConfig()
# uint8 matches the training fixture and avoids allocating the array in the
# platform default integer type (4-8x the memory for values that fit in a byte).
vcfg.pic_matrix = np.random.randint(low=0, high=256, size=(1000, 3, 224, 224), dtype=np.uint8)
# 128 rows of 2 integers in [0, 1000) -- presumably indices into pic_matrix; confirm in GeMData.
dataset1 = np.random.randint(low=0, high=1000, size=(128, 2))
# One 0/1 label per row.
dataset_label = np.random.randint(low=0, high=2, size=(128, 1))
# Final layout per row: [value_a, value_b, label].
vcfg.dataset = np.concatenate([dataset1, dataset_label], axis=-1)

In [18]:
# Build the dataset with isValid=True and iterate it in a fixed (unshuffled) order.
valid_dataset = GeMData(vcfg, isValid=True)
valid_data_loader = DataLoader(
    valid_dataset,
    shuffle=False,
    batch_size=32,
)

In [5]:
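# Disabled: would replace `model` with a fresh GeM built from `tcfg`, which is
# only defined in a later cell.
# model = GeM(tcfg)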

In [19]:
# Score the validation loader without tracking gradients, accumulating the
# per-sample labels and model outputs as plain Python lists.
model.eval()
labels, preds = [], []
with torch.no_grad():
    for data in valid_data_loader:
        # The last slot along dim 1 is used as the label; everything before it
        # is the model input, scaled by 1/256.
        label_data = data[:, -1]
        input_data = data[:, :-1] / 256.0

        res = model(input_data, 224, valid_mode=True)
        labels.extend(label_data.cpu().numpy().tolist())
        preds.extend(res.cpu().numpy().tolist())

In [22]:
# ROC AUC of the collected validation outputs against the collected labels.
roc_auc_score(y_true=labels, y_score=preds)

0.4298717948717949

In [23]:
# test dataset
# Synthetic inference fixture: random arrays plus a flat vector of 128 integers.
tcfg = GeMConfig()
# uint8 matches the training fixture and avoids allocating the array in the
# platform default integer type (4-8x the memory for values that fit in a byte).
tcfg.pic_matrix = np.random.randint(low=0, high=256, size=(1000, 3, 224, 224), dtype=np.uint8)
# 128 integers in [0, 1000) -- presumably one pic_matrix index per sample; confirm in GeMData.
tcfg.dataset = np.random.randint(low=0, high=1000, size=(128))

In [27]:
# Build the dataset in its default mode and iterate it in a fixed (unshuffled) order.
test_dataset = GeMData(tcfg)
test_data_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=32,
)

In [28]:
# Run model.predict over the test loader without tracking gradients, printing
# the input and output sizes of each batch and collecting the outputs.
model.eval()
preds = []
with torch.no_grad():
    for data in test_data_loader:
        # Scale the raw batch values by 1/256, as in training.
        input_data = data / 256.0
        print(data.size())
        res = model.predict(input_data, 224)
        print(res.size())
        # Keep the outputs: previously `preds` was initialised but never filled,
        # so every prediction was discarded. Stored as Python lists, matching
        # how the validation loop accumulates its outputs.
        preds.extend(res.cpu().numpy().tolist())

torch.Size([32, 3, 224, 224])
torch.Size([32, 512])
torch.Size([32, 3, 224, 224])
torch.Size([32, 512])
torch.Size([32, 3, 224, 224])
torch.Size([32, 512])
torch.Size([32, 3, 224, 224])
torch.Size([32, 512])
