In [1]:
import gc
import os
import random

import math
import copy

import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau

from transformers import AdamW
from transformers import get_cosine_schedule_with_warmup

from sklearn.metrics import f1_score,roc_auc_score

In [2]:
# Load the pickled test table. Later cells read its "ct_path" column and take
# the embedding from the second column by position.
# NOTE(review): pickle can execute arbitrary code on load - only open files from a trusted source.
df = pd.read_pickle("work_test/test_224_embed.pkl")

In [3]:
# Distinct values of the "ct_path" column, in order of first appearance.
ct_name = df["ct_path"].unique()

In [4]:
class config:
    """Constants shared by the dataset, the model and the data loader."""

    # Number of rows every ct_path group is resampled to by the dataset.
    ct_len = 120

    # Hidden size of each LSTM direction (the head consumes twice this width).
    HIDDEN_SIZE = 128

    # Batch size used when building the test DataLoader.
    BS = 32


In [5]:
class Covid19Dataset(torch.utils.data.Dataset):
    """Serve one fixed-length stack of embeddings per ``ct_path`` group.

    The frame is split into one sub-frame per distinct ``ct_path`` value.
    Each item resamples the rows of its sub-frame to exactly ``ct_len`` rows
    (sub-sampling without replacement when there are enough rows, otherwise
    keeping every row and padding with randomly repeated ones), keeps them in
    their original order, and stacks the per-row embeddings into one matrix.

    Args:
        df: DataFrame with a ``ct_path`` column. The embedding is read from
            the column at position 1.
            NOTE(review): presumably that column holds one numpy array per
            row - confirm against the pickle schema.
        ct_len: Number of rows per item. ``None`` (default) means "use
            ``config.ct_len``", looked up each time an item is read.
        embed_dim: Expected width of one flattened embedding; only used by
            the shape diagnostic.
    """

    # Fixed seed so that reading the same item twice selects the same rows.
    _SAMPLE_SEED = 4019

    def __init__(self, df, ct_len=None, embed_dim=224):
        self.df = df
        self.ct_len = ct_len
        self.embed_dim = embed_dim
        # groupby iterates the groups in sorted key order; one frame per scan.
        self.dfs = [group for _, group in self.df.groupby("ct_path")]

    def __getitem__(self, index):
        """Return ``{"X": float tensor of stacked embeddings, "id": ct_path}``."""
        ct_len = config.ct_len if self.ct_len is None else self.ct_len

        tmp_df = self.dfs[index]
        ct_name = tmp_df.ct_path.values[0]
        n_rows = len(tmp_df)

        # A private generator gives the same draws as seeding the module-level
        # RNG would, without resetting global random state on every read.
        rng = random.Random(self._SAMPLE_SEED)

        if n_rows >= ct_len:
            # Enough rows: pick ct_len distinct positions.
            target_index = rng.sample(list(range(n_rows)), k=ct_len)
        else:
            # Too few rows: keep all of them and repeat random ones to pad.
            target_index = list(range(n_rows))
            padding = rng.choices(target_index, k=ct_len - n_rows)
            target_index = target_index + padding

        # Restore the original row order within the group.
        target_index.sort()

        embed = tmp_df.iloc[target_index, 1].values
        # Flatten each embedding to a single row and stack them.
        img = np.concatenate([row.reshape(1, -1) for row in embed], axis=0)

        # Diagnostic only: report unexpected shapes instead of failing.
        if img.shape != (ct_len, self.embed_dim):
            print(img.shape)

        return {
            "X": torch.tensor(img).float(),
            'id': ct_name,
        }

    def __len__(self):
        """Number of distinct ``ct_path`` groups."""
        return len(self.dfs)

In [6]:
class Covid19Model(nn.Module):
    """Four-layer bidirectional LSTM with a small MLP head emitting one logit.

    The input feature width is 224 and the batch dimension comes first.
    The hidden width is taken from ``config.HIDDEN_SIZE``.
    """

    def __init__(self):
        super().__init__()

        hidden = config.HIDDEN_SIZE
        bottleneck = hidden // 2

        self.lstm = nn.LSTM(
            input_size=224,
            hidden_size=hidden,
            num_layers=4,
            batch_first=True,
            dropout=0.0,
            bidirectional=True,
        )
        self.dropout1 = nn.Dropout(p=0.1)
        self.dropout2 = nn.Dropout(p=0.15)
        # Projects the concatenated forward/backward state down to hidden // 2.
        self.head = nn.Sequential(
            nn.Linear(hidden * 2, bottleneck),
            nn.LayerNorm(bottleneck),
            nn.ReLU(),
        )
        self.head2 = nn.Linear(bottleneck, 1)

    def forward(self, X):
        """Return one raw logit per batch element (no sigmoid applied)."""
        _, (h_n, _) = self.lstm(X, None)

        # The last two entries of h_n are concatenated along the feature axis;
        # per the nn.LSTM docs these are the final layer's forward and
        # backward states.
        second_last = h_n[-2]
        last = h_n[-1]
        features = torch.cat([second_last, last], dim=1)

        features = self.dropout1(features)
        features = self.head(features)
        features = self.dropout2(features)
        return self.head2(features)

In [7]:
# Wrap the test frame so that each item is one ct_path group.
test_dset = Covid19Dataset(df=df)

In [8]:
# Sequential, complete pass over the test set: no shuffling, last partial batch kept.
test_loader = DataLoader(
    test_dset,
    batch_size=config.BS,
    shuffle=False,
    drop_last=False,
    num_workers=8,
    pin_memory=True,
)

In [9]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing when the model is moved.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:

# Checkpoint holding the LSTM weights used for prediction.
pred_path = "model_weights/lstm/f1_best_model.bin"
model = Covid19Model()
# Deserialize onto the CPU first: this works even if the checkpoint was saved
# from a GPU that is not available here, and avoids a temporary GPU copy.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
state_dict = torch.load(pred_path, map_location="cpu")
model.load_state_dict(state_dict)
model.to(device);

In [11]:
@torch.inference_mode()
def inference(model, loader):
    """Run ``model`` over every batch of ``loader`` and collect probabilities.

    Args:
        model: Module mapping a float batch ``d['X']`` to logits.
        loader: Iterable of dict batches with keys ``'X'`` (tensor) and
            ``'id'`` (identifiers of the batch elements).

    Returns:
        ``(pred_y, IDS)``: two lists with one entry per batch, holding the
        sigmoid of the model output as a numpy array and the batch's ``'id'``
        value respectively.
    """
    # Feed batches to wherever the model lives rather than relying on a
    # module-level device variable; parameter-free models run on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    pred_y = []
    IDS = []

    model.eval()
    for d in loader:
        images = d['X'].to(target_device, dtype=torch.float)
        out = model(images)

        # Only the probabilities are kept; raw logits are not accumulated.
        pred_y.append(torch.sigmoid(out).cpu().numpy())
        IDS.append(d['id'])

    return pred_y, IDS

In [12]:
# Per-batch probabilities and the matching per-batch ids for the whole test set.
predicts, name = inference(model=model, loader=test_loader)

In [13]:
# Flatten the per-batch lists into one array each (the two statements are independent).
predicts = np.concatenate(predicts)
name = np.concatenate(name)

In [14]:
# Map each id to its probability; a repeated id keeps the last value seen.
dict_all = {ct_id: prob for ct_id, prob in zip(name, predicts.reshape(-1))}

In [15]:
# One row per id: its path and the predicted probability.
lstm_pred_df = pd.DataFrame(
    list(dict_all.items()),
    columns=['path', 'pred'],
)

In [16]:
# to_csv does not create missing folders; make sure the target directory
# exists so the write cannot fail after the whole inference run.
os.makedirs("output", exist_ok=True)
lstm_pred_df.to_csv("output/lstm_pred_df.csv", index=False)