# About This Notebook

This is an implementation of a vanilla ResNet34d in PyTorch for the SETI competition. It scores around 0.97x (0.974-0.976) on the LB and 0.9882 CV.

Training Params: -
1. **Dataset**: - Spatial arrangement of the channels (512x512)
2. **Augmentations**: - Resize, HorizontalFlip, VerticalFlip, RandomResizedCrop, ShiftScaleRotate, Cutout, Mixup
3. **Optimizer**: - Adam
4. **Scheduler**: - CosineAnnealingWarmRestarts
5. **Model**: - Resnet34d
6. **Initial Weights**: - Imagenet
7. **Max Epochs**: - 30 (~7 min per epoch)
8. **Saved Weights**: - 5-fold ensemble. Weights having the highest OOF score on the ROC-AUC metric were saved.

This notebook only contains the inference for the model described above. If you are interested in EDA or a baseline model, please refer to the link below.

EDA and Baseline Model Notebook:- https://www.kaggle.com/manabendrarout/nfnet-pytorch-starter-lb-0-95

**If you find this notebook useful and use parts of it in your work, please don't forget to show your appreciation by upvoting this kernel. That keeps me motivated and inspires me to write and share these public kernels. 😊**

# Get GPU Info

In [None]:
!nvidia-smi

# Import

In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
# Aesthetics
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

# General
from tqdm import tqdm
import pandas as pd
import numpy as np
import os
import glob
import random
pd.set_option('display.max_columns', None)

# Image Aug
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

# Machine Learning
# Deep Learning
import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Random Seed Initialize
RANDOM_SEED = 42

def seed_everything(seed=RANDOM_SEED):
    """Seed every random number generator used in this notebook.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and PyTorch
    (CPU and all CUDA devices), then configures cuDNN for repeatable results.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms per run, which can pick
    # different kernels between runs and defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything()

# Device Optimization
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

In [None]:
# Input locations: competition data (CSV + test arrays) and trained checkpoints.
models_dir = '../input/seti-numpy-trained-models/resnet34d/resnet34d_Numpy'
csv_dir = '../input/seti-breakthrough-listen'
test_dir = '../input/seti-breakthrough-listen/test'

# The sample submission lives directly inside the competition data folder.
sample_sub_file_path = os.path.join(csv_dir, 'sample_submission.csv')

print(
    f'Test file: {sample_sub_file_path}',
    f'Models path: {models_dir}',
    sep='\n',
)

In [None]:
# Load the sample submission; its `id` and `target` columns are used further
# down to locate the test files and to build the dataset.
test_df = pd.read_csv(sample_sub_file_path)

# Preview the first rows.
test_df.head()

In [None]:
def return_filpath(name, folder):
    """Build the path of the ``.npy`` file belonging to sample ``name``.

    The file is expected inside a sub-directory of ``folder`` named after the
    first character of ``name``.
    """
    shard = name[0]
    return os.path.join(folder, shard, f'{name}.npy')

In [None]:
# Resolve every sample id to the location of its .npy file in the test folder.
test_df['image_path'] = test_df['id'].map(
    lambda sample_id: return_filpath(sample_id, folder=test_dir)
)

# CFG

In [None]:
# Inference configuration shared by the dataset, model and prediction loop.
params = dict(
    model='resnet34d',        # timm architecture name
    pretrained=False,         # passed to timm.create_model by AlienNet
    inp_channels=1,           # number of input channels the backbone expects
    im_size=512,              # side length used by the Resize transform
    on_target_only=False,     # True -> keep only entries 0, 2, 4 of each sample
    device=device,
    batch_size=64,
    num_workers=2,
    out_features=1,           # size of the classifier head output
    debug=False,              # True -> predict on a 1% sample of the test set
)

# Augmentations

In [None]:
def get_test_transforms(TTA, DIM = params['im_size']):
    """Return the albumentations pipeline applied to test images.

    Every pipeline starts with a resize to ``DIM x DIM`` and ends with a
    tensor conversion. When ``TTA`` is greater than 1, random flips, a random
    resized crop and a random shift/scale/rotate (each with p=0.5) are
    inserted in between.
    """
    steps = [albumentations.Resize(DIM, DIM)]

    if TTA > 1:
        # Randomised augmentations, only used for test-time augmentation.
        steps.extend([
            albumentations.HorizontalFlip(p=0.5),
            albumentations.VerticalFlip(p=0.5),
            albumentations.RandomResizedCrop(
                height=DIM, width=DIM, scale=(0.5, 1.0), p=0.5
            ),
            albumentations.ShiftScaleRotate(
                shift_limit=0.3, scale_limit=0.3, rotate_limit=90,
                border_mode=0, value=0, mask_value=0, p=0.5
            ),
        ])

    steps.append(ToTensorV2(p=1.0))
    return albumentations.Compose(steps)

# Dataset

In [None]:
class SETIDataSet(Dataset):
    """Dataset that serves ``.npy`` samples as a single 2-D image plus a label.

    Args:
        images_filepaths: Indexable collection of paths to ``.npy`` files.
        targets: Indexable collection of labels, aligned with the paths.
        transform: Optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``"image"`` entry.
        params: Configuration mapping; only ``'on_target_only'`` is read here.
    """

    def __init__(self, images_filepaths, targets, transform=None, params=params):
        self.images_filepaths = images_filepaths
        self.targets = targets
        self.transform = transform
        self.params = params

    def __len__(self):
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        frames = np.load(self.images_filepaths[idx])
        if self.params['on_target_only']:
            # Keep only entries 0, 2 and 4 along the first axis.
            frames = frames[[0, 2, 4]]
        frames = frames.astype(np.float32)

        # Stack the entries of the first axis vertically into one 2-D array,
        # then swap its two axes.
        image = np.vstack(frames).transpose((1, 0))

        if self.transform is None:
            # No transform: add a leading channel axis and convert by hand.
            image = torch.from_numpy(image[np.newaxis, :, :]).float()
        else:
            image = self.transform(image=image)["image"]

        label = torch.tensor(self.targets[idx]).float()
        return image, label

# CNN Model

In [None]:
class AlienNet(nn.Module):
    """timm backbone followed by a single linear classification head.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        out_features: Output size of the linear head.
        inp_channels: Number of input channels of the backbone.
        pretrained: Whether timm should load its pretrained weights.
    """

    def __init__(self, model_name=params['model'], out_features=params['out_features'],
                 inp_channels=params['inp_channels'], pretrained=params['pretrained']):
        super().__init__()

        # Feature Extractor (Backbone); num_classes=0 drops timm's own classifier.
        self.model = timm.create_model(model_name, pretrained=pretrained,
                                       in_chans=inp_channels, num_classes=0)
        out_tensor_size = self.model.num_features

        if model_name.split('_')[0] == 'efficientnet':
            # Rebuild the stem convolution with the same geometry as the
            # existing one.
            stem = self.model.conv_stem
            self.model.conv_stem = nn.Conv2d(
                inp_channels, stem.out_channels,
                kernel_size=stem.kernel_size, stride=stem.stride,
                padding=stem.padding,
                # nn.Conv2d expects a bool here; `stem.bias` is a Parameter or
                # None, and a multi-element tensor cannot be used as a truth value.
                bias=stem.bias is not None,
            )

        # Classifier (Head)
        layers_list = [nn.Linear(out_tensor_size, out_features)]
        self.clf_head = nn.Sequential(*layers_list)

    def forward(self, x):
        """Run the backbone, then the head; returns the head's raw output."""
        x = self.model(x)
        x = self.clf_head(x)
        return x

# Prediction

In [None]:
# Number of passes over the test set in the prediction loop. Values above 1
# make get_test_transforms return its randomised augmentation pipeline.
NUM_TTA = 1

In [None]:
# Debug mode: keep a random 1% of the test rows so the notebook runs quickly.
if params['debug']:
    test_df = test_df.sample(frac=0.01)

In [None]:
# Resolve the checkpoint list once. Sorting makes the order deterministic,
# and an empty list is reported up front instead of failing on the division.
model_paths = sorted(glob.glob(models_dir + '/*.pth'))
if not model_paths:
    raise FileNotFoundError(f'No .pth checkpoints found in {models_dir}')

# The dataset and loader do not depend on the model, so build them once.
test_dataset = SETIDataSet(
    images_filepaths = test_df['image_path'].values,
    targets = test_df['target'].values,
    transform = get_test_transforms(NUM_TTA)
)
test_loader = DataLoader(
    test_dataset, batch_size=params['batch_size'],
    shuffle=False, num_workers=params['num_workers'],
    pin_memory=True
)

# Running sum of sigmoid outputs over every (TTA pass, checkpoint) pair.
predicted_labels = None
for i in range(NUM_TTA):
    for model_name in model_paths:
        model = AlienNet()
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(model_name, map_location=params['device']))
        model = model.to(params['device'])
        model.eval()

        # Collect per-batch outputs and stack once, rather than re-copying
        # the growing array on every batch.
        batch_preds = []
        with torch.no_grad():
            for (images, target) in tqdm(test_loader, desc=f'TTA: {i+1} Predicting. '):
                images = images.to(params['device'], non_blocking=True)
                output = model(images)
                batch_preds.append(torch.sigmoid(output).cpu().numpy())
        temp_preds = np.vstack(batch_preds)

        if predicted_labels is None:
            predicted_labels = temp_preds
        else:
            predicted_labels += temp_preds

# Average over all passes and checkpoints.
predicted_labels /= (NUM_TTA * len(model_paths))

In [None]:
# Submission frame: the test ids (keeping test_df's index) plus the averaged
# predictions as the target column.
sub_df = test_df[['id']].copy()
sub_df['target'] = predicted_labels

In [None]:
# Preview the first rows of the submission.
sub_df.head()

In [None]:
# Write the submission file to the working directory, without the index column.
sub_df.to_csv('submission.csv', index=False)

**If you find this notebook useful and use parts of it in your work, please don't forget to show your appreciation by upvoting this kernel. That keeps me motivated and inspires me to write and share these public kernels. 😊**