In [1]:
import wandb
import torch
import yaml
from ecallisto_model import (
    ResNet,
)
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from tqdm import tqdm
import os
import pandas as pd
from ecallisto_dataset import (
    EcallistoDatasetBinary,
    custom_resize,
    remove_background,
)


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# W&B artifact reference that is resolved with `api.artifact(...)` further down.
checkpoint_reference = "vincenzo-timmel/FlareSense-v2/best_model:v324"
# Path of the YAML file that is parsed into `config` with `yaml.safe_load`.
CONFIG_PATH = "configs/t999.yml"

In [7]:
# Look up the checkpoint artifact through the W&B API and download it.
api = wandb.Api()
artifact = api.artifact(checkpoint_reference)
# NOTE(review): `artifact_dir` is not read by any cell visible in this notebook;
# the model is loaded from `artifact.file()` instead — confirm whether it is still needed.
artifact_dir = artifact.download()

[34m[1mwandb[0m: Downloading large artifact best_model:v324, 128.04MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:7.4


In [3]:
# Parse the YAML file at CONFIG_PATH into `config`.
with open(CONFIG_PATH) as config_stream:
    config = yaml.safe_load(config_stream)

In [14]:
def load_model(checkpoint_path, config, device):
    """Build a ``ResNet`` and restore its weights from a checkpoint file.

    Args:
        checkpoint_path: Path to a checkpoint readable by ``torch.load`` whose
            top-level ``"state_dict"`` entry holds the model weights.
        config: Parsed configuration; only ``config["model"]["model_type"]``
            is read here.
        device: Device the returned model is placed on (e.g. ``"cuda"`` or
            ``"cpu"``).

    Returns:
        The ``ResNet`` with restored weights, moved to ``device`` and switched
        to evaluation mode.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry or the
            config lacks ``["model"]["model_type"]``.
    """
    # Initialize the model.
    # NOTE(review): optimizer_name / learning_rate presumably only matter for
    # training, so the values here act as placeholders — TODO confirm.
    model = ResNet(
        1,
        resnet_type=config["model"]["model_type"],
        optimizer_name="adam",
        learning_rate=1000,
    )

    # Deserialize onto the CPU first: load_state_dict copies the values into
    # the model's existing parameters, so placing the checkpoint tensors on
    # `device` would not place the model there. The model is moved explicitly
    # below instead.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"])

    # Honor the `device` argument and return the model ready for inference.
    model.to(device)
    model.eval()

    return model

# Use the GPU when one is available; otherwise fall back to the CPU so this
# cell still runs on machines without CUDA.
model = load_model(
    artifact.file(),
    config,
    "cuda" if torch.cuda.is_available() else "cpu",
)

In [24]:
# Transforms
def _resize_to_model_input(sample):
    """Resize `sample` to the size listed under config["model"]["input_size"]."""
    # The config is read on every call, so later edits to `config` are picked up.
    target_size = tuple(config["model"]["input_size"])
    return custom_resize(sample, target_size)


resize_func = Compose([_resize_to_model_input])


from datasets import DatasetDict, load_dataset

def prepare_datasets(config):
    """Load the train, validation and test splits into a single DatasetDict.

    The dataset locations are read from ``config["data"]`` under the keys
    ``"train_path"``, ``"val_path"`` and ``"test_path"``.

    Returns:
        A ``DatasetDict`` with the keys ``"train"``, ``"test"`` and
        ``"validation"``.
    """
    data_cfg = config["data"]

    # Splits are loaded in the order train -> validation -> test.
    train_split = load_dataset(data_cfg["train_path"], split="train")
    valid_split = load_dataset(data_cfg["val_path"], split="validation")
    test_split = load_dataset(data_cfg["test_path"], split="test")

    return DatasetDict(
        {
            "train": train_split,
            "test": test_split,
            "validation": valid_split,
        }
    )


def prepare_dataloaders(ds_train, ds_valid, ds_test, batch_size):
    """Wrap the three datasets in DataLoaders that share the same settings.

    Every loader uses ``batch_size``, keeps the dataset order (no shuffling)
    and does not keep worker processes alive between epochs.

    Returns:
        A ``(train, validation, test)`` tuple of ``DataLoader`` objects.
    """
    loader_options = {
        "batch_size": batch_size,
        "shuffle": False,
        "persistent_workers": False,
    }
    return tuple(
        DataLoader(split, **loader_options)
        for split in (ds_train, ds_valid, ds_test)
    )

In [None]:
# Load the train / validation / test splits described in `config`.
dd = prepare_datasets(config)

In [28]:
# Wrap each split in EcallistoDatasetBinary (with the resize transform) and
# build one DataLoader per split, in the order train, validation, test.
t_dl, val_dl, test_dl = prepare_dataloaders(
    *(
        EcallistoDatasetBinary(dd[split_name], normalization_transform=resize_func)
        for split_name in ("train", "validation", "test")
    ),
    batch_size=4,
)

In [34]:
def extract_Features(model, dataloader, device):
    """Collect the flattened ``avgpool`` activations of ``model`` for every batch.

    A forward hook on ``model.resnet.avgpool`` captures that layer's output
    during each forward pass. The hook is always removed before returning,
    including when iterating the dataloader or the forward pass raises, so a
    failed run does not leave stale hooks on the model.

    Args:
        model: Module exposing ``resnet.avgpool``; it is switched to eval mode
            and moved to ``device``.
        dataloader: Iterable yielding 4-item batches whose first item is the
            input tensor; the remaining three items are ignored.
        device: Device used for the model and the inputs.

    Returns:
        A list with one CPU tensor per batch, each flattened from dimension 1
        onwards.
    """
    model.eval()
    model.to(device)
    features_list = []

    # Written by the hook on every forward pass, read right after the pass.
    captured = {}

    def hook_fn(module, input, output):
        captured["features"] = output

    # Register the hook on the avgpool layer
    hook_handle = model.resnet.avgpool.register_forward_hook(hook_fn)

    try:
        with torch.no_grad():
            for inputs, _, _, _ in tqdm(dataloader):
                inputs = inputs.to(device)
                model(inputs)  # Forward pass; only the hooked output is used
                features = torch.flatten(captured["features"], 1)
                features_list.append(features.cpu())
    finally:
        # Remove the hook even on failure so re-running does not stack hooks.
        hook_handle.remove()

    return features_list

In [38]:
# Show the first record of the test split to inspect its fields.
dd['test'][0]

{'file_path': '/mnt/nas05/data01/vincenzo/ecallisto/data/ALASKA-COHOE_63/0/2023-02-27_18-46-00.parquet',
 'label': False,
 'antenna': 'ALASKA-COHOE_63',
 'datetime': Timestamp('2023-02-27 18:46:00')}

In [35]:
# NOTE(review): despite its name, `labels` receives the per-batch feature
# tensors returned by extract_Features; the name is kept so that any later
# cell referring to it still resolves.
# Use the GPU when available and fall back to the CPU otherwise.
labels = extract_Features(
    model,
    test_dl,
    "cuda" if torch.cuda.is_available() else "cpu",
)

  0%|          | 0/7638 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/nas05/data01/vincenzo/ecallisto/data/ALASKA-COHOE_63/0/2023-02-27_18-46-00.parquet'