In [6]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import pathlib
import string
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    

import torch

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KDTree

from base import BaseDataset
from models import Segmentation


class AttrDict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    ``d.key`` is equivalent to ``d['key']``. A missing name raises
    ``AttributeError`` (not ``KeyError``) so that ``hasattr`` and
    ``getattr(obj, name, default)``, which rely on ``AttributeError`` to
    detect absent attributes, behave correctly.
    """

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails, so dict methods
        # such as ``keys`` are never shadowed by stored items.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name) from None

In [7]:
def sample_from_letter(fnm_list, n_items, case = None, random_state = None):
    """Draw ``n_items`` random files per class from a list of file names.

    File names are split on ``'_'``: the first token is taken as the letter
    and the last token (after removing the file extension) as the case. The
    class is the concatenation of letter and case, and labels are the
    categorical codes of the classes computed over the *whole* list, before
    any filtering by ``case``.

    Args:
        fnm_list (list of str): File names, e.g. ``'a_Some Font_lower.bin'``.
        n_items (int): Number of files to sample from every class.
        case (str, optional): Keep only files whose case token equals this
            value (``'lower'`` or ``'upper'``). ``None`` keeps both.
        random_state (optional): Forwarded to pandas sampling for
            reproducible draws. ``None`` keeps the draw random.

    Returns:
        tuple: ``(n_classes, fnm_labels)`` where ``n_classes`` is the number
        of distinct classes in ``fnm_list`` and ``fnm_labels`` maps the file
        name without extension to its integer label.

    Raises:
        ValueError: If a class has fewer than ``n_items`` files.
    """
    #case: ('lower','upper')
    # Strip only the final extension: names that contain dots must keep
    # them, because files are later looked up by ``Path.stem``.
    stems = [fnm.rsplit('.', 1)[0] for fnm in fnm_list]
    df = pd.DataFrame({'fname': stems})
    tokens = df['fname'].str.split('_')
    df['letter'] = tokens.str[0]
    df['case'] = tokens.str[-1]
    df['class'] = pd.Categorical(df['letter'] + df['case'])
    df['label'] = df['class'].cat.codes

    n_classes = df['class'].nunique()

    if case is not None:
        df = df[df['case'] == case]

    # GroupBy.sample keeps every column and the original index, so the
    # result does not depend on how groupby.apply treats grouping columns.
    samples = df.groupby('label').sample(n=n_items, random_state=random_state)
    fnm_labels = samples[['fname', 'label']].set_index('fname')['label'].to_dict()
    return n_classes, fnm_labels

class RankingDataset(BaseDataset):
    """Dataset of labelled graph files used for retrieval (ranking) evaluation.

    Labels are supplied explicitly through ``fnm_labels`` and the number of
    classes is stored on the instance as the plain attribute ``num_classes``.
    An earlier ``num_classes`` staticmethod referenced an undefined ``self``
    and was shadowed by this attribute on every instance, so it was removed.
    """

    def __init__(
        self,
        root_dir,
        fnm_labels,
        num_classes,
        _center_and_scale=True,
        random_rotate=False,
    ):
        """
        Args:
            root_dir (str or pathlib.Path): Directory that contains the ``<name>.bin`` graph files.
            fnm_labels (dict): Maps file name without extension to its integer label.
            num_classes (int): Number of classes, stored as ``self.num_classes``.
            _center_and_scale (bool, optional): Whether to center and scale the solid. Defaults to True.
            random_rotate (bool, optional): Whether to apply random rotations to the solid in 90 degree increments. Defaults to False.
        """
        self.random_rotate = random_rotate
        self.num_classes = num_classes

        self.lbs = fnm_labels

        # Join with pathlib so root_dir works with or without a trailing separator.
        root = pathlib.Path(root_dir)
        file_paths = [root / (fnm + '.bin') for fnm in fnm_labels.keys()]
        if file_paths:
            # Quick sanity check that the directory and naming scheme are right.
            print(file_paths[0], file_paths[0].exists())
        # load_graphs comes from the base class; presumably it fills self.data.
        self.load_graphs(file_paths, _center_and_scale)
        print("Done loading {} files".format(len(self.data)))

    def load_one_graph(self, file_path):
        """Load one graph through the base class and attach its label.

        The label is looked up in ``fnm_labels`` by the file's stem and
        stored under ``"label"`` as a one-element long tensor.
        """
        sample = super().load_one_graph(file_path)
        sample["label"] = torch.tensor([self.lbs[str(file_path.stem)]]).long()
        return sample

    def _collate(self, batch):
        """Collate with the base class, then concatenate the per-sample labels."""
        collated = super()._collate(batch)
        collated["label"] = torch.cat([x["label"] for x in batch], dim=0)
        return collated
    
def encode(model, loader, device):
    """Run ``model.encode_part`` over every batch of ``loader`` without gradients.

    For each batch the graph is moved to ``device``, its node features are
    permuted with ``(0, 3, 1, 2)`` and its edge features with ``(0, 2, 1)``
    before being passed to the model.

    Returns:
        tuple: ``(embs_list, labels_list)``, two lists with one CPU tensor
        per batch: the embeddings and the corresponding labels.
    """
    cpu = torch.device('cpu')
    embeddings, targets = [], []
    with torch.no_grad():
        for batch in loader:
            graph = batch["graph"].to(device)
            node_feats = graph.ndata["x"]
            edge_feats = graph.edata["x"]
            graph.ndata["x"] = node_feats.permute(0, 3, 1, 2)
            graph.edata["x"] = edge_feats.permute(0, 2, 1)
            batch_embs = model.encode_part(graph)
            embeddings.append(batch_embs.to(device=cpu))
            targets.append(batch["label"].to(device=cpu))
    return embeddings, targets

def cals_map_all(test_loaders, model, device):
    """Average the ``calc_map`` score over several data loaders.

    The model is switched to eval mode, each loader is embedded with
    ``encode``, the per-batch tensors are concatenated into NumPy arrays and
    scored with ``calc_map``. The mean of the per-loader scores is returned.
    """
    model = model.eval()

    def _score_one(loader):
        # Embed a whole loader and score the retrieval quality of its embeddings.
        emb_batches, label_batches = encode(model, loader, device)
        embeddings = torch.cat(emb_batches, dim=0).numpy()
        targets = torch.cat(label_batches, dim=0).numpy()
        return calc_map(embeddings, targets)

    return np.mean([_score_one(loader) for loader in test_loaders])

def calc_map(X, labels, K = 5):
    """Mean average precision at ``K`` for nearest-neighbour retrieval.

    Every row of ``X`` is used as a query against all other rows. A
    retrieved neighbour is relevant when it has the same label as the query.
    The average precision of one query is the sum of precision@k over the
    relevant ranks only, divided by the number of relevant items among the
    top ``K`` (at least 1), so every score lies in ``[0, 1]``.

    Args:
        X (array-like): 2-D array of embeddings, one row per item.
        labels (array-like): 1-D array of labels, aligned with ``X``.
        K (int, optional): Number of neighbours to evaluate. Defaults to 5.

    Returns:
        float: Mean of the per-query average precision.

    Raises:
        ValueError: If ``X`` has fewer than ``K + 1`` rows.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)
    n_samples = X.shape[0]
    if n_samples <= K:
        raise ValueError(
            "calc_map needs at least K + 1 = {} samples, got {}".format(K + 1, n_samples)
        )

    tree = KDTree(X, leaf_size=40)  # creating kd tree
    _, ind = tree.query(X, k=K+1)  # querying nearest items (query itself included)

    # Remove the query itself by index rather than assuming it is in column 0:
    # with duplicate points the self-match can land at any rank, or be absent
    # from the K + 1 results, in which case the farthest result is dropped.
    is_self = ind == np.arange(n_samples).reshape(-1, 1)
    is_self[~is_self.any(axis=1), -1] = True
    neighbours = ind[~is_self].reshape(n_samples, K)

    is_valid_label = (labels[neighbours] == labels.reshape(-1, 1)).astype(int)

    cum_sum = np.cumsum(is_valid_label, axis=1)
    P_K = cum_sum / np.arange(1, K+1).reshape(1, -1)
    # Precision only counts at ranks holding a relevant item; summing it at
    # every rank let single-hit queries score above 1.
    AP_K = (P_K * is_valid_label).sum(axis=1) / np.clip(cum_sum[:, -1], 1, K)

    return AP_K.mean()

In [8]:
# Report the PyTorch build in use and whether it can see a CUDA device.
# Install hint kept from the original cell:
#!conda install pytorch==1.11.0 cudatoolkit=11.3 -c pytorch
for caption, value in (
    ("Torch version:", torch.__version__),
    ("Is CUDA enabled?", torch.cuda.is_available()),
):
    print(caption, value)


Torch version: 1.12.1
Is CUDA enabled? False


In [2]:
!pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 --user

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113
Collecting torchvision==0.12.0+cu113
  Using cached https://download.pytorch.org/whl/cu113/torchvision-0.12.0%2Bcu113-cp39-cp39-win_amd64.whl (5.4 MB)
Collecting torchaudio==0.11.0
  Using cached https://download.pytorch.org/whl/cu113/torchaudio-0.11.0%2Bcu113-cp39-cp39-win_amd64.whl (573 kB)
Installing collected packages: torchvision, torchaudio
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.11.2+cu102
    Uninstalling torchvision-0.11.2+cu102:
      Successfully uninstalled torchvision-0.11.2+cu102
  Attempting uninstall: torchaudio
    Found existing installation: torchaudio 0.10.1+cu102
    Uninstalling torchaudio-0.10.1+cu102:
      Successfully uninstalled torchaudio-0.10.1+cu102
Successfully installed torchaudio-0.11.0+cu113 torchvision-0.12.0+cu113


DEPRECATION: vtk -PKG-VERSION has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of vtk or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [18]:
# Evaluation settings and the checkpoint to evaluate.
args = AttrDict(
    batch_size=4,
    random_rotate=False,
    num_workers=0,
    checkpoint='D:/NIR/best.ckpt',
)
# Originally this was ('cuda:2'); it may be worth switching back later if
# things stop working again.
device = torch.device('cpu')

# Only the inner network of the Lightning module is used, moved to the chosen device.
model = Segmentation.load_from_checkpoint(args.checkpoint).model.to(device = device)

  rank_zero_warn(
Lightning automatically upgraded your loaded checkpoint from v1.3.8 to v1.9.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint --file D:\NIR\best.ckpt`


In [19]:
GRAPH_DIR = 'D:/NIR/SolidLetters/graph_with_eattr/'
N_ITEMS_PER_CLASS = 2
SAMPLING_SEED = 0

# os.listdir returns entries in arbitrary order; sort so the sampling below
# always starts from the same list.
fnm_list = sorted(os.listdir(GRAPH_DIR))

# sample_from_letter samples with pandas without an explicit random_state,
# which draws from NumPy's global RNG. Seeding it makes the evaluated subset,
# and therefore the metric, reproducible across runs.
np.random.seed(SAMPLING_SEED)

# creating loaders for SolidLetters dataset querying: one per letter case
test_loaders = []
for case in ('lower', 'upper'):
    ncl, fnm_labels = sample_from_letter(fnm_list, N_ITEMS_PER_CLASS, case)
    dset = RankingDataset(GRAPH_DIR,
                          fnm_labels,
                          ncl)
    test_loaders.append(dset.get_dataloader(batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers))

D:\NIR\SolidLetters\graph_with_eattr\a_Rhodium Libre_lower.bin True


100%|██████████| 52/52 [00:00<00:00, 795.17it/s]

Done loading 52 files





D:\NIR\SolidLetters\graph_with_eattr\a_Love Ya Like A Sister_upper.bin True


100%|██████████| 52/52 [00:00<00:00, 892.44it/s]

Done loading 52 files





In [20]:
# Calculating the metric: mean of calc_map over the loaders built above.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'UVNetSegmenter' object has no attribute 'encode_part'",
# i.e. the model loaded from the Segmentation checkpoint does not provide the
# method that encode() calls.
cals_map_all(test_loaders, model, device)

AttributeError: 'UVNetSegmenter' object has no attribute 'encode_part'

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import pathlib
import string
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import torch

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KDTree

from base1 import BaseDataset
from models1 import Classification


class AttrDict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    ``d.key`` is equivalent to ``d['key']``. A missing name raises
    ``AttributeError`` (not ``KeyError``) so that ``hasattr`` and
    ``getattr(obj, name, default)``, which rely on ``AttributeError`` to
    detect absent attributes, behave correctly.
    """

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails, so dict methods
        # such as ``keys`` are never shadowed by stored items.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name) from None

In [9]:
def sample_from_letter(fnm_list, n_items, case = None, random_state = None):
    """Draw ``n_items`` random files per class from a list of file names.

    File names are split on ``'_'``: the first token is taken as the letter
    and the last token (after removing the file extension) as the case. The
    class is the concatenation of letter and case, and labels are the
    categorical codes of the classes computed over the *whole* list, before
    any filtering by ``case``.

    Args:
        fnm_list (list of str): File names, e.g. ``'a_Some Font_lower.bin'``.
        n_items (int): Number of files to sample from every class.
        case (str, optional): Keep only files whose case token equals this
            value (``'lower'`` or ``'upper'``). ``None`` keeps both.
        random_state (optional): Forwarded to pandas sampling for
            reproducible draws. ``None`` keeps the draw random.

    Returns:
        tuple: ``(n_classes, fnm_labels)`` where ``n_classes`` is the number
        of distinct classes in ``fnm_list`` and ``fnm_labels`` maps the file
        name without extension to its integer label.

    Raises:
        ValueError: If a class has fewer than ``n_items`` files.
    """
    #case: ('lower','upper')
    # Strip only the final extension: names that contain dots must keep
    # them, because files are later looked up by ``Path.stem``.
    stems = [fnm.rsplit('.', 1)[0] for fnm in fnm_list]
    df = pd.DataFrame({'fname': stems})
    tokens = df['fname'].str.split('_')
    df['letter'] = tokens.str[0]
    df['case'] = tokens.str[-1]
    df['class'] = pd.Categorical(df['letter'] + df['case'])
    df['label'] = df['class'].cat.codes

    n_classes = df['class'].nunique()

    if case is not None:
        df = df[df['case'] == case]

    # GroupBy.sample keeps every column and the original index, so the
    # result does not depend on how groupby.apply treats grouping columns.
    samples = df.groupby('label').sample(n=n_items, random_state=random_state)
    fnm_labels = samples[['fname', 'label']].set_index('fname')['label'].to_dict()
    return n_classes, fnm_labels

class RankingDataset(BaseDataset):
    """Dataset of labelled graph files used for retrieval (ranking) evaluation.

    Labels are supplied explicitly through ``fnm_labels`` and the number of
    classes is stored on the instance as the plain attribute ``num_classes``.
    An earlier ``num_classes`` staticmethod referenced an undefined ``self``
    and was shadowed by this attribute on every instance, so it was removed.
    """

    def __init__(
        self,
        root_dir,
        fnm_labels,
        num_classes,
        _center_and_scale=True,
        random_rotate=False,
    ):
        """
        Args:
            root_dir (str or pathlib.Path): Directory that contains the ``<name>.bin`` graph files.
            fnm_labels (dict): Maps file name without extension to its integer label.
            num_classes (int): Number of classes, stored as ``self.num_classes``.
            _center_and_scale (bool, optional): Whether to center and scale the solid. Defaults to True.
            random_rotate (bool, optional): Whether to apply random rotations to the solid in 90 degree increments. Defaults to False.
        """
        self.random_rotate = random_rotate
        self.num_classes = num_classes

        self.lbs = fnm_labels

        # Join with pathlib so root_dir works with or without a trailing separator.
        root = pathlib.Path(root_dir)
        file_paths = [root / (fnm + '.bin') for fnm in fnm_labels.keys()]
        if file_paths:
            # Quick sanity check that the directory and naming scheme are right.
            print(file_paths[0], file_paths[0].exists())
        # load_graphs comes from the base class; presumably it fills self.data.
        self.load_graphs(file_paths, _center_and_scale)
        print("Done loading {} files".format(len(self.data)))

    def load_one_graph(self, file_path):
        """Load one graph through the base class and attach its label.

        The label is looked up in ``fnm_labels`` by the file's stem and
        stored under ``"label"`` as a one-element long tensor.
        """
        sample = super().load_one_graph(file_path)
        sample["label"] = torch.tensor([self.lbs[str(file_path.stem)]]).long()
        return sample

    def _collate(self, batch):
        """Collate with the base class, then concatenate the per-sample labels."""
        collated = super()._collate(batch)
        collated["label"] = torch.cat([x["label"] for x in batch], dim=0)
        return collated
    
def encode(model, loader, device):
    """Run ``model.encode_part`` over every batch of ``loader`` without gradients.

    For each batch the graph is moved to ``device``, its node features are
    permuted with ``(0, 3, 1, 2)`` and its edge features with ``(0, 2, 1)``
    before being passed to the model.

    Returns:
        tuple: ``(embs_list, labels_list)``, two lists with one CPU tensor
        per batch: the embeddings and the corresponding labels.
    """
    cpu = torch.device('cpu')
    embeddings, targets = [], []
    with torch.no_grad():
        for batch in loader:
            graph = batch["graph"].to(device)
            node_feats = graph.ndata["x"]
            edge_feats = graph.edata["x"]
            graph.ndata["x"] = node_feats.permute(0, 3, 1, 2)
            graph.edata["x"] = edge_feats.permute(0, 2, 1)
            batch_embs = model.encode_part(graph)
            embeddings.append(batch_embs.to(device=cpu))
            targets.append(batch["label"].to(device=cpu))
    return embeddings, targets

def cals_map_all(test_loaders, model, device):
    """Average the ``calc_map`` score over several data loaders.

    The model is switched to eval mode, each loader is embedded with
    ``encode``, the per-batch tensors are concatenated into NumPy arrays and
    scored with ``calc_map``. The mean of the per-loader scores is returned.
    """
    model = model.eval()

    def _score_one(loader):
        # Embed a whole loader and score the retrieval quality of its embeddings.
        emb_batches, label_batches = encode(model, loader, device)
        embeddings = torch.cat(emb_batches, dim=0).numpy()
        targets = torch.cat(label_batches, dim=0).numpy()
        return calc_map(embeddings, targets)

    return np.mean([_score_one(loader) for loader in test_loaders])

def calc_map(X, labels, K = 5):
    """Mean average precision at ``K`` for nearest-neighbour retrieval.

    Every row of ``X`` is used as a query against all other rows. A
    retrieved neighbour is relevant when it has the same label as the query.
    The average precision of one query is the sum of precision@k over the
    relevant ranks only, divided by the number of relevant items among the
    top ``K`` (at least 1), so every score lies in ``[0, 1]``.

    Args:
        X (array-like): 2-D array of embeddings, one row per item.
        labels (array-like): 1-D array of labels, aligned with ``X``.
        K (int, optional): Number of neighbours to evaluate. Defaults to 5.

    Returns:
        float: Mean of the per-query average precision.

    Raises:
        ValueError: If ``X`` has fewer than ``K + 1`` rows.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)
    n_samples = X.shape[0]
    if n_samples <= K:
        raise ValueError(
            "calc_map needs at least K + 1 = {} samples, got {}".format(K + 1, n_samples)
        )

    tree = KDTree(X, leaf_size=40)  # creating kd tree
    _, ind = tree.query(X, k=K+1)  # querying nearest items (query itself included)

    # Remove the query itself by index rather than assuming it is in column 0:
    # with duplicate points the self-match can land at any rank, or be absent
    # from the K + 1 results, in which case the farthest result is dropped.
    is_self = ind == np.arange(n_samples).reshape(-1, 1)
    is_self[~is_self.any(axis=1), -1] = True
    neighbours = ind[~is_self].reshape(n_samples, K)

    is_valid_label = (labels[neighbours] == labels.reshape(-1, 1)).astype(int)

    cum_sum = np.cumsum(is_valid_label, axis=1)
    P_K = cum_sum / np.arange(1, K+1).reshape(1, -1)
    # Precision only counts at ranks holding a relevant item; summing it at
    # every rank let single-hit queries score above 1.
    AP_K = (P_K * is_valid_label).sum(axis=1) / np.clip(cum_sum[:, -1], 1, K)

    return AP_K.mean()

In [5]:
# Check whether this PyTorch build can see a CUDA device (the recorded run returned False).
torch.cuda.is_available()

False

In [4]:
!pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu122

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu122


DEPRECATION: vtk -PKG-VERSION has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of vtk or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [10]:
# Evaluation settings and the checkpoint to evaluate.
args = AttrDict({})
args.batch_size = 128
args.random_rotate = False
# Load batches in the main process. The previous value of 150 asked the
# DataLoader to start 150 worker processes. On Windows workers are spawned and
# must re-import the dataset class, which is defined inside this notebook, so
# multi-process loading is unreliable here. It is presumably also why the
# metric cell further down has no recorded result (TODO confirm).
args.num_workers = 0
args.checkpoint = 'D:/NIR/results/classif3/0808/094121/best.ckpt'
# Originally this was ('cuda:2'); it may be worth switching back later if
# things stop working again.
device = torch.device('cpu')

# Only the inner network of the Lightning module is used, moved to the chosen device.
model = Classification.load_from_checkpoint(args.checkpoint).model.to(device = device)

In [11]:
GRAPH_DIR = 'D:/NIR/SolidLetters/graph_with_eattr/'
N_ITEMS_PER_CLASS = 100
SAMPLING_SEED = 0

# os.listdir returns entries in arbitrary order; sort so the sampling below
# always starts from the same list.
fnm_list = sorted(os.listdir(GRAPH_DIR))

# sample_from_letter samples with pandas without an explicit random_state,
# which draws from NumPy's global RNG. Seeding it makes the evaluated subset,
# and therefore the metric, reproducible across runs.
np.random.seed(SAMPLING_SEED)

# creating loaders for SolidLetters dataset querying: one per letter case
test_loaders = []
for case in ('lower', 'upper'):
    ncl, fnm_labels = sample_from_letter(fnm_list, N_ITEMS_PER_CLASS, case)
    dset = RankingDataset(GRAPH_DIR,
                          fnm_labels,
                          ncl)
    test_loaders.append(dset.get_dataloader(batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers))

D:\NIR\SolidLetters\graph_with_eattr\a_Nova Script_lower.bin True


100%|██████████| 2600/2600 [00:27<00:00, 94.02it/s] 


Done loading 2600 files




D:\NIR\SolidLetters\graph_with_eattr\a_Saira SemiCondensed SemiBold_upper.bin True


100%|██████████| 2600/2600 [00:27<00:00, 93.74it/s] 


Done loading 2600 files




In [None]:
import time
from tqdm import tqdm
# calculating metric
# cals_map_all returns a single number (the mean over loaders), not an
# iterable, so wrapping it in tqdm() shows no progress and hides the score
# behind a progress-bar object. Leave the call as the last expression so the
# notebook displays the value.
cals_map_all(test_loaders, model, device)

In [12]:
%%timeit
# NOTE(review): `X` and `labels` are not assigned in any visible cell; the
# recorded run of this cell failed with "NameError: name 'X' is not defined".
# They presumably should be an embedding matrix and its label vector (TODO confirm).
calc_map(X, labels)

NameError: name 'X' is not defined

In [68]:
%%timeit
# Times a single calc_map call (the recorded run reports about 17.5 ms per loop).
# NOTE(review): `X` and `labels` are not assigned in any visible cell, so this
# cell depends on kernel state that a fresh Restart & Run All would not have.
calc_map(X, labels)

17.5 ms ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
