# Explore BAM! Dataset and Models

I want to do the following:

1. Find the class distribution.
2. Load some Suggero examples.
3. Run the latest BAM! model from `results` on them and check whether its predicted classes match the images.

In [None]:
from osTools import *
from listOps import *
from PIL import Image
from more_itertools import *
import sys
import h5py
import matplotlib.pyplot as plt
import numpy as np

# Locations of the BAM! data: the image directory whose entries give the
# class names, and the HDF5 file that stores the labels.
BAM_DIR = '/misc/extra/data/sumitc/bam'
BAM_H5 = '/misc/extra/data/sumitc/bam.h5'

# One name per entry returned by listdir(BAM_DIR).
# NOTE(review): listdir/getBaseName come from the wildcard imports above;
# the order of classNames follows whatever order listdir returns — confirm
# it matches the class ids stored in the labels.
classNames = [getBaseName(entry) for entry in listdir(BAM_DIR)]

# Read the full 'labels' dataset into memory; the file is closed again as
# soon as the with-block exits.
with h5py.File(BAM_H5, 'r') as hf:
    labels = hf.get('labels')[()]

print('BAM! Dataset Size -', labels.shape[0])
print(classNames)

In [None]:
# Class distribution of the BAM! labels, drawn as a pie chart.
#
# The bin edges are given explicitly and centred on the integers
# 0 .. len(classNames) - 1, so slice k always counts class id k and lines up
# with classNames[k]. Passing an integer bin count instead would spread the
# bins evenly over [labels.min(), labels.max()], which shifts the slices
# whenever the smallest or largest class id has no samples.
# NOTE(review): assumes labels holds integer class ids indexing classNames — confirm.
binEdges = np.arange(len(classNames) + 1) - 0.5
hist, _ = np.histogram(labels, bins=binEdges)
plt.pie(hist, labels=classNames, autopct='%1.1f%%')
plt.show()

In [None]:
# TripletSVGDataSet is expected to be provided by this wildcard import.
from TripletDataset import *

# The 'train' folder of the Suggero data.
SUGGERO_DIR = '/misc/extra/data/sumitc/suggero/train'

dataset = TripletSVGDataSet(SUGGERO_DIR)

# Report how many entries the dataset exposes through svgDatas.
numSuggero = len(dataset.svgDatas)
print('Suggero Dataset -', numSuggero)

In [None]:
import random
from treeOps import *
import albumentations as A
from albumentations.pytorch import ToTensorV2

# How many Suggero graphics to draw, and how many to tile side by side in
# each displayed row.
NUM_SAMPLES = 100
IMGS_PER_ROW = 5
# Seed of the private sampler below, so that Restart & Run All shows the same
# graphics every time; set to None to draw a fresh sample on each run.
SAMPLE_SEED = 0

dataPts = list(zip(dataset.svgDatas, dataset.files))
# A dedicated Random instance keeps the draw reproducible without touching
# the global `random` state. k is capped so that a dataset smaller than
# NUM_SAMPLES does not make sample() raise.
sampler = random.Random(SAMPLE_SEED)
samples = sampler.sample(dataPts, k=min(NUM_SAMPLES, len(dataPts)))

# Preprocessing applied to each image before it is fed to the classifier in
# the next cell: per-channel normalisation with the standard ImageNet
# mean/std, then conversion to a torch tensor.
# NOTE(review): max_pixel_value=1 presumes loadImage yields values already in
# [0, 1] — confirm.
testTransform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=1),
    ToTensorV2(),
])

# Load the 'whole' image of each sampled graphic, using the root found in its
# tree.
imgs = []
for t, f in samples:
    r = findRoot(t)
    im = np.array(dataset.loadImage(f, 'whole', r))
    imgs.append(im)

# Show the images IMGS_PER_ROW at a time, concatenated along the width axis.
# Iterating over len(imgs) keeps the loop valid for any NUM_SAMPLES.
# NOTE(review): concatenation assumes all images share the same height and
# channel count — confirm.
for start in range(0, len(imgs), IMGS_PER_ROW):
    plt.imshow(np.concatenate(imgs[start:start + IMGS_PER_ROW], axis=1))
    plt.show()

In [None]:
# Load Model And Test
from torchvision.models import *
import torch
import torch.nn as nn

# Checkpoints live under <parent of the current working directory>/vectorrvnn/results.
# NOTE(review): `os` is expected to come from one of the earlier wildcard imports.
BASE_DIR = os.path.dirname(os.path.abspath(''))
MODEL_DIR = os.path.join(BASE_DIR, 'vectorrvnn', 'results', 'bam_aug2')

# ResNet-50 whose final layer is replaced by a 20-way linear classifier, then
# restored from the 'model' entry of the epoch-15 checkpoint.
model = resnet50(pretrained=True)
model.fc = nn.Linear(2048, 20)
state_dict = torch.load(os.path.join(MODEL_DIR, "epoch_15.pth"))
model.load_state_dict(state_dict['model'])
model = model.float()
model.to("cuda")
model.eval()

# Preprocess every sampled image and run them through the network as a single
# batch; no gradients are needed for inference.
modelInput = torch.stack([testTransform(image=im)['image'] for im in imgs]).cuda()
with torch.no_grad():
    scores = model(modelInput)
    # scores has one row per image and one column per class, so the softmax
    # must run over dim=1 for each image's class probabilities to sum to one.
    # (dim=0 would normalise every class across the batch instead, making the
    # per-image top-5 below meaningless.)
    probabilities = torch.nn.functional.softmax(scores, dim=1)
    top5_prob, top5_catid = torch.topk(probabilities, 5, dim=1)

# Show each image followed by the names of its five most probable classes,
# most probable first.
for i, im in enumerate(imgs):
    plt.imshow(im)
    plt.show()
    # tolist() turns the CUDA index tensor into plain Python ints before
    # they are used to index classNames.
    for catId in top5_catid[i].tolist():
        print(classNames[catId])


## Test elements of the Triplet Network

In [None]:
import json
from Triplet import *

# Project-wide settings; only the 'suggero_dest' entry is read here.
with open('./commonConfig.json') as fd:
    config = json.load(fd)

# Dataset built from the 'val' folder under the configured Suggero location.
valData = TripletSVGDataSet(osp.join(config['suggero_dest'], 'val'))

# Loader settings gathered in one place. Each batch is assembled by
# aggregateDict, which is handed torch.stack as its combining function.
loaderOptions = dict(
    batch_size=128,
    sampler=TripletSampler(valData.svgDatas, 25600, True),
    pin_memory=True,
    num_workers=6,
    collate_fn=lambda x : aggregateDict(x, torch.stack),
)
val_dataloader = torch.utils.data.DataLoader(valData, **loaderOptions)

# Initiate main model.
model = TripletNet({'hidden_size': 100}).float()

In [None]:
from PositionalEncoding import *

# Grab a single batch from the validation loader.
batch = next(iter(val_dataloader))

pe = PositionalEncoding()
pe.to("cuda")
# The inputs below are moved to the GPU, so the model must live there too.
# TripletNet is created without an explicit device move; this call is a
# no-op if it is already on the GPU.
# NOTE(review): assumes TripletNet is a torch.nn.Module — confirm.
model.to("cuda")

nodeWhole = batch['refWhole'].cuda()
positions = batch['refPositions'].cuda()

# Min-max rescale the batch to [0, 1] and move the channel axis last so that
# matplotlib can display it.
# NOTE(review): assumes refWhole is laid out as (batch, channels, H, W) — confirm.
vis = (nodeWhole - nodeWhole.min()) / (nodeWhole.max() - nodeWhole.min())
vis = vis.permute((0, 2, 3, 1)).detach().cpu().numpy()
# Checking Input
for i in range(0, vis.shape[0], 32):
    plt.imshow(vis[i])
    plt.show()

# Compare the conv features before and after the positional encoding is
# applied. This is inspection only, so no autograd graph is kept.
with torch.no_grad():
    x = model.conv(nodeWhole)
    x_ = pe(x, positions)

# First number: size of the change introduced by the encoding;
# second number: size of the features themselves.
print('Norms -', torch.linalg.norm(x_ - x).item(), torch.linalg.norm(x).item())

# Visualise the encoding stored on the module as an image.
encoding = pe.pe.squeeze().detach().cpu().numpy()
plt.imshow(encoding)
plt.show()