### Housekeeping code snippets
imports, and code to download images

In [None]:
import requests, time
import fastbook
from tqdm import tqdm
# fastbook.setup_book()
from fastbook import *
from fastai.vision.widgets import *

In [None]:
import os
# Subscription key sent to the Bing image search API; read from the
# AZURE_SEARCH_KEY environment variable, with 'XXX' as a placeholder fallback.
key = os.environ.get('AZURE_SEARCH_KEY', 'XXX')

In [None]:
def search_images_bing(key, term, min_sz=128, max_images=150, offset=0):
    """Query the Bing image search endpoint, paging until `max_images` are requested.

    Args:
        key: subscription key sent in the `Ocp-Apim-Subscription-Key` header.
        term: search query string.
        min_sz: value sent as both `min_height` and `min_width`.
        max_images: total number of results to ask for across all pages.
        offset: result offset of the first page.

    Returns:
        An `L` holding the entries of each response's `value` list, in the
        order they were returned.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    limit = 150  # page size: never ask for more than this in one request
    search_url = "https://api.bing.microsoft.com/v7.0/images/search"
    headers = {"Ocp-Apim-Subscription-Key": key}
    li = []
    left = max_images
    while left > 0:
        # NOTE(review): Bing's documented size filters appear to be spelled
        # minHeight/minWidth -- confirm these snake_case keys are honoured.
        params = dict(q=term, count=min(left, limit), min_height=min_sz,
                      min_width=min_sz, offset=offset)
        response = requests.get(search_url, headers=headers, params=params)
        response.raise_for_status()
        page = list(response.json()['value'])
        if not page:
            # The service has nothing more for this query; further pages
            # would only cost extra requests.
            break
        li += page
        left -= limit
        offset += limit
    return L(li)

In [None]:
# Download the training images, one sub-folder per instrument class
inst_types = ['ukulele', 'acoustic guitar', 'electric guitar', "4 string bass guitar"]
path = Path('instruments')

# 2-fold test set; alternative values for `path`:
# path = Path('instruments_test') --> 20 images 
# path = Path('instruments_test_1') --> 20 images

# Only query the search API when the dataset folder does not exist yet
if not path.exists():
    path.mkdir()
    for instrument in tqdm(inst_types):
        class_dir = path/instrument
        class_dir.mkdir(exist_ok=True)
        hits = search_images_bing(key, instrument, max_images=150)
        download_images(class_dir, urls=hits.attrgot('contentUrl'))

### Training with multiple hyperparameters

In [None]:
from itertools import product
import torch
import wandb
import gc
import requests, time
import fastbook
from tqdm import tqdm
fastbook.setup_book()
from fastbook import *
from fastai.vision.widgets import *
from fastai.callback.wandb import *

# Hyperparameter grid: the sweep trains one model per combination of these lists.
# (image size, batch size) pairs -- the two values are unpacked together per trial
size = [(224, 64), (336, 16), (448, 16)]
# Candidate min_scale / max_scale values handed to RandomResizedCrop
min_scale = [0.2]
max_scale = [0.5]
# Candidate min_zoom / max_zoom values handed to aug_transforms
min_zoom = [1.0, 1.1, 1.2, 1.3]
max_zoom = [1.3]


def verify_images(fns):
    """Return an `L` of the entries of `fns` for which `verify_image` is falsy."""
    return L([fn for fn in fns if not verify_image(fn)])

def validate(path, classes, learn):
    """Return the fraction of images under `path` that `learn` labels correctly.

    For every name in `classes`, the images found by `get_image_files` in
    `path/<name>` are passed one by one to `learn.predict`; a prediction
    counts as correct when its first element equals the folder name.

    Args:
        path: directory containing one sub-folder per class.
        classes: iterable of class names (also the sub-folder names).
        learn: object exposing `predict(image_file)`.

    Returns:
        correct / total as a float, or 0.0 when no images were found
        (instead of raising ZeroDivisionError).
    """
    correct = 0
    count = 0
    for label in classes:
        for image_file in get_image_files(path/label):
            count += 1
            predicted = learn.predict(image_file)[0]
            if label == predicted:
                correct += 1
    # Guard the empty test set: there is nothing to divide by.
    return correct / count if count else 0.0

def train_and_validate(path, test_path, classes, size, bs, min_scale, max_scale, min_zoom, max_zoom):
    """Train a resnet18 learner for one hyperparameter combination and score it.

    Builds dataloaders from the image files under `path` (random 80/20
    split with seed 42, labels from `parent_label`), runs
    `learn.fine_tune(0)`, scores the learner on `test_path` with `validate`,
    and exports it to `models_exp/{size}_{min_zoom}_{max_zoom}.pkl`.

    Args:
        path: folder with the training images.
        test_path: folder with the held-out images passed to `validate`.
        classes: class names passed to `validate`.
        size: target size given to `RandomResizedCrop`.
        bs: batch size for the dataloaders.
        min_scale, max_scale: scale bounds given to `RandomResizedCrop`.
        min_zoom, max_zoom: zoom bounds given to `aug_transforms`.

    Returns:
        The accuracy returned by `validate` on `test_path`.
    """
    train_db = DataBlock(blocks=(ImageBlock, CategoryBlock),
                  get_items=get_image_files,
                  splitter=RandomSplitter(valid_pct=0.2, seed=42),
                  get_y=parent_label,
                  item_tfms=RandomResizedCrop(size, min_scale=min_scale, max_scale=max_scale),
                  batch_tfms=aug_transforms(min_zoom=min_zoom, max_zoom=max_zoom))
    dls = train_db.dataloaders(path, bs=bs)
    # NOTE(review): the name omits min_scale/max_scale, so trials that differ
    # only in those values would share one checkpoint and one export file.
    fname = f'{size}_{min_zoom}_{max_zoom}'
    learn = cnn_learner(dls, resnet18, metrics=error_rate, cbs=[SaveModelCallback(fname=fname)])
    learn.fine_tune(0)
    accuracy = validate(test_path, classes, learn)
    # exist_ok avoids the separate existence check before creating the folder
    os.makedirs('models_exp', exist_ok=True)
    learn.export(f'models_exp/{fname}.pkl')
    # Drop the references and flush the CUDA cache before the next trial
    del dls, train_db, learn
    gc.collect()
    torch.cuda.empty_cache()
    return accuracy

# Sweep: train and score one model per hyperparameter combination
results = []
trials = list(product(size, min_scale, max_scale, min_zoom, max_zoom))
path = Path('instruments')
test_path = Path('instruments_test')

# Delete every image that fails verification, in both the training and test folders
for folder in (path, test_path):
    failed = verify_images(get_image_files(folder))
    failed.map(Path.unlink)

classes = 'acoustic guitar', 'electric guitar', 'ukulele', '4 string bass guitar'
for trial in trials:
    # Unpack into trial-local names so the grid lists (size, min_scale, ...)
    # are not overwritten by scalars while the loop runs.
    (img_size, bs), trial_min_scale, trial_max_scale, trial_min_zoom, trial_max_zoom = trial
    result = train_and_validate(path, test_path, classes, img_size, bs,
                                trial_min_scale, trial_max_scale,
                                trial_min_zoom, trial_max_zoom)
    results.append((trial, result))
    print(trial, result)

In [None]:
# Show the collected (trial, accuracy) pairs
results

### Testing the models' performance on 2 unseen test sets

In [None]:
# Score every exported learner on each held-out fold
path = Path('models_exp')
test_path = [Path('instruments_test'), Path('instruments_test_1')]
# test_path = Path('instruments_test')
classes = 'acoustic guitar', 'electric guitar', 'ukulele', '4 string bass guitar'

results_all = []
for pkl_file in path.ls(file_exts='.pkl'):
    # Let go of any previous learner before loading the next one
    learn = None
    gc.collect()
    torch.cuda.empty_cache()
    learn = load_learner(pkl_file)
    # One accuracy per fold, in the order of `test_path`
    fold_accuracy = [validate(fold, classes, learn) for fold in test_path]
    results_all.append((pkl_file, fold_accuracy))
    del learn
    gc.collect()
    torch.cuda.empty_cache()
print(results_all)

In [None]:
# Rank by accuracy on the second fold (index 1), highest first, and keep
# the model files of the first six entries.
ascending = sorted(results_all, key=lambda entry: entry[1][1])
top_six = ascending[::-1][:6]
best_models = [entry[0] for entry in top_six]

### Ensemble from the best 6 models
Each model votes on every image, and the most common prediction is taken as the ensemble's answer.

In [None]:
from collections import Counter

def validate_ensemble_models(best_models, test_path=None, classes=None):
    """Score each model on its own and as a majority-vote ensemble.

    Args:
        best_models: model files to load with `load_learner`.
        test_path: list of test folders (Path objects), each holding one
            sub-folder per class. Defaults to `instruments_test` and
            `instruments_test_1`.
        classes: class names (also the sub-folder names). Defaults to the
            four instrument classes.

    Returns:
        A tuple `(ensemble_accuracy, wrong_preds, acc)` where
        `ensemble_accuracy` is correct / total over all folds (0.0 when no
        images were found), `wrong_preds` lists `(folder, image, voted_class)`
        for every misclassified image, and `acc` lists
        `(model_file, mean accuracy over the folds)` for each model.
    """
    if test_path is None:
        test_path = [Path('instruments_test'), Path('instruments_test_1')]
    if classes is None:
        classes = 'acoustic guitar', 'electric guitar', 'ukulele', '4 string bass guitar'
    models = [(model, load_learner(model)) for model in best_models]

    # Per-model accuracy, averaged over however many folds were supplied
    acc = []
    for model_file, learner in models:
        fold_scores = [validate(fold, classes, learner) for fold in test_path]
        mean_score = sum(fold_scores) / len(fold_scores) if fold_scores else 0.0
        acc.append((model_file, mean_score))

    # Ensemble accuracy: every model predicts each image, the most common label wins
    count = 0
    correct = 0
    wrong_preds = []
    for o in classes:
        for fold in test_path:
            dest = fold/o
            for i in get_image_files(dest):
                count += 1
                votes = Counter(learner.predict(i)[0] for _, learner in models)
                # most_common orders equal counts by first appearance, so a
                # tie goes to the label voted by the earliest model in the list.
                winner = votes.most_common(1)
                pred_class = winner[0][0]
                if o == pred_class:
                    correct += 1
                else:
                    print(dest, i, pred_class, winner)
                    wrong_preds.append((dest, i, pred_class))

    ensemble_accuracy = correct / count if count else 0.0
    return ensemble_accuracy, wrong_preds, acc
# a: ensemble accuracy, w: misclassified images, acc: per-model accuracies
a, w, acc = validate_ensemble_models(best_models)

In [None]:
# Show the ensemble accuracy, the misclassified images and the per-model accuracies
a, w, acc