# Downloading the data

Note: you should fill in `source_name` and `source_path` based on the correct name and URL for the larger dataset(s). `original_tool_image.zip` is done for you.

In [None]:
# Local file name the larger dataset archive is saved under by the download cell.
source_name = "coupe_images.zip"
# URL of the larger dataset archive. Left empty as a placeholder: fill it in
# before running the download cell below.
source_path = ""

In [None]:
import os
if not os.path.exists('./original_tool_image.zip'):
  ! wget -O original_tool_image.zip https://www.dropbox.com/s/l77u0mahlfyzi1r/original_tool_images.zip?dl=0
if not os.path.exists(source_name):
  ! wget -O $source_name $source_path

--2023-03-01 15:38:45--  https://www.dropbox.com/s/l77u0mahlfyzi1r/original_tool_images.zip?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/l77u0mahlfyzi1r/original_tool_images.zip [following]
--2023-03-01 15:38:45--  https://www.dropbox.com/s/raw/l77u0mahlfyzi1r/original_tool_images.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc7e3189eb2565101ec9e89a2185.dl.dropboxusercontent.com/cd/0/inline/B3bQgLixRq7lja6K3t9fV6C0Tig9XnSCKWVgqIAMVzyoP7g-ZMQ7fVlWWzU632CB_4QrUsfuFEPVOfSuLq5nDHRK2ei3c8dHA4Zr9AfrvtgNQEcnXGPFUnm4_3WA-4mEjhE_-wFeTf0tsAkh644RTGvBwj7ldZjfjh7r3yRfmemo2g/file# [following]
--2023-03-01 15:38:46--  https://uc7e3189eb2565101ec9e89a2185.dl.dropboxusercontent.com/cd/0/inline/B3bQgLixRq7lja6K3t9fV6C0Tig9XnSCKWVgqIA

# 0. Setup

## Git Clone

### From Source

In [None]:
! rm -rf -- GLAMOR

In [None]:
! git clone -b master https://github.com/asuprem/GLAMOR

Cloning into 'GLAMOR'...
remote: Enumerating objects: 9540, done.[K
remote: Counting objects: 100% (1443/1443), done.[K
remote: Compressing objects: 100% (468/468), done.[K
remote: Total 9540 (delta 893), reused 1373 (delta 828), pack-reused 8097
Receiving objects: 100% (9540/9540), 2.53 MiB | 5.39 MiB/s, done.
Resolving deltas: 100% (6284/6284), done.


In [None]:
!pip install -e GLAMOR/

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Obtaining file:///content/GLAMOR
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torchinfo>=1.6.5
  Downloading torchinfo-1.7.2-py3-none-any.whl (22 kB)
Collecting sentencepiece>=0.1.96
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentencepiece, torchinfo, ednaml
  Running setup.py develop for ednaml
Successfully installed ednaml-0.1.5 sentencepiece-0.1.97 torchinfo-1.7.2


###  From PyPi

In [None]:
#! python -V

In [None]:
#! pip3 install --pre ednaml==0.1.4

# ----------- Restart Notebook to Finish EdnaML Installation ----------------

# 1 Building the Crawler

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import ednaml
import glob, os
#from ednaml.core import EdnaDeploy, EdnaML
torch.__version__

'1.13.1+cu116'

## 1.1 Crawler class definition

In [None]:
# Here we define our custom model class
from ednaml.crawlers import Crawler
from zipfile import ZipFile # might be useful in unzipping!
import glob

class CarZamCrawler(Crawler):
  """Crawl the `original_tool_images` archive into train/val/test label tuples.

  Image file names are expected to look like
  "<type> <color> <year> <make> <model...>.jpg", for example
  "SUV Black 2014 Ford Expedition EL.jpg". The first four space-separated
  tokens of the name are converted into integer class labels.

  Attributes populated by `__init__`:
    classes:  {"vtype": n, "color": n, "year": n, "make": n} class counts.
    metadata: {"train"|"test"|"val": {"crawl": [...], "classes": classes}},
              where every crawl entry is (path, vtype, color, year, make)
              and the four labels are integer indices.
    type_lookup, color_lookup, year_lookup, make_lookup:
              label string -> integer index mappings.
  """

  def __init__(self, logger, file_name = "original_tool_images.zip", **kwargs): # Add your own arguments if needed!
    """Extract the archive (once) and build a shuffled 80/10/10 split.

    Args:
      logger: not used by this crawler.
      file_name: path of the zip archive, extracted only if `otool/` is absent.
      **kwargs: not used by this crawler.
    """
    import random

    self.classes = {}
    self.metadata = {}
    for split_name in ("train", "test", "val"):
      self.metadata[split_name] = {"crawl": []}

    # Unpack the archive only when the destination directory does not exist yet.
    fdest = "otool"
    if not os.path.exists(fdest):
      with ZipFile(file_name, 'r') as archive:
        archive.extractall(fdest)

    # glob returns files in arbitrary order; sorting first makes the seeded
    # shuffle below produce the same split on every machine and run.
    fllist = sorted(glob.glob(os.path.join(fdest, "original_tool_images/*.jpg")))
    # Vehicle types to keep. Each entry needs its own comma: adjacent string
    # literals are silently concatenated ("Crossover" "Diesel" -> "CrossoverDiesel").
    tokeep = {"Convertible", "Coupe", "Crossover", "Diesel", "Hybrid", "Sedan", "SUV"}

    tuple_prelim = [self.getinittuple(item) for item in fllist]
    # Keep only the selected vehicle types, and skip names that do not carry
    # at least the four label tokens (type, color, year, make).
    tuple_filtered = [
      item for item in tuple_prelim
      if len(item[0]) >= 4 and item[0][0] in tokeep
    ]
    tuple_expanded = [self.expand(item) for item in tuple_filtered]

    random.seed(3456)
    random.shuffle(tuple_expanded)

    train_fraction = 0.8
    val_fraction = 0.1
    train_sets = int(len(tuple_expanded)*train_fraction)
    val_sets = int(len(tuple_expanded)*val_fraction)

    # structure:  (path, type, color, year, make)
    # idx           0     1     2     3     4
    # Sorted so the label -> index mapping is stable across runs; plain set
    # iteration order for strings can change between interpreter sessions.
    types = sorted(set(item[1] for item in tuple_expanded))
    colors = sorted(set(item[2] for item in tuple_expanded))
    years = sorted(set(item[3] for item in tuple_expanded))
    makes = sorted(set(item[4] for item in tuple_expanded))

    self.classes["vtype"] = len(types)
    self.classes["color"] = len(colors)
    self.classes["year"] = len(years)
    self.classes["make"] = len(makes)

    self.type_lookup = {item:idx for idx,item in enumerate(types)}
    self.color_lookup = {item:idx for idx,item in enumerate(colors)}
    self.year_lookup = {item:idx for idx,item in enumerate(years)}
    self.make_lookup = {item:idx for idx,item in enumerate(makes)}

    # Replace the label strings by their indices; tokens after the make
    # (the model name) are dropped here.
    tuple_expanded = [
      (
        item[0],
        self.type_lookup[item[1]],
        self.color_lookup[item[2]],
        self.year_lookup[item[3]],
        self.make_lookup[item[4]],
      )
      for item in tuple_expanded
    ]

    # Contiguous, non-overlapping slices: train | val | test.
    val_end = train_sets + val_sets
    self.metadata["train"]["crawl"] = tuple_expanded[:train_sets]
    self.metadata["val"]["crawl"] = tuple_expanded[train_sets:val_end]
    self.metadata["test"]["crawl"] = tuple_expanded[val_end:]

    # structure:  (path, vtype, color, year, make)
    # idx           0     1     2     3     4

    self.metadata["train"]["classes"] = self.classes
    self.metadata["test"]["classes"] = self.classes
    self.metadata["val"]["classes"] = self.classes

  def getinittuple(self, item):
    """Return ([name tokens], path) for an image path.

    The tokens are the file's base name without extension, split on single spaces.
    """
    stem = os.path.splitext(os.path.basename(item))[0]
    return (stem.split(" "), item)

  def expand(self, item): # item is a tuple: (["type", "color", etc...], "path")
    """Flatten ([tokens...], path) into (path, token0, token1, ...)."""
    tokens, path = item
    return (path, *tokens)
    


## 1.2 Testing the Crawler

In [None]:
kwargs = {
    "logger" : None,
    "file_name" : "original_tool_image.zip",
    # add any other kwargs here...
}

In [None]:
crawler = CarZamCrawler(**kwargs)

In [None]:
crawler.classes # You should get the classes here

{'vtype': 5, 'color': 13, 'year': 13, 'make': 34}

In [None]:
print(crawler.metadata["train"]["crawl"][:5])  # You should get the list of tuples here

[('otool/original_tool_images/SUV Black 2014 Ford Expedition EL.jpg', 1, 1, 10, 7), ('otool/original_tool_images/Coupe White 2015 Chevrolet Camaro.jpg', 2, 8, 12, 28), ('otool/original_tool_images/SUV Gray 2018 Toyota 4Runner.jpg', 1, 11, 9, 10), ('otool/original_tool_images/SUV White 2014 Honda CR-V.jpg', 1, 8, 10, 6), ('otool/original_tool_images/Sedan Black 2019 Chevrolet Cruze.jpg', 4, 1, 3, 28)]


# 2 Statistics

Here, you can add your code to explore the data and obtain whatever plots you need. If you already have the code somewhere else, then you can keep it as is, and leave this blank. If you want everything in one place, feel free to use this section!

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Write code to collect info on # makes, models, year, type

# 3 Building a simple, single classification model for `original_tool_images.zip`

Here, we will build a simple model to classify a subset of `original_tool_images.zip`. Then we will expand to multiple labels. Finally, we will tackle the larger-scale dataset problem.

## 3.1 Single classification (Vehicle Type)

We will try with the vehicle type classifier first. Our architecture looks like:


[<img src="https://i.redd.it/cvjvsdlq4yx91.png" width="550"/>]

In [None]:
class_name = "vtype"  # Make sure to change this to whatever name you used for type in your `original_tool_images` crawler
class_idx = 1         # Make sure to change this to whatever index `type` is in your Crawler's tuple!
path_idx = 0          # Change this to whichever index in tuple has path
# Keyword arguments forwarded to the crawler; must name the archive downloaded earlier.
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile and inject the label name as SAVE.MODEL_QUALIFIER.
eml = EdnaML(config = "./GLAMOR/profiles/CarZam/base_config.yml", config_inject=[
    ("SAVE.MODEL_QUALIFIER", class_name)
])

# Pass our crawler arguments, and tell the dataset which tuple index holds the
# image path and which holds the label to classify.
eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["annotationidx"] = class_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["classificationclass"] = class_name

# Queue the generator and our custom crawler in place of the defaults.
eml.addGeneratorClass(ClassificationGenerator)
eml.addCrawlerClass(CarZamCrawler)

Injected key-value pair:  SAVE.MODEL_QUALIFIER, vtype


In [None]:
eml.apply()

15:39:08 ****************************************
15:39:08 
15:39:08 
15:39:08 Using the following configuration:
15:39:08 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx: 1
      classificationclass: vtype
      pathidx: 0
    GENERATOR: null
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: ClassificationTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  STEP_VERBOSE: 100
LOSS:
- KWARGS:
  - {}
  LABEL: ''
  LAMBDAS:
  - 1.0
  LOSSES:
  - Softma

46827520/46827520 bytes [████████████████████████████████████████████████████████████████████████████████████████████████████]
Download of resnet18-5c106cde.pth to https://download.pytorch.org/models/resnet18-5c106cde.pth completed


15:39:08 No previous stop detected. Will start from epoch 0
15:39:08 Loaded BaseStorage from ednaml.storage to build Storage
15:39:08 Reading data with DataReader DataReader
15:39:08 Default CRAWLER is <class 'ednaml.crawlers.Crawler'>
15:39:08 Default DATASET is <class 'torch.utils.data.dataset.Dataset'>
15:39:08 Default GENERATOR is <class 'ednaml.generators.ImageGenerator.ImageGenerator'>
15:39:08 Updating GENERATOR to queued class ClassificationGenerator
15:39:08 Updating CRAWLER to CarZamCrawler
15:39:09 Generated training data generator with 648 training data points
15:39:09 Running classification model with classes: {'vtype': {'classes': 5}}
15:39:09 Generated test data/query generator
15:39:09 Loaded classification_model_builder from ednaml.models to build model
15:39:09 Finished instantiating model with ClassificationResnet architecture
15:39:09 Adding plugins after constructing model
15:39:09 No saved model weights provided.
15:39:14 Model Summary retured the following error:

In [None]:
eml.train()

15:39:14 Starting training
15:39:14 Logging to:	origtoolimgs-v1-singleclass-vtype-logger.log
15:39:14 Models will be saved to local directory:	origtoolimgs-v1-singleclass-vtype
15:39:14 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:39:14 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:39:14 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:39:14 Performing initial evaluation...
15:39:21 Obtained features, validation in progress
15:39:21 Accuracy: 22.222%
15:39:21 Micro F-score: 0.222
15:39:21 Weighted F-score: 0.086
15:39:21 Starting training from 0
15:39:22 Parameter Group `opt-1`: Starting epoch 0 with 20 steps and learning rate 1.00000E-05
15:39:28 ********** Completed epoch 0 **********
15:39:28 Model evaluation triggered, but gradients still need accumulation. Will evaluate after accumulation.
15:39:28 Model save triggered, but gradients still need accumulation. Will save after accumulation.
15

In [None]:
resp = eml.eval()

15:40:49 Obtained features, validation in progress
15:40:49 Accuracy: 67.901%
15:40:49 Micro F-score: 0.679
15:40:49 Weighted F-score: 0.625


## 3.2 Single classification (Vehicle Color)

Next, let's do vehicle color. Architecture remains the same, but we now focus on color features.

In [None]:
class_name = "color"   # Make sure to change this to whatever name you used for color in your `original_tool_images` crawler
class_idx = 2         # Make sure to change this to whatever index `color` is in your Crawler's tuple!
path_idx = 0          # Change this to whichever index in tuple has path
# Keyword arguments forwarded to the crawler; must name the archive downloaded earlier.
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile and inject the label name as SAVE.MODEL_QUALIFIER.
eml = EdnaML(config = "./GLAMOR/profiles/CarZam/base_config.yml", config_inject=[
    ("SAVE.MODEL_QUALIFIER", class_name)
])

# Pass our crawler arguments, and tell the dataset which tuple index holds the
# image path and which holds the label to classify.
eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["annotationidx"] = class_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["classificationclass"] = class_name

# Queue the generator and our custom crawler in place of the defaults.
eml.addGeneratorClass(ClassificationGenerator)
eml.addCrawlerClass(CarZamCrawler)

Injected key-value pair:  SAVE.MODEL_QUALIFIER, color


In [None]:
eml.apply()

15:51:45 ****************************************
15:51:45 
15:51:45 
15:51:45 Using the following configuration:
15:51:45 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx: 2
      classificationclass: color
      pathidx: 0
    GENERATOR: null
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: ClassificationTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  STEP_VERBOSE: 100
LOSS:
- KWARGS:
  - {}
  LABEL: ''
  LAMBDAS:
  - 1.0
  LOSSES:
  - Softma

In [None]:
eml.train()

15:51:46 Starting training
15:51:46 Logging to:	origtoolimgs-v1-singleclass-color-logger.log
15:51:46 Models will be saved to local directory:	origtoolimgs-v1-singleclass-color
15:51:46 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:51:46 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:51:46 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:51:46 Performing initial evaluation...
15:51:47 Obtained features, validation in progress
15:51:47 Accuracy: 2.469%
15:51:47 Micro F-score: 0.025
15:51:47 Weighted F-score: 0.033
15:51:47 Starting training from 0
15:51:47 Parameter Group `opt-1`: Starting epoch 0 with 20 steps and learning rate 1.00000E-05
15:51:54 ********** Completed epoch 0 **********
15:51:54 Model evaluation triggered, but gradients still need accumulation. Will evaluate after accumulation.
15:51:54 Model save triggered, but gradients still need accumulation. Will save after accumulation.
15:

In [None]:
resp = eml.eval()

15:53:16 Obtained features, validation in progress
15:53:16 Accuracy: 81.481%
15:53:16 Micro F-score: 0.815
15:53:16 Weighted F-score: 0.796


## 3.3 Single classification (Vehicle Make)

And finally, a make classifier

In [None]:
class_name = "make"   # Make sure to change this to whatever name you used for make in your `original_tool_images` crawler
class_idx = 4         # Make sure to change this to whatever index `make` is in your Crawler's tuple!
path_idx = 0          # Change this to whichever index in tuple has path
# Keyword arguments forwarded to the crawler; must name the archive downloaded earlier.
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile and inject the label name as SAVE.MODEL_QUALIFIER.
eml = EdnaML(config = "./GLAMOR/profiles/CarZam/base_config.yml", config_inject=[
    ("SAVE.MODEL_QUALIFIER", class_name)
])

# Pass our crawler arguments, and tell the dataset which tuple index holds the
# image path and which holds the label to classify.
eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["annotationidx"] = class_idx
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["classificationclass"] = class_name

# Queue the generator and our custom crawler in place of the defaults.
eml.addGeneratorClass(ClassificationGenerator)
eml.addCrawlerClass(CarZamCrawler)

Injected key-value pair:  SAVE.MODEL_QUALIFIER, make


In [None]:
eml.apply()

15:53:16 ****************************************
15:53:16 
15:53:16 
15:53:16 Using the following configuration:
15:53:16 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx: 4
      classificationclass: make
      pathidx: 0
    GENERATOR: null
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: ClassificationTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  STEP_VERBOSE: 100
LOSS:
- KWARGS:
  - {}
  LABEL: ''
  LAMBDAS:
  - 1.0
  LOSSES:
  - Softmax

In [None]:
eml.train()

15:53:17 Starting training
15:53:17 Logging to:	origtoolimgs-v1-singleclass-make-logger.log
15:53:17 Models will be saved to local directory:	origtoolimgs-v1-singleclass-make
15:53:17 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:53:17 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:53:17 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:53:17 Performing initial evaluation...
15:53:18 Obtained features, validation in progress
15:53:18 Accuracy: 2.469%
15:53:18 Micro F-score: 0.025
15:53:18 Weighted F-score: 0.011
15:53:18 Starting training from 0
15:53:18 Parameter Group `opt-1`: Starting epoch 0 with 20 steps and learning rate 1.00000E-05
15:53:25 ********** Completed epoch 0 **********
15:53:25 Model evaluation triggered, but gradients still need accumulation. Will evaluate after accumulation.
15:53:25 Model save triggered, but gradients still need accumulation. Will save after accumulation.
15:53

In [None]:
resp = eml.eval()

15:54:47 Obtained features, validation in progress
15:54:47 Accuracy: 37.037%
15:54:47 Micro F-score: 0.370
15:54:47 Weighted F-score: 0.354


# 4. Multiclass classifiers

Multiclass classifiers try to classify multiple things at once, using the same features. Sometimes this works, if the features are colocated or have some overlap. Other times, it doesn't work very well. We can examine this on our small dataset first.

## 4.1 Multi-class classification (color-type)

Now we will try a model that performs vehicle type AND vehicle color classification together. The config is already prepared for this in `profiles/color_type.yml`.

Our architecture looks like:

[<img src="https://i.redd.it/7ndvmdlq4yx91.png" width="550"/>]

In [None]:
path_idx = 0          # Change this to whichever index in tuple has path
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile together with the color + type profile.
eml = EdnaML(config = ["./GLAMOR/profiles/CarZam/base_config.yml","./GLAMOR/profiles/CarZam/color_type.yml"])

eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
# `annotationidx` ([2, 1]) and `classificationclass` ([color, vtype]) are not
# set here: they already appear in the configuration printed by `eml.apply()`,
# presumably supplied by color_type.yml.

# No generator is registered here either: the printed configuration already
# selects GENERATOR: MultiClassificationGenerator.
eml.addCrawlerClass(CarZamCrawler)

In [None]:
eml.apply()

15:54:47 ****************************************
15:54:47 
15:54:47 
15:54:47 Using the following configuration:
15:54:47 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx:
      - 2
      - 1
      classificationclass:
      - color
      - vtype
      pathidx: 0
    GENERATOR: MultiClassificationGenerator
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: MultiClassificationTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  STEP_VERBOSE: 100
LOSS:

In [None]:
eml.train()

15:54:47 Starting training
15:54:47 Logging to:	origtoolimgs-v1-multiclass-color-vtype-logger.log
15:54:47 Models will be saved to local directory:	origtoolimgs-v1-multiclass-color-vtype
15:54:47 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:54:47 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:54:47 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:54:47 Performing initial evaluation...
15:54:48 Obtained features, validation in progress
15:54:48 Metrics	colorloss	typeloss
15:54:48 Accuracy	color: 0.012	vtype: 0.235
15:54:48 M F-Score	color: 0.012	vtype: 0.235
15:54:48 W F-Score	color: 0.013	vtype: 0.089
15:54:48 Starting training from 0
15:54:49 Parameter Group `opt-1`: Starting epoch 0 with 20 steps and learning rate 1.00000E-05
15:54:56 ********** Completed epoch 0 **********
15:54:56 Model evaluation triggered, but gradients still need accumulation. Will evaluate after accumulation.
15:54:56 Mod

In [None]:
resp = eml.eval()

15:56:18 Obtained features, validation in progress
15:56:18 Metrics	colorloss	typeloss
15:56:18 Accuracy	color: 0.716	vtype: 0.679
15:56:18 M F-Score	color: 0.716	vtype: 0.679
15:56:18 W F-Score	color: 0.679	vtype: 0.660


## 4.2 Multi-class classification (color-type-make)

Now we will try a model that performs vehicle type, vehicle color, and vehicle make classification together. The config is already prepared for this in `profiles/color_type_make.yml`.

Our architecture now looks like:

[<img src="https://i.redd.it/8sbfqblq4yx91.png" width="550"/>]

In [None]:
path_idx = 0          # Change this to whichever index in tuple has path
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile together with the color + type + make profile.
eml = EdnaML(config = ["./GLAMOR/profiles/CarZam/base_config.yml","./GLAMOR/profiles/CarZam/color_type_make.yml"])

eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
# `annotationidx` ([2, 1, 4]) and `classificationclass` ([color, vtype, make])
# are not set here: they already appear in the configuration printed by
# `eml.apply()`, presumably supplied by color_type_make.yml.

# No generator is registered here either: the printed configuration already
# selects GENERATOR: MultiClassificationGenerator.
eml.addCrawlerClass(CarZamCrawler)

In [None]:
eml.apply()

15:56:18 ****************************************
15:56:18 
15:56:18 
15:56:18 Using the following configuration:
15:56:18 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx:
      - 2
      - 1
      - 4
      classificationclass:
      - color
      - vtype
      - make
      pathidx: 0
    GENERATOR: MultiClassificationGenerator
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: MultiClassificationTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  

In [None]:
eml.train()

15:56:18 Starting training
15:56:18 Logging to:	origtoolimgs-v1-multiclass-color-vtype-make-logger.log
15:56:18 Models will be saved to local directory:	origtoolimgs-v1-multiclass-color-vtype-make
15:56:18 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:56:18 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:56:18 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:56:18 Performing initial evaluation...
15:56:19 Obtained features, validation in progress
15:56:19 Metrics	colorloss	typeloss	makeloss
15:56:19 Accuracy	color: 0.272	vtype: 0.185	make: 0.012
15:56:19 M F-Score	color: 0.272	vtype: 0.185	make: 0.012
15:56:19 W F-Score	color: 0.117	vtype: 0.103	make: 0.002
15:56:19 Starting training from 0
15:56:20 Parameter Group `opt-1`: Starting epoch 0 with 20 steps and learning rate 1.00000E-05
15:56:27 ********** Completed epoch 0 **********
15:56:27 Model evaluation triggered, but gradients still need accum

In [None]:
resp = eml.eval()

15:57:49 Obtained features, validation in progress
15:57:49 Metrics	colorloss	typeloss	makeloss
15:57:49 Accuracy	color: 0.840	vtype: 0.679	make: 0.198
15:57:49 M F-Score	color: 0.840	vtype: 0.679	make: 0.198
15:57:49 W F-Score	color: 0.832	vtype: 0.657	make: 0.181


# 5. Multibranch classification

Now we will try a model that uses multiple branches, one branch per label, for classification. Then we will fuse the branches to classify one more thing. In total, that gives three classifications from a single model.

## 5.1 Vehicle color and type, fused to classify vehicle make
Now we will try a model that performs vehicle type AND vehicle color classification together, using 2 different branches, and fuses the results together for make classification. The config is already prepared for this in `profiles/multibranch-ctm.yml`

Our architecture looks like:

[<img src="https://i.redd.it/q0urublq4yx91.png" width="550"/>]

Here, each branch yields its own prediction, and also sends features to the fusion branch (which is the make module in our case)

In [None]:
path_idx = 0          # Change this to whichever index in tuple has path
crawler_args = {"file_name" : "original_tool_image.zip"}

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from ednaml.core import EdnaML
from ednaml.generators import ClassificationGenerator

# Load the base CarZam profile together with the multi-branch (color, type -> make) profile.
eml = EdnaML(config = ["./GLAMOR/profiles/CarZam/base_config.yml","./GLAMOR/profiles/CarZam/multibranch-ctm.yml"])

eml.cfg.EXECUTION.DATAREADER.CRAWLER_ARGS = crawler_args
eml.cfg.EXECUTION.DATAREADER.DATASET_ARGS["pathidx"] = path_idx
# `annotationidx` ([2, 1, 4]) and `classificationclass` ([color, vtype, make])
# are not set here: they already appear in the configuration printed by
# `eml.apply()`, presumably supplied by multibranch-ctm.yml.

# No generator is registered here either: the printed configuration already
# selects GENERATOR: MultiClassificationGenerator.
eml.addCrawlerClass(CarZamCrawler)

In [None]:
eml.apply()

15:57:49 ****************************************
15:57:49 
15:57:49 
15:57:49 Using the following configuration:
15:57:49 DEPLOYMENT:
  DATAREADER:
    CRAWLER_ARGS: {}
    DATAREADER: DataReader
    DATASET_ARGS: {}
    GENERATOR: null
    GENERATOR_ARGS: {}
  DEPLOY: BaseDeploy
  DEPLOYMENT_ARGS: {}
  EPOCHS: 1
  OUTPUT_ARGS: {}
  PLUGIN:
    HOOKS: always
    RESET: false
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      file_name: original_tool_image.zip
    DATAREADER: DataReader
    DATASET_ARGS:
      annotationidx:
      - 2
      - 1
      - 4
      classificationclass:
      - color
      - vtype
      - make
      pathidx: 0
    GENERATOR: MultiClassificationGenerator
    GENERATOR_ARGS: {}
  EPOCHS: 10
  FP16: false
  MODEL_SERVING: Unused
  OPTIMIZER_BUILDER: ClassificationOptimizer
  PLUGIN:
    HOOKS: always
    RESET: false
  SKIPEVAL: false
  TEST_FREQUENCY: 1
  TRAINER: MultiBranchTrainer
  TRAINER_ARGS:
    accumulation_steps: 4
LOGGING:
  INPUT_SIZE: null
  STEP_VER

In [None]:
eml.train()

15:57:50 Starting training
15:57:50 Logging to:	origtoolimgs-v1-multibranch-color-vtype-make-logger.log
15:57:50 Models will be saved to local directory:	origtoolimgs-v1-multibranch-color-vtype-make
15:57:50 Models will be saved with base name:	origtoolimgs-v1_epoch[].pth
15:57:50 Optimizers will be saved with base name:	origtoolimgs-v1_epoch[]_optimizer.pth
15:57:50 Schedulers will be saved with base name:	origtoolimgs-v1_epoch[]_scheduler.pth
15:57:50 Performing initial evaluation...
15:57:51 Obtained features, validation in progress
15:57:51 Metrics	color-fc	type-fc	fuse	colorbranch	typebranch
15:57:51 Accuracy	color-fc: 0.160	type-fc: 0.123	fuse: 0.037	colorbranch: 0.012	typebranch: 0.012
15:57:51 M F-Score	color-fc: 0.160	type-fc: 0.123	fuse: 0.037	colorbranch: 0.012	typebranch: 0.012
15:57:51 W F-Score	color-fc: 0.102	type-fc: 0.096	fuse: 0.018	colorbranch: 0.020	typebranch: 0.000
15:57:51 Starting training from 0
15:57:52 Parameter Group `opt-1`: Starting epoch 0 with 20 steps a

In [None]:
resp = eml.eval()

15:59:29 Obtained features, validation in progress
15:59:29 Metrics	color-fc	type-fc	fuse	colorbranch	typebranch
15:59:29 Accuracy	color-fc: 0.778	type-fc: 0.605	fuse: 0.309	colorbranch: 0.185	typebranch: 0.210
15:59:29 M F-Score	color-fc: 0.778	type-fc: 0.605	fuse: 0.309	colorbranch: 0.185	typebranch: 0.210
15:59:29 W F-Score	color-fc: 0.761	type-fc: 0.555	fuse: 0.285	colorbranch: 0.196	typebranch: 0.172
