In [1]:
# SageMaker notebook
# Enabling the backward compatibility during the data updates using weight interpolation
# This is meant to be a lightweight notebook implementing weight interpolation for reducing regressions in data updates 
# For Natural Language Processing Models

# Consider citing us if you find this notebook useful
#@article{Schumann2023BCWI,
#  title={Backward Compatibility During Data Updates by Weight Interpolation
#},
#  author={Raphael Schumann and Elman Mansimov and Yi-An Lai and Nikolaos Pappas and Xibin Gao and Yi Zhang},
#  journal={ArXiv},
#  year={2023},
#}

In [2]:
# Install huggingface transformers and datasets
!pip install transformers
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mLooking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [3]:
# imports
import json
import os
import torch
import transformers
import datasets
import requests
import copy

from transformers import RobertaTokenizer

In [4]:
# Shared Variables
DATA_URL = "https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/"
PT_MODEL_NAME = 'roberta-base'

In [5]:
# Load tokenizer
tokenizer = []

def load_tokenizer():
    if len(tokenizer) == 0:
        tokenizer.append(RobertaTokenizer.from_pretrained(PT_MODEL_NAME))

load_tokenizer()

print (tokenizer)

[PreTrainedTokenizer(name_or_path='roberta-base', vocab_size=50265, model_max_len=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)})]


In [6]:
def tokenize_function(examples):
    """Helper tokenize function"""
    return tokenizer[0](examples['text'], padding=False, truncation=True, return_attention_mask=False)

def load_dataset(splits, file_template):
    """Helper function to load MASSIVE dataset"""
    data_files = dict()
    for split in splits:
        data_files[split] = file_template.format(split)

    print (data_files)
    dataset = datasets.load_dataset('json', data_files=data_files)
    dataset_info = json.loads(requests.get(file_template.format('info')[:-1]).content)

    new_label_ids = [dataset_info['labels'].index(c) for c in dataset_info['add_classes']]
    old_label_ids = [i for i, c in enumerate(dataset_info['labels']) if c not in dataset_info['add_classes']]
    dataset_info['new_label_ids'] = new_label_ids
    dataset_info['old_label_ids'] = old_label_ids

    tokenized_dataset = dataset.map(tokenize_function, batched=False)

    return tokenized_dataset, dataset_info

In [7]:
# COMPLETE: Pull the data before update
old_dataset_files = os.path.join(DATA_URL, "add_data", "old", "{}.jsonl")
old_dataset, old_dataset_info = load_dataset(['train', 'dev', 'test'], old_dataset_files)

# should contain 1000 lines in train, 333 lines in dev, and 4000 lines in test
print ("Old dataset before update")
print (old_dataset)

print ("Old dataset before update info")
print (old_dataset_info)

Using custom data configuration default-b127397a3b6c8c0e
Found cached dataset json (/home/ec2-user/.cache/huggingface/datasets/json/default-b127397a3b6c8c0e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


{'train': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/old/train.jsonl', 'dev': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/old/dev.jsonl', 'test': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/old/test.jsonl'}


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/json/default-b127397a3b6c8c0e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-53a50176acadf523.arrow
Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/json/default-b127397a3b6c8c0e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7f8616a06d581f36.arrow


  0%|          | 0/4000 [00:00<?, ?ex/s]

Old dataset before update
DatasetDict({
    train: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 1000
    })
    dev: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 333
    })
    test: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 4000
    })
})
Old dataset before update info
{'name': 'MASSIVE', 'splits': ['train', 'dev', 'test'], 'labels': ['audio_volume_other', 'play_music', 'iot_hue_lighton', 'general_greet', 'calendar_set', 'audio_volume_down', 'social_query', 'audio_volume_mute', 'iot_wemo_on', 'iot_hue_lightup', 'audio_volume_up', 'iot_coffee', 'takeaway_query', 'qa_maths', 'play_game', 'cooking_query', 'iot_hue_lightdim', 'iot_wemo_off', 'music_settings', 'weather_query', 'news_query', 'alarm_remove', 'social_post', 'recommendation_events', 'transport_taxi', 'takeaway_order', 'music_quer

In [8]:
## Setup Sagemaker environment
import sagemaker
from sagemaker.huggingface import HuggingFace
import boto3

# Use remote mode
sagemaker_region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
bucket_name = sagemaker_session.default_bucket()

role = sagemaker.get_execution_role()
instance_type = "ml.p3.2xlarge"

print (f"sagemaker region: {sagemaker_region}")
print (f"sagemaker session: {sagemaker_session}")
print (f"bucket name: {bucket_name}")
print (f"role: {role}")
print (f"instance type: {instance_type}")


sagemaker region: us-east-1
sagemaker session: <sagemaker.session.Session object at 0x7f6e731e6e80>
bucket name: sagemaker-us-east-1-339569644176
role: arn:aws:iam::339569644176:role/saabm
instance type: ml.p3.2xlarge


In [9]:
# COMPLETE: Train the model using the data before update
# Something along these lines

# git configuration to download regression-free training script
git_config = {
    "repo": "https://github.com/amazon-science/regression-constraint-model-upgrade.git",
    "branch": "main",
}

huggingface_estimator = HuggingFace(
        entry_point='train.py',
        source_dir='nlp',
        git_config=git_config,
        instance_type=instance_type,
        instance_count=1,
        role=role,
        transformers_version='4.17.0',
        pytorch_version='1.10.2',
        py_version='py38',
        hyperparameters = {
            "dataset": "MASSIVE",
            "scenario": "add_data",
            "data_type": "old",
            "bucket_name": bucket_name
        }
)

In [10]:
huggingface_estimator.fit()

Cloning into '/tmp/tmp2myvuk5e'...
Already on 'main'


Your branch is up to date with 'origin/main'.
2023-01-03 15:15:21 Starting - Starting the training job...
2023-01-03 15:15:50 Starting - Preparing the instances for trainingProfilerReport-1672758921: InProgress
.....................
2023-01-03 15:19:10 Downloading - Downloading input data...
2023-01-03 15:19:50 Training - Downloading the training image......
2023-01-03 15:20:50 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-01-03 15:20:47,526 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-01-03 15:20:47,552 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-01-03 15:20:47,554 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-01-03 15:20:47,946 sagemaker-training-toolkit INFO     

In [11]:
# COMPLETE: Load old model from S3 
# Used for inference and calculating negative flip rate
import boto3

s3 = boto3.client('s3')

load_from_s3 = f"./bcwi_nlp_outputs/v1/MASSIVE/1111/add_data/old_model/model"

print (f"Loading from S3 {load_from_s3}")
os.makedirs("old_model_dir", exist_ok=True)
# load stuff from s3
with open("old_model_dir/config.json", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "config.json"), f)
    print("Downloaded old_model_dir/config.json")
with open("old_model_dir/hparams.json", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "hparams.json"), f)
    print("Downloaded old_model_dir/hparams.json")
with open("old_model_dir/pytorch_model.bin", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "pytorch_model.bin"), f)
    print("Downloaded old_model_dir/pytorch_model.bin")

from transformers import RobertaForSequenceClassification
old_model = RobertaForSequenceClassification.from_pretrained("old_model_dir")

print (old_model)

Loading from S3 ./bcwi_nlp_outputs/v1/MASSIVE/1111/add_data/old_model/model
Downloaded old_model_dir/config.json
Downloaded old_model_dir/hparams.json
Downloaded old_model_dir/pytorch_model.bin
RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
             

In [12]:
# COMPLETE: Pull the data after update
new_dataset_files = os.path.join(DATA_URL, "add_data", "updated", "{}.jsonl")
new_dataset, new_dataset_info = load_dataset(['train', 'dev', 'test'], new_dataset_files)

# should contain 1500 lines in train, 500 lines in dev, and 4000 lines in test
print ("New dataset after update")
print (new_dataset)

print ("New dataset after update info")
print (new_dataset_info)

{'train': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/updated/train.jsonl', 'dev': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/updated/dev.jsonl', 'test': 'https://raw.githubusercontent.com/amazon-science/regression-constraint-model-upgrade/main/nlp/data/MASSIVE/add_data/updated/test.jsonl'}


Using custom data configuration default-677ff51009bd10bc


Downloading and preparing dataset json/default to /home/ec2-user/.cache/huggingface/datasets/json/default-677ff51009bd10bc/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/35.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/94.7k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating dev split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/ec2-user/.cache/huggingface/datasets/json/default-677ff51009bd10bc/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1500 [00:00<?, ?ex/s]

  0%|          | 0/500 [00:00<?, ?ex/s]

  0%|          | 0/4000 [00:00<?, ?ex/s]

New dataset after update
DatasetDict({
    train: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 1500
    })
    dev: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'partition', 'label', 'text', 'label_name', 'input_ids'],
        num_rows: 4000
    })
})
New dataset after update info
{'name': 'MASSIVE', 'splits': ['train', 'dev', 'test'], 'labels': ['audio_volume_other', 'play_music', 'iot_hue_lighton', 'general_greet', 'calendar_set', 'audio_volume_down', 'social_query', 'audio_volume_mute', 'iot_wemo_on', 'iot_hue_lightup', 'audio_volume_up', 'iot_coffee', 'takeaway_query', 'qa_maths', 'play_game', 'cooking_query', 'iot_hue_lightdim', 'iot_wemo_off', 'music_settings', 'weather_query', 'news_query', 'alarm_remove', 'social_post', 'recommendation_events', 'transport_taxi', 'takeaway_order', 'music_query'

In [13]:
# COMPLETE: Get the old data checkpoint and continue training on the new data 

# git configuration to download regression-free training script
git_config = {
    "repo": "https://github.com/amazon-science/regression-constraint-model-upgrade.git",
    "branch": "main",
}

huggingface_estimator_new = HuggingFace(
        entry_point='train.py',
        source_dir='nlp',
        git_config=git_config,
        instance_type=instance_type,
        instance_count=1,
        role=role,
        transformers_version='4.17.0',
        pytorch_version='1.10.2',
        py_version='py38',
        hyperparameters = {
            "dataset": "MASSIVE",
            "scenario": "add_data",
            "data_type": "updated",
            "load_from_s3": f"./bcwi_nlp_outputs/v1/MASSIVE/1111/add_data/old_model/model",
            "bucket_name": bucket_name,
            "output_dir": "add_data",
            "num_epochs": 3
        }
)

In [14]:
huggingface_estimator_new.fit()

Cloning into '/tmp/tmpqclqbr5t'...


Your branch is up to date with 'origin/main'.


Already on 'main'


2023-01-03 15:31:11 Starting - Starting the training job...
2023-01-03 15:31:36 Starting - Preparing the instances for trainingProfilerReport-1672759870: InProgress
.....................
2023-01-03 15:34:55 Downloading - Downloading input data...
2023-01-03 15:35:35 Training - Downloading the training image......
2023-01-03 15:36:36 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-01-03 15:36:36,193 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-01-03 15:36:36,219 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-01-03 15:36:36,222 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-01-03 15:36:36,586 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTraining Env:[0

In [15]:
# COMPLETE: Load new model from S3
# Used for inference and calculating negative flip rate

load_from_s3 = f"./bcwi_nlp_outputs/add_data/MASSIVE/1111/add_data/old_model/model"

print (f"Loading from S3 {load_from_s3}")
os.makedirs("new_model_dir", exist_ok=True)
# load stuff from s3
with open("new_model_dir/config.json", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "config.json"), f)
    print("Downloaded new_model_dir/config.json")
with open("new_model_dir/hparams.json", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "hparams.json"), f)
    print("Downloaded new_model_dir/hparams.json")
with open("new_model_dir/pytorch_model.bin", "wb") as f:
    s3.download_fileobj(bucket_name, os.path.join(load_from_s3, "pytorch_model.bin"), f)
    print("Downloaded new_model_dir/pytorch_model.bin")

from transformers import RobertaForSequenceClassification
new_model = RobertaForSequenceClassification.from_pretrained("new_model_dir")

print (new_model)

Loading from S3 ./bcwi_nlp_outputs/add_data/MASSIVE/1111/add_data/old_model/model
Downloaded new_model_dir/config.json
Downloaded new_model_dir/hparams.json
Downloaded new_model_dir/pytorch_model.bin
RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
       

In [16]:
# Prepare for calculating negative flip rate and accuracy
test_data = new_dataset["test"] # test sets in old and new datasets are the same

def calculate_accuracy(data, model, batch_size=40):
    all_preds = []
    all_labels = []
    for i in range(0, len(data), batch_size):
        if int(i/batch_size) % 10 == 0:
            print (f"working on {int(i/batch_size)} out of {int(len(data)/batch_size)}")
        torch.cuda.empty_cache() # clear memory
        # process examples in the batch
        examples = data[i:(i+batch_size)]
        text = examples["text"]
        label = examples["label"]
        text_tokenizer = tokenizer[0](text, padding="max_length", truncation=True, max_length=max([len(t.split()) for t in text]),
                                   return_tensors="pt")
        
        with torch.no_grad():
            outputs = model(**text_tokenizer)
        preds = outputs.logits.argmax(-1).tolist()

        # merge them into the list that combines all labels and predictions
        all_preds.extend(preds)
        all_labels.extend(label)
    return all_preds, all_labels

In [17]:
# Get old model prediction
print ("Getting old model predictions")
old_model_preds, labels = calculate_accuracy(test_data, old_model)

Getting old model predictions
working on 0 out of 100
working on 10 out of 100
working on 20 out of 100
working on 30 out of 100
working on 40 out of 100
working on 50 out of 100
working on 60 out of 100
working on 70 out of 100
working on 80 out of 100
working on 90 out of 100


In [18]:
# Get new model prediction
print ("Getting new model predictions")
new_model_preds, _ = calculate_accuracy(test_data, new_model)

Getting new model predictions
working on 0 out of 100
working on 10 out of 100
working on 20 out of 100
working on 30 out of 100
working on 40 out of 100
working on 50 out of 100
working on 60 out of 100
working on 70 out of 100
working on 80 out of 100
working on 90 out of 100


In [19]:
# Get old model accuracy
old_acc = 100 * sum([old_pred == l for old_pred, l in zip(old_model_preds, labels)]) / float(len(labels))
print (f"Old model accuracy {old_acc}%")

# Get new model accuracy
new_acc = 100 * sum([new_pred == l for new_pred, l in zip(new_model_preds, labels)]) / float(len(labels))
print (f"New model accuracy {new_acc}%")

# Calculate negative flip rate
nfr = 100 * sum([old_pred == l and new_pred != l for old_pred, new_pred, l in zip(old_model_preds, new_model_preds, labels)]) / float(len(labels))
print (f"Negative Flip Rate {nfr}%")


Old model accuracy 81.7%
New model accuracy 82.55%
Negative Flip Rate 3.25%


In [20]:
# Interpolate between old and new model
# More details can be found in the Github repo

def interpolate_weights(old_model, new_models, alpha, new_label_ids=None, weighted=None):

    # Form soup ensemble of new models
    new_state_dicts = [new_model.state_dict() for new_model in new_models]
    new_model_state_dict = dict()
    for key in new_models[0].state_dict():
        if not (key.endswith('bias') or key.endswith('weight')):
            continue

        new_model_state_dict[key] = torch.mean(torch.stack([s[key] for s in new_state_dicts]), dim=0)

    print('alpha', alpha)
    metrics = dict()

    # Use the old model as the basis of the interpolated model weights
    model = copy.deepcopy(old_model)
    # All weights of a model can be accessed by its state_dict
    state_dict = model.state_dict()
    for key in state_dict:
        # Be sure to only interpolate weight matrices; includes e.g. layer norm matrices
        if not (key.endswith('bias') or key.endswith('weight')):
            continue

        if weighted is not None:

            # when alpha = 1.0, there can be NaN values due to numerical instabilities when values in the weight
            # matrix are too small. In this case we replace the NaNs with the weights of the old model.
            if alpha == 1.0:
                c = state_dict[key].detach().clone()

            # Inplace operations to modify the weights of the model.
            # State_dict initially holds the weights of the old model.
            state_dict[key] *= (alpha * weighted[key])
            state_dict[key] += ((1-alpha) * new_model_state_dict[key])
            state_dict[key] /= (alpha * weighted[key] + (1-alpha))

            # Three lines above as one-liner
            #state_dict[key].data.copy_(((alpha * weighted[key] * state_dict[key]) + ((1-alpha) * new_model_state_dict[key])) / (alpha * weighted[key] + (1-alpha)))


            if alpha == 1.0:
                nans = state_dict[key] != state_dict[key]
                state_dict[key][nans] = c[nans]
        else:
            # Simple linear interpolation with parameter alpha.
            # State_dict initially holds the weights of the old model.
            state_dict[key] *= alpha
            state_dict[key] += ((1 - alpha) * new_model_state_dict[key])

        # Copy classifier weights of new classes from the new model. The old model was not trained on those classes.
        if new_label_ids:
            if key == 'classifier.out_proj.weight':
                state_dict[key][new_label_ids, :] = new_model_state_dict[key][new_label_ids, :]
            if key == 'classifier.out_proj.bias':
                state_dict[key][new_label_ids] = new_model_state_dict[key][new_label_ids]
    return state_dict


In [21]:
# Interpolated model
interpolated_model = RobertaForSequenceClassification.from_pretrained("old_model_dir")

# Initialize interpolated model with old model weights
interpolated_state_dict = interpolate_weights(interpolated_model, [new_model], alpha=0.3)
# load interpolated_state_dict into new model
interpolated_model.load_state_dict(interpolated_state_dict, strict=True)

alpha 0.3


<All keys matched successfully>

In [22]:
# Get interpolated model prediction
print ("Getting interpolated model predictions")
interpolated_model_preds, _ = calculate_accuracy(test_data, interpolated_model)

Getting interpolated model predictions
working on 0 out of 100
working on 10 out of 100
working on 20 out of 100
working on 30 out of 100
working on 40 out of 100
working on 50 out of 100
working on 60 out of 100
working on 70 out of 100
working on 80 out of 100
working on 90 out of 100


In [23]:
# Get old model accuracy
old_acc = 100 * sum([old_pred == l for old_pred, l in zip(old_model_preds, labels)]) / float(len(labels))
print (f"Old model accuracy {old_acc}%")

# Get new model accuracy
new_acc = 100 * sum([new_pred == l for new_pred, l in zip(new_model_preds, labels)]) / float(len(labels))
print (f"New model accuracy {new_acc}%")

# Get interpolated model accuracy
interpolated_acc = 100 * sum([interpolated_pred == l for interpolated_pred, l in zip(interpolated_model_preds, labels)]) / float(len(labels))
print (f"Interpolated model accuracy {interpolated_acc}%")


# Calculate negative flip rate
nfr = 100 * sum([old_pred == l and new_pred != l for old_pred, new_pred, l in zip(old_model_preds, new_model_preds, labels)]) / float(len(labels))
print (f"Negative Flip Rate {nfr}%")

# Calculate negative flip rate of old model and interpolated models
interpolate_nfr = 100 * sum([old_pred == l and interpolate_pred != l for old_pred, interpolate_pred, l in zip(old_model_preds, interpolated_model_preds, labels)]) / float(len(labels))
print (f"Negative Flip Rate Old and Interpolated (Ours) {interpolate_nfr}%")

Old model accuracy 81.7%
New model accuracy 82.55%
Interpolated model accuracy 82.65%
Negative Flip Rate 3.25%
Negative Flip Rate Old and Interpolated (Ours) 2.225%
