In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from gpt2_classifier import (
    init_gpt2,
    freeze_layers,
    GPT2ForSequenceClassification
)
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from transformers import AddedToken, GPT2Config, GPT2Tokenizer #, GPT2ForSequenceClassification, GPT2Model
from tqdm.autonotebook import tqdm
from utils import (
    calc_accuracy,
    custom_data_gen,
    init_data,
    init_quant_params,
    measure_inference_time,
    rnd,
    run_batch
)



In [2]:
# Run configuration. Only DEVICE is referenced by the cells visible in this
# notebook; the other values look like training hyperparameters carried over
# from a training notebook -- NOTE(review): confirm whether they are still needed.
LR, LR_DECAY, MIN_LR = 1e-4, 0.8, 5e-6
N_EPOCHS, PLATEAU = 1, 3
BATCH_SIZE, MAX_LEN = 256, 100

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [3]:
# Load the stock GPT-2 tokenizer and register "<|endoftext|>" as its padding
# token. The call returns the number of tokens added to the vocabulary; the
# saved cell output of 0 shows that no new vocabulary entry was created.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens(
    {
        "pad_token": AddedToken(
            "<|endoftext|>",
            lstrip=False,
            rstrip=False,
            single_word=False,
            normalized=True,
        )
    }
)

0

In [4]:
# Start from the previously saved metrics when they exist, so entries from
# earlier runs are kept. On a fresh checkout the file has not been written yet
# (it is only created by the final cell), so begin with an empty dict instead
# of failing with FileNotFoundError.
if os.path.exists("metrics_quant.json"):
    with open("metrics_quant.json", "r") as f:
        dict_acc = json.load(f)
else:
    dict_acc = {}

In [5]:
# Show the accuracies loaded from metrics_quant.json; the evaluation cells
# below overwrite these entries key by key.
dict_acc

{'fp16': 0.9177964154411765,
 'dynamic16': 0.9177676930147058,
 'dynamic8': 0.9102424172794118,
 'dynamic4': 0.0744485294117647,
 'static16': 0.9177964154411765,
 'static8': 0.9102424172794118,
 'static4': 0.0732421875}

In [6]:
# Uncomment to ignore the saved metrics and start from an empty dict:
# dict_acc = {}

__Full precision accuracy__

In [7]:
# Baseline run: evaluate the fine-tuned checkpoint with the "fp" / 16-bit
# setting and store the accuracy in dict_acc under the key "fp16".
quantization = {"type": "fp", "n_bits": 16}
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_fp = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_fp.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

acc_fp = calc_accuracy(model_fp, data_infer, tokenizer)
print(acc_fp)

# Key is "<type><n_bits>", e.g. "fp16".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_fp

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0.9177964154411765


__16-bit dynamic quantization accuracy__

In [8]:
# Evaluate the fine-tuned checkpoint with 16-bit dynamic quantization and
# store the accuracy in dict_acc under the key "dynamic16".
quantization = {"type": "dynamic", "n_bits": 16}
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_dyn_16 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_dyn_16.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

acc_dyn_16 = calc_accuracy(model_dyn_16, data_infer, tokenizer)
print(acc_dyn_16)

# Key is "<type><n_bits>", e.g. "dynamic16".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_dyn_16

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0.9177676930147058


__8-bit dynamic quantization accuracy__

In [9]:
# Evaluate the fine-tuned checkpoint with 8-bit dynamic quantization and
# store the accuracy in dict_acc under the key "dynamic8".
quantization = {"type": "dynamic", "n_bits": 8}
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_dyn_8 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_dyn_8.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

acc_dyn_8 = calc_accuracy(model_dyn_8, data_infer, tokenizer)
print(acc_dyn_8)

# Key is "<type><n_bits>", e.g. "dynamic8".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_dyn_8

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0.9102424172794118


__4-bit dynamic quantization accuracy__

In [10]:
# Evaluate the fine-tuned checkpoint with 4-bit dynamic quantization and
# store the accuracy in dict_acc under the key "dynamic4".
quantization = {"type": "dynamic", "n_bits": 4}
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_dyn_4 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_dyn_4.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

acc_dyn_4 = calc_accuracy(model_dyn_4, data_infer, tokenizer)
print(acc_dyn_4)

# Key is "<type><n_bits>", e.g. "dynamic4".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_dyn_4

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0.0744485294117647


__16-bit static quantization accuracy__

In [11]:
# Evaluate the fine-tuned checkpoint with 16-bit static quantization and
# store the accuracy in dict_acc under the key "static16".
quantization = {"type": "static", "n_bits": 16}

data_quant = init_data("quant")  # presumably the calibration split -- confirm in utils.init_data
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_stat_16 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_stat_16.load_state_dict(torch.load(checkpoint, map_location=DEVICE))
# The "quant" data is passed through init_quant_params before evaluation.
# NOTE(review): presumably this fixes the static quantization parameters --
# confirm in utils.init_quant_params.
model_stat_16 = init_quant_params(model_stat_16, data_quant, tokenizer)

acc_stat_16 = calc_accuracy(model_stat_16, data_infer, tokenizer)
print(acc_stat_16)

# Key is "<type><n_bits>", e.g. "static16".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_stat_16

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0.9178251378676471


__8-bit static quantization accuracy__

In [12]:
# Evaluate the fine-tuned checkpoint with 8-bit static quantization and
# store the accuracy in dict_acc under the key "static8".
quantization = {"type": "static", "n_bits": 8}

data_quant = init_data("quant")  # presumably the calibration split -- confirm in utils.init_data
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_stat_8 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_stat_8.load_state_dict(torch.load(checkpoint, map_location=DEVICE))
# The "quant" data is passed through init_quant_params before evaluation.
# NOTE(review): presumably this fixes the static quantization parameters --
# confirm in utils.init_quant_params.
model_stat_8 = init_quant_params(model_stat_8, data_quant, tokenizer)

acc_stat_8 = calc_accuracy(model_stat_8, data_infer, tokenizer)
print(acc_stat_8)

# Key is "<type><n_bits>", e.g. "static8".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_stat_8

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0.9128274356617647


__4-bit static quantization accuracy__

In [13]:
# Evaluate the fine-tuned checkpoint with 4-bit static quantization and
# store the accuracy in dict_acc under the key "static4".
quantization = {"type": "static", "n_bits": 4}

data_quant = init_data("quant")  # presumably the calibration split -- confirm in utils.init_data
data_infer = init_data("infer")  # presumably the evaluation split -- confirm in utils.init_data

model_stat_4 = init_gpt2(tokenizer, DEVICE, quantization=quantization)
checkpoint = "gpt2_best_epoch_1_loss_1.80571.pt"
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this notebook runs on a CPU-only machine.
model_stat_4.load_state_dict(torch.load(checkpoint, map_location=DEVICE))
# The "quant" data is passed through init_quant_params before evaluation.
# NOTE(review): presumably this fixes the static quantization parameters --
# confirm in utils.init_quant_params.
model_stat_4 = init_quant_params(model_stat_4, data_quant, tokenizer)

acc_stat_4 = calc_accuracy(model_stat_4, data_infer, tokenizer)
print(acc_stat_4)

# Key is "<type><n_bits>", e.g. "static4".
dict_acc[f'{quantization["type"]}{quantization["n_bits"]}'] = acc_stat_4

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0.29719094669117646


In [14]:
# Debug peek: read the `static_batch` attribute of the attention `c_attn` layer
# in transformer block index 10 of the 16-bit static model (saved output: 0).
# NOTE(review): what `static_batch` tracks is defined in gpt2_classifier --
# confirm its meaning, and whether this check still belongs in the notebook.
model_stat_16.transformer.h[10].attn.c_attn.static_batch

0

In [15]:
# Persist the updated accuracy table, replacing the previous metrics file.
metrics_json = json.dumps(dict_acc)
with open("metrics_quant.json", "w") as metrics_file:
    metrics_file.write(metrics_json)